├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.yml ├── dependabot.yml └── workflows │ ├── code_quality_control.yml │ ├── cos_integration.yml │ ├── docs.yml │ ├── docs_test.yml │ ├── label.yml │ ├── lints.yml │ ├── pr_request_checks.yml │ ├── pull-request-links.yml │ ├── pylint.yml │ ├── python-publish.yml │ ├── quality.yml │ ├── ruff.yml │ ├── run_test.yml │ ├── stale.yml │ ├── test.yml │ ├── testing.yml │ ├── unit-test.yml │ └── welcome.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── .DS_Store ├── .readthedocs.yaml ├── 20_swarm_model_ideas.md ├── README.md ├── assets │ ├── css │ │ └── extra.css │ └── img │ │ ├── SwarmsLogoIcon.png │ │ ├── swarmsbanner.png │ │ ├── tools │ │ ├── output.png │ │ ├── poetry_setup.png │ │ └── toml.png │ │ └── zetascale.png ├── contributing.md ├── index.md ├── mkdocs.yml ├── overrides │ └── main.html ├── requirements.txt ├── stylesheets │ └── extra.css ├── swarms │ ├── aco.md │ ├── firefly.md │ ├── index.md │ ├── pso.md │ ├── qb.md │ └── so.md └── vision.md ├── example.py ├── examples ├── ant_colony.py ├── fire_fly_example.py ├── fish_school_example.py ├── mixture_of_mambas.py ├── new_mergers.py ├── nnt.py ├── queen_bee_transformer_hierarchy_example.py ├── silu_visualization.py ├── simple_moe.py ├── sop.py ├── swarmalator_example.py └── switch_moe.py ├── multi_modal_mergers.py ├── pyproject.toml ├── queen_bee_transformer_hierarchy.py ├── requirements.txt ├── scripts ├── code_quality.sh ├── get_package_requirements.py ├── requirementstxt_to_pyproject.py ├── test_name.sh └── tests.sh ├── swarms_torch ├── __init__.py ├── mergers │ ├── __init__.py │ ├── all_new_evo_mergers.py │ └── mm_mergers.py ├── pso │ ├── __init__.py │ ├── multi_swarm_pso.py │ ├── multi_swarm_pso2.py │ ├── multi_swarm_pso_transformer.py │ └── transformer_pso.py ├── structs │ ├── __init__.py │ ├── ant_colony_swarm.py │ ├── cellular_transformer.py │ ├── firefly.py │ ├── fish_school.py │ ├── graph_cellular_automa.py │ ├── hivemind_swarm_transformer.py │ ├── ma_agent.py │ ├── mas_model.py │ ├── mixtral_expert.py │ ├── mixture_of_mamba.py │ ├── neuronal_transformer.py │ ├── parallel_wrapper.py │ ├── queen_bee.py │ ├── simple_moe.py │ └── switch_moe.py ├── swarmalators │ ├── __init__.py │ ├── swarmalator_base.py │ ├── swarmalator_transformer.py │ └── swarmalator_visualize.py └── utils │ ├── __init__.py │ ├── particle_swarm.py │ ├── sakana.py │ └── spiral_optimization.py ├── test.py ├── test_switch_moe_fix.py └── tests ├── ant_colony.py ├── cellular_swarm.py ├── fish_school.py ├── neuronal_transformer.py ├── particle_swarm.py ├── queen_bee.py ├── spiral_optimization.py ├── swarmalator_base.py ├── test.py ├── test_mixture_of_mamba.py ├── transformer_hive.py └── transformer_pso.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [kyegomez] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # 
Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: #Nothing 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a detailed report on the bug and it's root cause. Conduct root cause error analysis 4 | title: "[BUG] " 5 | labels: bug 6 | assignees: kyegomez 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is and what the main root cause error is. Test very thoroughly before submitting. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: 'kyegomez' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.yml: -------------------------------------------------------------------------------- 1 | 4 | 5 | {% block announce %} 6 |
7 | Star, fork, and contribute to Swarms on GitHub! 8 |
9 | {% endblock %} -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | mkdocs-material 3 | mkdocs-glightbox 4 | mkdocs-git-authors-plugin 5 | mkdocs-git-revision-date-plugin 6 | mkdocs-git-committers-plugin 7 | mkdocstrings 8 | mike 9 | mkdocs-jupyter 10 | mkdocs-git-committers-plugin-2 11 | mkdocs-git-revision-date-localized-plugin 12 | mkdocs-redirects 13 | mkdocs-material-extensions 14 | mkdocs-simple-hooks 15 | mkdocs-awesome-pages-plugin 16 | mkdocs-versioning 17 | mkdocs-mermaid2-plugin 18 | mkdocs-include-markdown-plugin 19 | mkdocs-enumerate-headings-plugin 20 | mkdocs-autolinks-plugin 21 | mkdocs-minify-html-plugin 22 | mkdocs-autolinks-plugin 23 | 24 | # Requirements for core 25 | jinja2~=3.1 26 | markdown~=3.7 27 | mkdocs-material-extensions~=1.3 28 | pygments~=2.18 29 | pymdown-extensions~=10.9 30 | 31 | # Requirements for plugins 32 | babel~=2.16 33 | colorama~=0.4 34 | paginate~=0.5 35 | regex>=2022.4 -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --md-primary-fg-color: #8315F9; 3 | --md-accent-fg-color: #00FFCE; 4 | } -------------------------------------------------------------------------------- /docs/swarms/aco.md: -------------------------------------------------------------------------------- 1 | # `AntColonyOptimization` Class 2 | 3 | The `AntColonyOptimization` class implements the Ant Colony Optimization (ACO) algorithm. ACO is a probabilistic technique for solving computational problems which can be reduced to finding good paths through graphs. 4 | 5 | ## Attributes 6 | 7 | - `goal` (str): The goal string to be optimized. 8 | - `num_ants` (int): Number of ants. 9 | - `evaporation_rate` (float): Evaporation rate. 10 | - `alpha` (int): The relative importance of the pheromone. 11 | - `beta` (int): The relative importance of the heuristic information. 12 | - `num_iterations` (int): The number of iterations. 13 | - `pheromones` (torch.Tensor): The pheromone levels. 14 | - `solutions` (list): The solutions found by the ants. 15 | 16 | ## Methods 17 | ------- 18 | 19 | ### `__init__(self, goal: str = None, num_ants: int = 10000, evaporation_rate: float = 0.1, alpha: int = 1, beta: int = 1, num_iterations: int = 10010)` 20 | 21 | The constructor for the `AntColonyOptimization` class. Initializes the pheromone levels and the solutions. 22 | 23 | #### Parameters 24 | 25 | - `goal` (str, optional): The goal string to be optimized. 26 | - `num_ants` (int, optional): Number of ants. Default is 10000. 27 | - `evaporation_rate` (float, optional): Evaporation rate. Default is 0.1. 28 | - `alpha` (int, optional): The relative importance of the pheromone. Default is 1. 29 | - `beta` (int, optional): The relative importance of the heuristic information. Default is 1. 30 | - `num_iterations` (int, optional): The number of iterations. Default is 10010. 31 | 32 | #### Example 33 | 34 | ``` 35 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=10000, num_iterations=1000) 36 | ``` 37 | 38 | 39 | ### `fitness(self, solution)` 40 | 41 | Computes the fitness of a solution. The fitness is the negative of the Euclidean distance between the solution and the goal. 42 | 43 | #### Parameters 44 | 45 | - `solution` (torch.Tensor): The solution to compute the fitness for. 
46 | 47 | #### Returns 48 | 49 | - `fitness` (float): The fitness of the solution. 50 | 51 | #### Example 52 | 53 | ``` 54 | fitness = aco.fitness(solution) 55 | ``` 56 | 57 | 58 | ### `update_pheromones(self)` 59 | 60 | Updates the pheromone levels based on the fitness of the solutions. 61 | 62 | #### Example 63 | 64 | ``` 65 | aco.update_pheromones() 66 | ``` 67 | 68 | 69 | ### `choose_next_path(self)` 70 | 71 | Chooses the next path based on the pheromone levels. The probability of choosing a path is proportional to the pheromone level of the path. 72 | 73 | #### Returns 74 | 75 | - `path` (int): The chosen path. 76 | 77 | #### Example 78 | 79 | ``` 80 | path = aco.choose_next_path() 81 | ``` 82 | 83 | 84 | ### `optimize(self)` 85 | 86 | Optimizes the goal string. Updates the solutions and the pheromone levels for a given number of iterations and returns the best solution. 87 | 88 | #### Returns 89 | 90 | - `best_solution` (str): The best solution. 91 | 92 | #### Example 93 | 94 | ``` 95 | best_solution = aco.optimize() 96 | print("Best Matched String:", best_solution) 97 | ``` 98 | 99 | 100 | Usage Examples 101 | -------------- 102 | 103 | ### Example 1: Optimize a String 104 | 105 | In this example, we will optimize the string "Hello ACO" using 10000 ants and 1000 iterations. 106 | 107 | ``` 108 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=10000, num_iterations=1000) 109 | best_solution = aco.optimize() 110 | print("Best Matched String:", best_solution) 111 | ``` 112 | 113 | 114 | ### Example 2: Using a Different Number of Ants 115 | 116 | In this example, we will optimize the string "Hello ACO" using 5000 ants and 1000 iterations. 117 | 118 | ``` 119 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=5000, num_iterations=1000) 120 | best_solution = aco.optimize() 121 | print("Best Matched String:", best_solution) 122 | ``` 123 | 124 | 125 | ### Example 3: Using a Different Evaporation Rate 126 | 127 | In this example, we will optimize the string "Hello ACO" using 10000 ants, an evaporation rate of 0.2, and 1000 iterations. 128 | 129 | ``` 130 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=10000, evaporation_rate=0.2, num_iterations=1000) 131 | best_solution = aco.optimize() 132 | print("Best Matched String:", best_solution) 133 | ``` 134 | -------------------------------------------------------------------------------- /docs/swarms/firefly.md: -------------------------------------------------------------------------------- 1 | # FireflyOptimizer 2 | 3 | ```python 4 | class FireflyOptimizer(cost_function, steps=5000, species=4, population_size=1000, dimensions=10, lower_bound=-4.0, upper_bound=4.0, mix_species_every=25, beta0=2.0, gamma=1.0, alpha=0.1, alpha_decay=0.995, use_genetic_algorithm=False, breed_every=10, tournament_size=100, num_children=500, use_cuda=True, verbose=True) 5 | ``` 6 | 7 | The `FireflyOptimizer` class implements the Firefly Algorithm to minimize a given objective function. It simulates the flashing behavior of fireflies to explore the search space efficiently. 8 | 9 | ## Parameters 10 | 11 | - **cost_function** (callable): 12 | The objective function to minimize. Should accept a `torch.Tensor` and return a `torch.Tensor` of costs. 13 | 14 | - **steps** (int, optional): 15 | Number of optimization steps. Default: `5000`. 16 | 17 | - **species** (int, optional): 18 | Number of species in the population. Default: `4`. 19 | 20 | - **population_size** (int, optional): 21 | Number of fireflies in each species. Default: `1000`. 
22 | 23 | - **dimensions** (int, optional): 24 | Dimensionality of the search space. Default: `10`. 25 | 26 | - **lower_bound** (float, optional): 27 | Lower bound of the search space. Default: `-4.0`. 28 | 29 | - **upper_bound** (float, optional): 30 | Upper bound of the search space. Default: `4.0`. 31 | 32 | - **mix_species_every** (int, optional): 33 | Interval (in steps) to mix species. Default: `25`. 34 | 35 | - **beta0** (float, optional): 36 | Base attractiveness coefficient. Default: `2.0`. 37 | 38 | - **gamma** (float, optional): 39 | Light absorption coefficient controlling intensity decay. Default: `1.0`. 40 | 41 | - **alpha** (float, optional): 42 | Randomness scaling factor. Default: `0.1`. 43 | 44 | - **alpha_decay** (float, optional): 45 | Decay rate of `alpha` per step. Default: `0.995`. 46 | 47 | - **use_genetic_algorithm** (bool, optional): 48 | Whether to include genetic algorithm operations. Default: `False`. 49 | 50 | - **breed_every** (int, optional): 51 | Steps between breeding operations when using genetic algorithm. Default: `10`. 52 | 53 | - **tournament_size** (int, optional): 54 | Number of participants in each tournament selection. Default: `100`. 55 | 56 | - **num_children** (int, optional): 57 | Number of offspring produced during breeding. Default: `500`. 58 | 59 | - **use_cuda** (bool, optional): 60 | Use CUDA for computations if available. Default: `True`. 61 | 62 | - **verbose** (bool, optional): 63 | Print progress messages during optimization. Default: `True`. 64 | 65 | ## Attributes 66 | 67 | | Attribute | Type | Description | 68 | |--------------------|-----------------|--------------------------------------------------------| 69 | | `fireflies` | `torch.Tensor` | Positions of the fireflies in the search space. | 70 | | `device` | `torch.device` | Device used for computations (`cpu` or `cuda`). | 71 | | `current_alpha` | `float` | Current value of `alpha` during optimization. | 72 | 73 | ## Methods 74 | 75 | ### `optimize()` 76 | 77 | Runs the optimization loop for the specified number of steps. 78 | 79 | **Example:** 80 | 81 | ```python 82 | optimizer.optimize() 83 | ``` 84 | 85 | ### `get_best_solution()` 86 | 87 | Retrieves the best solution found by the optimizer. 88 | 89 | **Returns:** 90 | 91 | - **best_firefly** (`torch.Tensor`): 92 | The best solution vector found. 93 | 94 | **Example:** 95 | 96 | ```python 97 | best_solution = optimizer.get_best_solution() 98 | print(f"Best solution: {best_solution}") 99 | ``` 100 | 101 | ### `generate()` 102 | 103 | Generates a new set of fireflies, reinitializing their positions. 104 | 105 | **Returns:** 106 | 107 | - **fireflies** (`torch.Tensor`): 108 | The new set of fireflies. 109 | 110 | **Example:** 111 | 112 | ```python 113 | optimizer.generate() 114 | ``` 115 | 116 | ### `reset()` 117 | 118 | Resets the optimizer to its initial state, including `alpha` and firefly positions. 119 | 120 | **Example:** 121 | 122 | ```python 123 | optimizer.reset() 124 | ``` 125 | 126 | --- 127 | 128 | **Note:** The Firefly Algorithm is inspired by the flashing behavior of fireflies and is suitable for continuous optimization problems. This implementation allows for customization and includes optional genetic algorithm operations for enhanced performance. 
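For reference, `beta0`, `gamma`, and `alpha` play the roles they have in the canonical firefly position update from Yang's Firefly Algorithm (shown here in its standard textbook form; the implementation may differ in details such as species handling and bound clipping):

$$
x_i \leftarrow x_i + \beta_0 \, e^{-\gamma r_{ij}^2} \,(x_j - x_i) + \alpha \, \epsilon_i
$$

where firefly $i$ moves toward every brighter (lower-cost) firefly $j$, $r_{ij}$ is the distance between them, and $\epsilon_i$ is random noise whose scale `alpha` shrinks by `alpha_decay` each step.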
129 | 130 | **Example Usage:** 131 | 132 | ```python 133 | from swarms_torch.firefly import FireflyOptimizer 134 | from torch import Tensor 135 | 136 | 137 | def rosenbrock(x: Tensor) -> Tensor: 138 | return ( 139 | 100 * (x[..., 1:] - x[..., :-1] ** 2) ** 2 + (1 - x[..., :-1]) ** 2 140 | ).sum(dim=-1) 141 | 142 | 143 | if __name__ == "__main__": 144 | optimizer = FireflyOptimizer( 145 | cost_function=rosenbrock, 146 | steps=100, 147 | species=10, 148 | population_size=100, 149 | dimensions=10, 150 | lower_bound=-4, 151 | upper_bound=4, 152 | # Many more parameters can be set, see the documentation for more details 153 | ) 154 | optimizer.optimize() 155 | best_solution = optimizer.get_best_solution() 156 | print(f"Best solution: {best_solution}") 157 | ``` -------------------------------------------------------------------------------- /docs/swarms/index.md: -------------------------------------------------------------------------------- 1 | # Swarms Installation Guide 2 | 3 |
4 |

5 | 6 | 10 | 11 |

12 |
13 | 14 | You can install `swarms` with pip in a 15 | [**Python>=3.10**](https://www.python.org/) environment. 16 | 17 | ## Prerequisites 18 | 19 | Before you begin, ensure you have the following installed: 20 | 21 | - Python 3.10 or higher: [Download Python](https://www.python.org/) 22 | - pip (specific version recommended): `pip >= 21.0` 23 | - git (for cloning the repository): [Download Git](https://git-scm.com/) 24 | 25 | ## Installation Options 26 | 27 | === "pip (Recommended)" 28 | 29 | #### Headless Installation 30 | 31 | The headless installation of `swarms` is designed for environments where graphical user interfaces (GUI) are not needed, making it more lightweight and suitable for server-side applications. 32 | 33 | ```bash 34 | pip3 install swarms-torch 35 | ``` 36 | 37 | === "Development Installation" 38 | 39 | === "Using virtualenv" 40 | 41 | 1. **Clone the repository and navigate to the root directory:** 42 | 43 | ```bash 44 | git clone https://github.com/kyegomez/swarms-pytorch 45 | cd swarms 46 | ``` 47 | 48 | 2. **Setup Python environment and activate it:** 49 | 50 | ```bash 51 | python3 -m venv venv 52 | source venv/bin/activate 53 | pip install --upgrade pip 54 | ``` 55 | 56 | 3. **Install Swarms:** 57 | 58 | - Headless install: 59 | 60 | ```bash 61 | pip install -e . 62 | ``` 63 | 64 | - Desktop install: 65 | 66 | ```bash 67 | pip install -e .[desktop] 68 | ``` 69 | 70 | === "Using Anaconda" 71 | 72 | 1. **Create and activate an Anaconda environment:** 73 | 74 | ```bash 75 | conda create -n swarms python=3.10 76 | conda activate swarms 77 | ``` 78 | 79 | 2. **Clone the repository and navigate to the root directory:** 80 | 81 | ```bash 82 | git clone https://github.com/kyegomez/swarms-pytorch 83 | cd swarms 84 | ``` 85 | 86 | 3. **Install Swarms:** 87 | 88 | - Headless install: 89 | 90 | ```bash 91 | pip install -e . 92 | ``` 93 | 94 | - Desktop install: 95 | 96 | ```bash 97 | pip install -e .[desktop] 98 | ``` 99 | 100 | === "Using Poetry" 101 | 102 | 1. **Clone the repository and navigate to the root directory:** 103 | 104 | ```bash 105 | git clone https://github.com/kyegomez/swarms-pytorch 106 | cd swarms 107 | ``` 108 | 109 | 2. **Setup Python environment and activate it:** 110 | 111 | ```bash 112 | poetry env use python3.10 113 | poetry shell 114 | ``` 115 | 116 | 3. **Install Swarms:** 117 | 118 | - Headless install: 119 | 120 | ```bash 121 | poetry install 122 | ``` 123 | 124 | - Desktop install: 125 | 126 | ```bash 127 | poetry install --extras "desktop" 128 | ``` 129 | 130 | === "CI/CD Pipelines" 131 | 132 | Integrating Swarms into your CI/CD pipeline ensures automated testing and deployment. 133 | 134 | #### Using GitHub Actions 135 | 136 | ```yaml 137 | # .github/workflows/ci.yml 138 | name: CI 139 | 140 | on: 141 | push: 142 | branches: [ main ] 143 | pull_request: 144 | branches: [ main ] 145 | 146 | jobs: 147 | build: 148 | 149 | runs-on: ubuntu-latest 150 | 151 | steps: 152 | - uses: actions/checkout@v2 153 | - name: Set up Python 154 | uses: actions/setup-python@v2 155 | with: 156 | python-version: 3.10 157 | - name: Install dependencies 158 | run: | 159 | python -m venv venv 160 | source venv/bin/activate 161 | pip install --upgrade pip 162 | pip install -e . 
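          # Note: each GitHub Actions `run` step starts a fresh shell, so the
          # virtual environment is re-activated in the step below.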
163 | - name: Run tests 164 | run: | 165 | source venv/bin/activate 166 | pytest 167 | ``` 168 | 169 | #### Using Jenkins 170 | 171 | ```groovy 172 | pipeline { 173 | agent any 174 | 175 | stages { 176 | stage('Clone repository') { 177 | steps { 178 | git 'https://github.com/kyegomez/swarms-pytorch' 179 | } 180 | } 181 | stage('Setup Python') { 182 | steps { 183 | sh 'python3 -m venv venv' 184 | sh 'source venv/bin/activate && pip install --upgrade pip' 185 | } 186 | } 187 | stage('Install dependencies') { 188 | steps { 189 | sh 'source venv/bin/activate && pip install -e .' 190 | } 191 | } 192 | stage('Run tests') { 193 | steps { 194 | sh 'source venv/bin/activate && pytest' 195 | } 196 | } 197 | } 198 | } 199 | ``` 200 | -------------------------------------------------------------------------------- /docs/swarms/pso.md: -------------------------------------------------------------------------------- 1 | # `ParticleSwarmOptimization` Class 2 | 3 | The `ParticleSwarmOptimization` class implements the Particle Swarm Optimization (PSO) algorithm. PSO is a computational method that optimizes a problem by iteratively trying to improve a candidate solution with regard to a given measure of quality. It solves a problem by having a population of candidate solutions, here dubbed particles, and moving these particles around in the search-space according to simple mathematical formulae over the particle's position and velocity. 4 | 5 | ## Attributes 6 | 7 | - `goal` (str): The goal string to be optimized. 8 | - `n_particles` (int): Number of particles. 9 | - `inertia` (float): Inertia weight. 10 | - `personal_best_weight` (float): Personal best weight. 11 | - `global_best_weight` (float): Global best weight. 12 | - `particles` (torch.Tensor): The particles' positions. 13 | - `velocities` (torch.Tensor): The particles' velocities. 14 | - `personal_best` (torch.Tensor): The personal best positions of each particle. 15 | - `global_best` (torch.Tensor): The global best position. 16 | 17 | ## Methods 18 | 19 | ### `__init__(self, goal: str = None, n_particles: int = 100, inertia: float = 0.5, personal_best_weight: float = 1.5, global_best_weight: float = 1.5, dim: int = 1)` 20 | 21 | The constructor for the `ParticleSwarmOptimization` class. Initializes the particles with random positions and velocities, and the personal best and global best with the initial positions of the particles. 22 | 23 | #### Parameters 24 | 25 | - `goal` (str, optional): The goal string to be optimized. 26 | - `n_particles` (int, optional): Number of particles. Default is 100. 27 | - `inertia` (float, optional): Inertia weight. Default is 0.5. 28 | - `personal_best_weight` (float, optional): Personal best weight. Default is 1.5. 29 | - `global_best_weight` (float, optional): Global best weight. Default is 1.5. 30 | - `dim` (int, optional): The dimension of the problem. Default is 1. 31 | 32 | #### Example 33 | 34 | ``` 35 | pso = ParticleSwarmOptimization(goal="Attention is all you need", n_particles=100) 36 | ``` 37 | 38 | 39 | ### `compute_fitness(self, particle)` 40 | 41 | Computes the fitness value of a particle. The fitness value is the inverse of the Euclidean distance between the particle and the goal. 42 | 43 | #### Parameters 44 | 45 | - `particle` (torch.Tensor): The particle to compute the fitness value for. 46 | 47 | #### Returns 48 | 49 | - `fitness` (float): The fitness value of the particle. 
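Expressed as a formula (a sketch of the documented behavior; the implementation may add a small constant to the distance to avoid division by zero), with particle $p$ and goal encoding $g$:

$$
\text{fitness}(p) = \frac{1}{\lVert p - g \rVert_2}
$$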
50 | 51 | #### Example 52 | 53 | ``` 54 | fitness = pso.compute_fitness(particle) 55 | ``` 56 | 57 | 58 | ### `update(self)` 59 | 60 | Updates the personal best and global best, and the velocity and position of each particle. 61 | 62 | #### Example 63 | 64 | ``` 65 | pso.update() 66 | ``` 67 | 68 | 69 | ### `optimize(self, iterations: int = 1000)` 70 | 71 | Optimizes the goal string. Updates the particles for a given number of iterations and prints the best particle at each iteration. 72 | 73 | #### Parameters 74 | 75 | - `iterations` (int, optional): The maximum number of iterations. Default is 1000. 76 | 77 | #### Example 78 | 79 | ``` 80 | pso.optimize(iterations=1000) 81 | ``` 82 | 83 | 84 | Usage Examples 85 | -------------- 86 | 87 | ### Example 1: Optimize a String 88 | 89 | In this example, we will optimize the string "Attention is all you need" using 100 particles. 90 | 91 | ```python 92 | pso = ParticleSwarmOptimization(goal="Attention is all you need", n_particles=100) 93 | pso.optimize(iterations=1000) 94 | ``` 95 | ### Example 2: Optimize a Different String 96 | 97 | In this example, we will optimize the string "Hello, World!" using 200 particles. 98 | 99 | ```python 100 | pso = ParticleSwarmOptimization(goal="Hello, World!", n_particles=200) 101 | pso.optimize(iterations=1000) 102 | ``` 103 | 104 | 105 | ### Example 3: Using Different Weights 106 | 107 | In this example, we will optimize the string "Particle Swarm Optimization" using 100 particles, an inertia weight of 0.8, a personal best weight of 2.0, and a global best weight of 2.0. 108 | 109 | ```python 110 | pso = ParticleSwarmOptimization(goal="Particle Swarm Optimization", n_particles=100, inertia=0.8, personal_best_weight=2.0, global_best_weight=2.0) 111 | pso.optimize(iterations=1000) 112 | ``` 113 | 114 | 115 | ### Example 4: Using a Large Number of Particles 116 | 117 | In this example, we will optimize the string "Large number of particles" using 1000 particles. 118 | 119 | ```python 120 | pso = ParticleSwarmOptimization(goal="Large number of particles", n_particles=1000) 121 | pso.optimize(iterations=1000) 122 | ``` 123 | 124 | 125 | ### Example 5: Using a Small Number of Iterations 126 | 127 | In this example, we will optimize the string "Small number of iterations" using 100 particles and 100 iterations. 128 | 129 | ```python 130 | pso = ParticleSwarmOptimization(goal="Small number of iterations", n_particles=100) 131 | pso.optimize(iterations=100) 132 | ``` 133 | 134 | 135 | ### Example 6: Using a Large Number of Iterations 136 | 137 | In this example, we will optimize the string "Large number of iterations" using 100 particles and 10000 iterations. 138 | 139 | ```python 140 | pso = ParticleSwarmOptimization(goal="Large number of iterations", n_particles=100) 141 | pso.optimize(iterations=10000) 142 | ``` 143 | 144 | 145 | ### Example 7: Using Different Characters 146 | 147 | In this example, we will optimize the string "1234567890" using 100 particles. 148 | 149 | ```python 150 | pso = ParticleSwarmOptimization(goal="1234567890", n_particles=100) 151 | pso.optimize(iterations=1000) 152 | ``` 153 | 154 | 155 | ### Example 8: Using Special Characters 156 | 157 | In this example, we will optimize the string "!@#$%^&*()" using 100 particles. 158 | 159 | ```python 160 | pso = ParticleSwarmOptimization(goal="!@#$%^&*()", n_particles=100) 161 | pso.optimize(iterations=1000) 162 | ``` 163 | 164 | 165 | ### Example 9: Using a Long String 166 | 167 | In this example, we will optimize a long string using 100 particles. 
168 | 169 | ```python 170 | pso = ParticleSwarmOptimization(goal="This is a very long string that we want to optimize using Particle Swarm Optimization.", n_particles=100) 171 | pso.optimize(iterations=1000) 172 | ``` 173 | 174 | 175 | ### Example 10: Using a Short String 176 | 177 | In this example, we will optimize a short string using 100 particles. 178 | 179 | ```python 180 | pso = ParticleSwarmOptimization(goal="Short", n_particles=100) 181 | pso.optimize(iterations=1000) 182 | ``` -------------------------------------------------------------------------------- /docs/swarms/qb.md: -------------------------------------------------------------------------------- 1 | # `QueenBeeGa` Class 2 | 3 | The `QueenBeeGa` class implements the Queen Bee Genetic Algorithm (GA). This GA is inspired by the evolution of bees, where the fittest solution is designated as the queen and the rest of the population contends to mate with it. The strong exploitation is balanced by a higher than normal mutation rate. 4 | 5 | ## Attributes 6 | --- 7 | - `goal` (str): The goal string to be optimized. 8 | - `pop_size` (int): Population size. 9 | - `mutation_prob` (float): Mutation probability. 10 | - `strong_mutation_rate` (float): Strong mutation rate. 11 | - `strong_mutation_prob` (float): Strong mutation probability. 12 | - `num_tournament_participants` (int): Number of tournament participants. 13 | - `gene_length` (int): Length of the gene. 14 | - `gene_midpoint` (int): Midpoint of the gene. 15 | - `target_gene` (torch.Tensor): The target gene. 16 | - `strong_mutate_pool_size` (float): Size of the strong mutate pool. 17 | - `num_code_mutate` (float): Number of code mutations. 18 | - `strong_num_code_mutate` (float): Number of strong code mutations. 19 | - `pool` (torch.Tensor): The pool of genes. 20 | - `queen` (torch.Tensor): The queen gene. 21 | - `queen_fitness` (float): The fitness of the queen. 22 | - `generation` (int): The current generation. 23 | 24 | ## Methods 25 | ------- 26 | 27 | ### `__init__(self, goal: str = "Attention is all you need", pop_size: int = 100, mutation_prob: float = 0.04, strong_mutation_rate: float = 0.1, strong_mutation_prob: float = 0.25, num_tournament_participants: int = 25)` 28 | 29 | The constructor for the `QueenBeeGa` class. Initializes the pool of genes, the queen, and the queen's fitness. 30 | 31 | #### Parameters 32 | 33 | - `goal` (str, optional): The goal string to be optimized. Default is "Attention is all you need". 34 | - `pop_size` (int, optional): Population size. Default is 100. 35 | - `mutation_prob` (float, optional): Mutation probability. Default is 0.04. 36 | - `strong_mutation_rate` (float, optional): Strong mutation rate. Default is 0.1. 37 | - `strong_mutation_prob` (float, optional): Strong mutation probability. Default is 0.25. 38 | - `num_tournament_participants` (int, optional): Number of tournament participants. Default is 25. 39 | 40 | #### Example 41 | 42 | ``` 43 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25) 44 | ``` 45 | 46 | 47 | ### `encode(s)` 48 | 49 | Converts a string to its ASCII values. 50 | 51 | #### Parameters 52 | 53 | - `s` (str): The string to encode. 54 | 55 | #### Returns 56 | 57 | - `encoded` (torch.Tensor): The encoded string. 
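Since the encoding is plain ASCII, `QueenBeeGa.encode("Hi")` would produce a tensor holding `[72, 105]`, the ASCII codes of `H` and `i` (illustrative input, not from the original docs).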
58 | 59 | #### Example 60 | 61 | ``` 62 | encoded = QueenBeeGa.encode("Hello") 63 | ``` 64 | 65 | 66 | ### `decode(t)` 67 | 68 | Converts a tensor of ASCII values back to a string. 69 | 70 | #### Parameters 71 | 72 | - `t` (torch.Tensor): The tensor to decode. 73 | 74 | #### Returns 75 | 76 | - `decoded` (str): The decoded string. 77 | 78 | #### Example 79 | 80 | ``` 81 | decoded = QueenBeeGa.decode(encoded) 82 | ``` 83 | 84 | 85 | ### `run(self, max_generations: int = 1000)` 86 | 87 | Runs the Queen Bee GA. Evolves the population for a given number of generations. 88 | 89 | #### Parameters 90 | 91 | - `max_generations` (int, optional): The maximum number of generations. Default is 1000. 92 | 93 | #### Example 94 | 95 | ``` 96 | optimizer.run(max_generations=100) 97 | ``` 98 | 99 | 100 | ### `_evolve(self)` 101 | 102 | Executes one step of the evolution process. Sorts the population by fitness, displays the queen and the population, and updates the queen and the population. 103 | 104 | #### Example 105 | 106 | ``` 107 | optimizer._evolve() 108 | ``` 109 | 110 | 111 | ### `_check_convergence(self)` 112 | 113 | Checks if any of the solutions has achieved the goal. 114 | 115 | #### Returns 116 | 117 | - `converged` (bool): Whether any of the solutions has achieved the goal. 118 | 119 | #### Example 120 | 121 | ``` 122 | converged = optimizer._check_convergence() 123 | ``` 124 | ------ 125 | 126 | ## Usage Examples 127 | -------------- 128 | 129 | ### Example 1: Optimize a String 130 | 131 | In this example, we will optimize the string "Attention is all you need" using a population size of 100, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 25 tournament participants. 132 | 133 | ```python 134 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25) 135 | optimizer.run(max_generations=100) 136 | ``` 137 | 138 | 139 | ### Example 2: Using a Different Goal String 140 | 141 | In this example, we will optimize the string "Hello, World!" using a population size of 100, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 25 tournament participants. 142 | 143 | ```python 144 | optimizer = QueenBeeGa(goal="Hello, World!", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25) 145 | optimizer.run(max_generations=100) 146 | ``` 147 | 148 | 149 | ### Example 3: Using a Different Population Size 150 | 151 | In this example, we will optimize the string "Attention is all you need" using a population size of 200, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 25 tournament participants. 152 | 153 | ```python 154 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=200, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25) 155 | optimizer.run(max_generations=100) 156 | ``` 157 | 158 | 159 | ### Example 4: Using Different Mutation Probabilities 160 | 161 | In this example, we will optimize the string "Attention is all you need" using a population size of 100, a mutation probability of 0.05, a strong mutation rate of 0.1, a strong mutation probability of 0.3, and 25 tournament participants. 
162 | 163 | ```python 164 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.05, strong_mutation_rate=0.1, strong_mutation_prob=0.3, num_tournament_participants=25) 165 | optimizer.run(max_generations=100) 166 | ``` 167 | 168 | 169 | ### Example 5: Using a Different Number of Tournament Participants 170 | 171 | In this example, we will optimize the string "Attention is all you need" using a population size of 100, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 30 tournament participants. 172 | 173 | ```python 174 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=30) 175 | optimizer.run(max_generations=100) 176 | ``` -------------------------------------------------------------------------------- /docs/swarms/so.md: -------------------------------------------------------------------------------- 1 | # `SPO` Class 2 | 3 | 4 | The `SPO` class implements the Spiral Optimization (SPO) algorithm. This algorithm is used for optimization towards a target string. 5 | 6 | ## Attributes 7 | ---------- 8 | 9 | - `goal` (torch.Tensor): The goal string to be optimized. 10 | - `m` (int): Number of search points. 11 | - `k_max` (int): Maximum number of iterations. 12 | - `n_dim` (int): Length of the goal string. 13 | - `points` (torch.Tensor): The search points. 14 | - `center` (torch.Tensor): The center point. 15 | 16 | ## Methods 17 | ------- 18 | 19 | ### `__init__(self, goal: str = None, m: int = 10, k_max: int = 1000)` 20 | 21 | The constructor for the `SPO` class. Initializes the search points and the center. 22 | 23 | #### Parameters 24 | 25 | - `goal` (str, optional): The goal string to be optimized. 26 | - `m` (int, optional): Number of search points. Default is 10. 27 | - `k_max` (int, optional): Maximum number of iterations. Default is 1000. 28 | 29 | #### Example 30 | 31 | ``` 32 | spo = SPO(goal="Hello SPO", m=100, k_max=1000) 33 | ``` 34 | 35 | 36 | ### `_step_rate(self, k)` 37 | 38 | Defines the step rate function. 39 | 40 | #### Parameters 41 | 42 | - `k` (int): Current iteration. 43 | 44 | #### Returns 45 | 46 | - `step_rate` (float): Step rate for the current iteration. 47 | 48 | #### Example 49 | 50 | ``` 51 | step_rate = spo._step_rate(k) 52 | ``` 53 | 54 | 55 | ### `_update_points(self, k)` 56 | 57 | Updates the search points based on the spiral model. 58 | 59 | #### Parameters 60 | 61 | - `k` (int): Current iteration. 62 | 63 | #### Example 64 | 65 | ``` 66 | spo._update_points(k) 67 | ``` 68 | 69 | 70 | ### `_update_center(self)` 71 | 72 | Finds the best search point and sets it as the new center. 73 | 74 | #### Example 75 | 76 | ``` 77 | spo._update_center() 78 | ``` 79 | 80 | 81 | ### `optimize(self)` 82 | 83 | Runs the optimization loop. Updates the search points and the center for a given number of iterations. 84 | 85 | #### Example 86 | 87 | ``` 88 | spo.optimize() 89 | ``` 90 | 91 | 92 | ### `best_string(self)` 93 | 94 | Converts the best found point to its string representation. 95 | 96 | #### Returns 97 | 98 | - `best_string` (str): The best string. 
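Decoding mirrors the ASCII encoding used for the goal: each coordinate of the best point is rounded and mapped back to a character, so a point near `[72.2, 104.8]` would decode to `"Hi"` (illustrative values, assuming standard ASCII decoding).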
99 | 100 | #### Example 101 | 102 | ``` 103 | best_string = spo.best_string() 104 | print("Best Matched String:", best_string) 105 | ``` 106 | 107 | 108 | ## Usage Examples 109 | -------------- 110 | 111 | ### Example 1: Optimize a String 112 | 113 | In this example, we will optimize the string "Attention is all you need" using 100 search points and 1000 iterations. 114 | 115 | ```python 116 | spo = SPO(goal="Attention is all you need", m=100, k_max=1000) 117 | spo.optimize() 118 | print("Best Matched String:", spo.best_string()) 119 | ``` 120 | 121 | 122 | ### Example 2: Using a Different Goal String 123 | 124 | In this example, we will optimize the string "Hello, World!" using 100 search points and 1000 iterations. 125 | 126 | ```python 127 | spo = SPO(goal="Hello, World!", m=100, k_max=1000) 128 | spo.optimize() 129 | print("Best Matched String:", spo.best_string()) 130 | ``` 131 | 132 | 133 | ### Example 3: Using a Different Number of Search Points 134 | 135 | In this example, we will optimize the string "Attention is all you need" using 200 search points and 1000 iterations. 136 | 137 | ```python 138 | spo = SPO(goal="Attention is all you need", m=200, k_max=1000) 139 | spo.optimize() 140 | print("Best Matched String:", spo.best_string()) 141 | ``` 142 | -------------------------------------------------------------------------------- /docs/vision.md: -------------------------------------------------------------------------------- 1 | # Advancing Deep Learning Through Novel Swarm Intelligence Model Architectures 2 | 3 | The rapid evolution of deep learning has undeniably revolutionized numerous facets of technology and society. However, the prevailing approach—scaling up models using vast amounts of data and computational power—is reaching its practical limits. To transcend these boundaries and usher in the next epoch of artificial intelligence (AI), we are embarking on a mission to develop **Novel Swarm Intelligence Model Architectures**. This initiative is predicated on the belief that the future of deep learning hinges not on the quantity of data or compute, but on innovative architectural paradigms that can emulate and surpass natural intelligence systems. 4 | 5 | ### **The Need for Alternate Model Architectures** 6 | 7 | Current deep learning models predominantly rely on increasing layers, parameters, and training data to achieve marginal improvements. This methodology is akin to building taller towers on shaky foundations—it is unsustainable and inefficient. The challenges are multifold: 8 | 9 | - **Data Saturation**: High-quality, labeled data is becoming scarce and expensive to procure. Moreover, models trained on massive datasets often fail to generalize well to unseen scenarios. 10 | - **Computational Constraints**: The energy consumption and computational requirements for training colossal models are exorbitant, leading to environmental and economic concerns. 11 | - **Diminishing Returns**: Simply scaling existing architectures yields progressively smaller performance gains, indicating a plateau in this trajectory. 12 | 13 | To overcome these hurdles, we must pivot towards creating novel model architectures that can achieve superior performance without reliance on data or compute scaling. 14 | 15 | ### **Swarm Intelligence: A Paradigm Shift** 16 | 17 | Nature offers profound insights into efficient and intelligent systems. 
Swarm intelligence, observed in colonies of ants, flocks of birds, and schools of fish, exemplifies how simple agents can collectively perform complex tasks through local interactions and without centralized control. 18 | 19 | Applying swarm intelligence to AI involves developing architectures where numerous smaller models (agents) collaborate, communicate, and adapt to achieve a common goal. This approach offers several advantages: 20 | 21 | - **Scalability**: Systems can be scaled horizontally by adding more agents without exponentially increasing computational demands. 22 | - **Robustness**: The decentralized nature ensures that the failure of individual agents does not compromise the entire system. 23 | - **Adaptability**: Agents can adapt to new information and environments dynamically, enhancing the system's ability to generalize. 24 | 25 | ### **Artificial Superintelligence Through Swarms** 26 | 27 | Artificial Superintelligence (ASI) represents AI that surpasses human intelligence across all domains. Achieving ASI through a swarm of models rather than a singular entity offers a more feasible and resilient path: 28 | 29 | - **Diversity of Thought**: Multiple agents with varied specializations can approach problems from different perspectives, leading to more creative and effective solutions. 30 | - **Collective Learning**: Agents can share knowledge and learn from each other's experiences, accelerating the overall learning process. 31 | - **Emergent Behavior**: Complex and intelligent behaviors can emerge from the interactions of simple agents, potentially leading to capabilities beyond programmed instructions. 32 | 33 | ### **Our Mission Objectives** 34 | 35 | 1. **Innovate Model Architectures**: Design and develop novel swarm-based model architectures that can learn and perform tasks more efficiently than traditional models. 36 | 2. **Reduce Reliance on Data and Compute**: Create systems that require less data and computational power by leveraging the collective intelligence of agent swarms. 37 | 3. **Enhance Generalization and Adaptability**: Build models capable of adapting to new and unforeseen situations through decentralized learning and collaboration. 38 | 4. **Pave the Way for ASI**: Establish foundational architectures that can evolve into artificial superintelligence through emergent behaviors and continuous learning. 39 | 40 | ### **Conclusion** 41 | 42 | The future of deep learning and AI advancement lies not in the augmentation of data and computational resources but in the fundamental reimagining of model architectures. By embracing swarm intelligence, we aim to break through the current limitations and unlock new potentials in AI capabilities. 43 | 44 | Our mission to develop **Novel Swarm Intelligence Model Architectures** is more than an academic pursuit; it is a strategic imperative to ensure that AI continues to evolve sustainably and beneficially. We are committed to pioneering this paradigm shift, confident that it will lead to breakthroughs not just in technology, but in how intelligence—artificial or otherwise—is understood and harnessed. 45 | 46 | --- 47 | 48 | Together, we will forge a path toward an AI future that is efficient, adaptable, and intelligent beyond the sum of its parts. 
-------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch import MixtureOfMambas 3 | 4 | # Create a 3D tensor for text 5 | x = torch.rand(1, 512, 512) 6 | 7 | # Create an instance of the MixtureOfMambas model 8 | model = MixtureOfMambas( 9 | num_mambas=2, # Number of Mambas in the model 10 | dim=512, # Dimension of the input tensor 11 | d_state=1024, # Dimension of the hidden state 12 | depth=4, # Number of layers in the model 13 | d_conv=1024, # Dimension of the convolutional layers 14 | expand=4, # Expansion factor for the model 15 | fusion_method="absmax", # Fusion method for combining Mambas' outputs 16 | custom_fusion_func=None, # Custom fusion function (if any) 17 | ) 18 | 19 | # Pass the input tensor through the model and print the output shape 20 | print(model(x).shape) 21 | -------------------------------------------------------------------------------- /examples/ant_colony.py: -------------------------------------------------------------------------------- 1 | from swarms_torch.structs.ant_colony_swarm import AntColonyOptimization 2 | 3 | # Usage: 4 | goal_string = "Hello ACO" 5 | aco = AntColonyOptimization(goal_string, num_iterations=1000) 6 | best_solution = aco.optimize() 7 | print("Best Matched String:", best_solution) 8 | -------------------------------------------------------------------------------- /examples/fire_fly_example.py: -------------------------------------------------------------------------------- 1 | from swarms_torch.structs.firefly import FireflyOptimizer 2 | from torch import Tensor 3 | 4 | 5 | def rosenbrock(x: Tensor) -> Tensor: 6 | return ( 7 | 100 * (x[..., 1:] - x[..., :-1] ** 2) ** 2 + (1 - x[..., :-1]) ** 2 8 | ).sum(dim=-1) 9 | 10 | 11 | if __name__ == "__main__": 12 | optimizer = FireflyOptimizer( 13 | cost_function=rosenbrock, 14 | steps=100, 15 | species=10, 16 | population_size=100, 17 | dimensions=10, 18 | lower_bound=-4, 19 | upper_bound=4, 20 | # Many more parameters can be set, see the documentation for more details 21 | ) 22 | optimizer.optimize() 23 | best_solution = optimizer.get_best_solution() 24 | print(f"Best solution: {best_solution}") 25 | -------------------------------------------------------------------------------- /examples/fish_school_example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.structs.fish_school import Fish, FishSchool 3 | 4 | # Create random source and target sequences 5 | src = torch.randn(10, 32, 512) 6 | tgt = torch.randn(10, 32, 512) 7 | 8 | # Create random labels 9 | labels = torch.randint(0, 512, (10, 32)) 10 | 11 | # Create a fish and train it on the random data 12 | fish = Fish(512, 8, 6) 13 | fish.train(src, tgt, labels) 14 | print(fish.food) # Print the fish's food 15 | 16 | # Create a fish school and optimize it on the random data 17 | school = FishSchool(10, 512, 8, 6, 100) 18 | school.forward(src, tgt, labels) 19 | print(school.fish[0].food) # Print the first fish's food 20 | -------------------------------------------------------------------------------- /examples/mixture_of_mambas.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas 3 | 4 | # Example Usage 5 | num_models = 3 6 | dim = 16 7 | state_range = (1, 20) 8 | conv_range = (1, 10) 9 | 
expand_range = (1, 5) 10 | 11 | mixture_model = MixtureOfMambas( 12 | num_models, dim, state_range, conv_range, expand_range 13 | ) 14 | x = torch.randn(2, 64, dim).to("cuda") 15 | output = mixture_model( 16 | x, fusion_method="average" 17 | ) # Or use 'weighted' with weights 18 | -------------------------------------------------------------------------------- /examples/new_mergers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.mergers.all_new_evo_mergers import ( 3 | hyperslice_merge, 4 | random_subspace_merge, 5 | dimensional_cross_fusion, 6 | weighted_evolutionary_crossover, 7 | permutation_weight_swapping, 8 | ) 9 | 10 | # Example of how to use the logger and merge methods 11 | if __name__ == "__main__": 12 | # Example models, replace with actual model instances 13 | model_1 = torch.nn.Linear(10, 10) 14 | model_2 = torch.nn.Linear(10, 10) 15 | model_3 = torch.nn.Linear(10, 10) 16 | 17 | # Perform HyperSlice merge 18 | merged_model_hs = hyperslice_merge( 19 | [model_1, model_2, model_3], slice_indices=[0, 2, 4] 20 | ) 21 | 22 | # Perform Random Subspace merge 23 | merged_model_rs = random_subspace_merge( 24 | [model_1, model_2, model_3], subspace_fraction=0.5 25 | ) 26 | 27 | # Perform Dimensional Cross-fusion merge 28 | merged_model_dc = dimensional_cross_fusion([model_1, model_2], cross_axis=0) 29 | 30 | # Perform Weighted Evolutionary Crossover merge 31 | merged_model_wc = weighted_evolutionary_crossover( 32 | [model_1, model_2, model_3], performance_scores=[0.7, 0.85, 0.65] 33 | ) 34 | 35 | # Perform Permutation-based Weight Swapping 36 | merged_model_pw = permutation_weight_swapping( 37 | [model_1, model_2], permutation_seed=42 38 | ) 39 | -------------------------------------------------------------------------------- /examples/nnt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.structs.neuronal_transformer import NNTransformer 3 | 4 | x = torch.randn(1, 10) 5 | 6 | network = NNTransformer( 7 | # transformer cells 8 | neuron_count=5, 9 | # num states 10 | num_states=10, 11 | # input dim 12 | input_dim=10, 13 | # output dim 14 | output_dim=10, 15 | # nhead 16 | nhead=2, 17 | ) 18 | 19 | 20 | output = network(x) 21 | print(output) 22 | -------------------------------------------------------------------------------- /examples/silu_visualization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | # SiLU (Sigmoid-weighted Linear Unit) activation function 6 | def silu(x): 7 | return x * (1 / (1 + np.exp(-x))) 8 | 9 | 10 | # Generate inputs and calculate SiLU outputs 11 | input_values = np.linspace(-10, 10, 100) 12 | output_values = silu(input_values) 13 | 14 | # Create 3D plot 15 | fig = plt.figure() 16 | ax = fig.add_subplot(111, projection="3d") 17 | 18 | # Scatter plot of SiLU outputs 19 | ax.scatter( 20 | input_values, output_values, input_values, c=output_values, cmap="viridis" 21 | ) 22 | ax.set_xlabel("Input") 23 | ax.set_ylabel("Output") 24 | ax.set_zlabel("Input") 25 | ax.set_title("3D Visualization of SiLU Activation Function") 26 | 27 | plt.show() 28 | -------------------------------------------------------------------------------- /examples/simple_moe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch import SimpleMoE 3 | 4 | # Example usage: 5 | 
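# High-level sketch of the idea (not necessarily this class's exact internals):
# a mixture-of-experts layer runs several feed-forward "expert" networks in
# parallel and combines their outputs using weights from a learned gating
# network, so model capacity grows without a matching rise in per-token compute.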
input_dim = 512 # Dimension of input tokens 6 | hidden_dim = 1024 # Hidden dimension of experts 7 | output_dim = 512 # Output dimension, should match input dimension for residual connection 8 | num_experts = 4 # Number of experts 9 | 10 | moe = SimpleMoE(input_dim, hidden_dim, output_dim, num_experts) 11 | 12 | # Create a sample input tensor (batch_size, seq_len, input_dim) 13 | x = torch.rand(10, 16, input_dim) 14 | 15 | # Forward pass through the MoE layer 16 | output = moe(x) 17 | print(output) 18 | -------------------------------------------------------------------------------- /examples/sop.py: -------------------------------------------------------------------------------- 1 | from swarms_torch import SPO 2 | 3 | # Example Usage 4 | goal_str = "Attention is all you need" 5 | optimizer = SPO(goal_str) 6 | optimizer.optimize() 7 | print(f"Optimized String: {optimizer.best_string()}") 8 | -------------------------------------------------------------------------------- /examples/swarmalator_example.py: -------------------------------------------------------------------------------- 1 | from swarms_torch import visualize_swarmalators, simulate_swarmalators 2 | 3 | # Init for Swarmalator 4 | # Example usage: 5 | N = 100 6 | J, alpha, beta, gamma, epsilon_a, epsilon_r, R = [0.1] * 7 7 | D = 3 # Ensure D is an integer 8 | xi, sigma_i = simulate_swarmalators( 9 | N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 10 | ) 11 | 12 | # Call the visualization function 13 | visualize_swarmalators(xi) 14 | -------------------------------------------------------------------------------- /examples/switch_moe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch import SwitchMoE 3 | 4 | # Example usage: 5 | input_dim = 768 # Dimension of input tokens 6 | hidden_dim = 2048 # Hidden dimension of experts 7 | output_dim = 768 # Output dimension, should match input dimension for residual connection 8 | num_experts = 16 # Number of experts 9 | 10 | moe_layer = SwitchMoE( 11 | dim=input_dim, 12 | hidden_dim=hidden_dim, 13 | output_dim=output_dim, 14 | num_experts=num_experts, 15 | use_aux_loss=False, 16 | ) 17 | 18 | # Create a sample input tensor (batch_size, seq_len, input_dim) 19 | x = torch.rand(32, 128, input_dim) 20 | 21 | # Forward pass through the MoE layer with auxiliary loss computation 22 | output, auxiliary_loss = moe_layer(x) 23 | 24 | # Now, 'output' contains the MoE output, and 'auxiliary_loss' contains the load balancing loss. 25 | # This auxiliary loss should be added to the main loss function during training. 
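# A minimal sketch of that combination in a training step (`criterion`,
# `targets`, and `aux_weight` are hypothetical stand-ins, not defined here):
#
#   loss = criterion(output, targets) + aux_weight * auxiliary_loss
#   loss.backward()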
26 | 27 | print(output) 28 | print(auxiliary_loss) 29 | -------------------------------------------------------------------------------- /multi_modal_mergers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.mergers.mm_mergers import ( 3 | modality_weighted_merge, 4 | modality_specific_layer_swap, 5 | cross_modality_weight_crossover, 6 | hierarchical_modality_fusion, 7 | modality_mutation_merge, 8 | ) 9 | 10 | if __name__ == "__main__": 11 | # Example models, replace with actual multi-modal model instances 12 | model_1 = torch.nn.Linear( 13 | 100, 50 14 | ) # Assume multi-modal model (e.g., image + text) 15 | model_2 = torch.nn.Linear(100, 50) 16 | model_3 = torch.nn.Linear(100, 50) 17 | 18 | # Perform Modality-Weighted Merge 19 | merged_model_wm = modality_weighted_merge( 20 | [model_1, model_2, model_3], modality_weights=[0.6, 0.3, 0.1] 21 | ) 22 | 23 | # Perform Modality-Specific Layer Swap 24 | merged_model_ls = modality_specific_layer_swap( 25 | [model_1, model_2], modality_layer_map=["image", "text"] 26 | ) 27 | 28 | # Perform Cross-Modality Weight Crossover 29 | merged_model_cm = cross_modality_weight_crossover( 30 | [model_1, model_2], modality_pairs=[(0, 1)], crossover_fraction=0.5 31 | ) 32 | 33 | # Perform Hierarchical Modality Fusion 34 | merged_model_hf = hierarchical_modality_fusion( 35 | [model_1, model_2, model_3], modality_hierarchy=[[0], [1, 2]] 36 | ) 37 | 38 | # Perform Modality Mutation Merge 39 | merged_model_mm = modality_mutation_merge( 40 | [model_1, model_2, model_3], mutation_rate=0.01 41 | ) 42 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "swarms-torch" 7 | version = "0.2.3" 8 | description = "swarms-torch - Pytorch" 9 | license = "MIT" 10 | authors = ["Kye Gomez "] 11 | homepage = "https://github.com/kyegomez/swarms-pytorch" 12 | documentation = "https://github.com/kyegomez/swarms-pytorch" # Add this if you have documentation. 
13 | readme = "README.md" # Assuming you have a README.md 14 | repository = "https://github.com/kyegomez/swarms-pytorch" 15 | keywords = ["artificial intelligence", "deep learning", "optimizers", "Prompt Engineering"] 16 | classifiers = [ 17 | "Development Status :: 4 - Beta", 18 | "Intended Audience :: Developers", 19 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 20 | "License :: OSI Approved :: MIT License", 21 | "Programming Language :: Python :: 3.6" 22 | ] 23 | packages = [ 24 | { include = "swarms_torch" }, 25 | { include = "swarms_torch/**/*.py" }, 26 | ] 27 | 28 | 29 | 30 | [tool.poetry.dependencies] 31 | python = "^3.6" 32 | torch = "*" 33 | einops = "*" 34 | zetascale = "*" 35 | pytest = "*" 36 | torchvision = "*" 37 | loguru = "*" 38 | einx = "*" 39 | 40 | 41 | 42 | 43 | [tool.poetry.group.lint.dependencies] 44 | ruff = ">=0.0.249,<0.1.10" 45 | types-toml = "^0.10.8.1" 46 | types-redis = "^4.3.21.6" 47 | types-pytz = "^2023.3.0.0" 48 | black = "^23.1.0" 49 | types-chardet = "^5.0.4.6" 50 | mypy-protobuf = "^3.0.0" 51 | 52 | 53 | [tool.autopep8] 54 | max_line_length = 80 55 | ignore = "E501,W6" # or ["E501", "W6"] 56 | in-place = true 57 | recursive = true 58 | aggressive = 3 59 | 60 | [tool.ruff] 61 | line-length = 80 62 | 63 | [tool.black] 64 | line-length = 80 65 | target-version = ['py38'] 66 | preview = true -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python 2 | torch 3 | einops 4 | zetascale 5 | pytest 6 | torchvision 7 | loguru 8 | einx 9 | 10 | -------------------------------------------------------------------------------- /scripts/code_quality.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Navigate to the directory containing the 'swarms' folder 4 | # cd /path/to/your/code/directory 5 | 6 | # Run autopep8 with max aggressiveness (-aaa) and in-place modification (-i) 7 | # on all Python files (*.py) under the 'swarms' directory. 8 | autopep8 --in-place --aggressive --aggressive --recursive --experimental --list-fixes swarms/ 9 | 10 | # Run black with default settings, since black does not have an aggressiveness level. 11 | # Black will format all Python files it finds in the 'swarms' directory. 12 | black --experimental-string-processing swarms/ 13 | 14 | # Run ruff on the 'swarms' directory. 15 | # Add any additional flags if needed according to your version of ruff. 
16 | ruff swarms_torch/ --fix 17 | 18 | # YAPF 19 | yapf --recursive --in-place --verbose --style=google --parallel swarms_torch 20 | -------------------------------------------------------------------------------- /scripts/get_package_requirements.py: -------------------------------------------------------------------------------- 1 | import pkg_resources 2 | 3 | 4 | def get_package_versions(requirements_path, output_path): 5 | try: 6 | with open(requirements_path, "r") as file: 7 | requirements = file.readlines() 8 | except FileNotFoundError: 9 | print(f"Error: The file '{requirements_path}' was not found.") 10 | return 11 | 12 | package_versions = [] 13 | 14 | for requirement in requirements: 15 | # Skip empty lines and comments 16 | if requirement.strip() == "" or requirement.strip().startswith("#"): 17 | continue 18 | 19 | # Extract package name 20 | package_name = requirement.split("==")[0].strip() 21 | try: 22 | version = pkg_resources.get_distribution(package_name).version 23 | package_versions.append(f"{package_name}=={version}") 24 | except pkg_resources.DistributionNotFound: 25 | package_versions.append(f"{package_name}: not installed") 26 | 27 | with open(output_path, "w") as file: 28 | for package_version in package_versions: 29 | file.write(package_version + "\n") 30 | print(f"Versions written to {output_path}") 31 | 32 | 33 | # Usage 34 | get_package_versions("requirements.txt", "installed_versions.txt") 35 | -------------------------------------------------------------------------------- /scripts/requirementstxt_to_pyproject.py: -------------------------------------------------------------------------------- 1 | import toml 2 | import pkg_resources 3 | 4 | 5 | def update_pyproject_versions(pyproject_path): 6 | try: 7 | with open(pyproject_path, "r") as file: 8 | data = toml.load(file) 9 | except FileNotFoundError: 10 | print(f"Error: The file '{pyproject_path}' was not found.") 11 | return 12 | except toml.TomlDecodeError: 13 | print(f"Error: The file '{pyproject_path}' is not a valid TOML file.") 14 | return 15 | 16 | dependencies = ( 17 | data.get("tool", {}).get("poetry", {}).get("dependencies", {}) 18 | ) 19 | 20 | for package in dependencies: 21 | if package.lower() == "python": 22 | continue # Skip the Python version dependency 23 | 24 | try: 25 | version = pkg_resources.get_distribution(package).version 26 | dependencies[package] = version 27 | except pkg_resources.DistributionNotFound: 28 | print(f"Warning: Package '{package}' not installed.") 29 | 30 | with open(pyproject_path, "w") as file: 31 | toml.dump(data, file) 32 | 33 | print(f"Updated versions written to {pyproject_path}") 34 | 35 | 36 | # Usage 37 | update_pyproject_versions("pyproject.toml") 38 | -------------------------------------------------------------------------------- /scripts/test_name.sh: -------------------------------------------------------------------------------- 1 | find ./tests -name "*.py" -type f | while read file 2 | do 3 | filename=$(basename "$file") 4 | dir=$(dirname "$file") 5 | if [[ $filename != test_* ]]; then 6 | mv "$file" "$dir/test_$filename" 7 | fi 8 | done -------------------------------------------------------------------------------- /scripts/tests.sh: -------------------------------------------------------------------------------- 1 | find ./tests -name '*.py' -exec pytest {} \; -------------------------------------------------------------------------------- /swarms_torch/__init__.py: -------------------------------------------------------------------------------- 1 | from 
swarms_torch.structs.ant_colony_swarm import AntColonyOptimization 2 | from swarms_torch.structs.cellular_transformer import CellularSwarm 3 | from swarms_torch.structs.fish_school import Fish, FishSchool 4 | from swarms_torch.structs.hivemind_swarm_transformer import HivemindSwarm 5 | from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas 6 | from swarms_torch.pso.multi_swarm_pso import MultiSwarmPSO 7 | from swarms_torch.structs.neuronal_transformer import NNTransformer 8 | from swarms_torch.utils.particle_swarm import ParticleSwarmOptimization 9 | from swarms_torch.structs.queen_bee import QueenBeeGa 10 | from swarms_torch.utils.spiral_optimization import SPO 11 | from swarms_torch.pso.transformer_pso import ( 12 | Particle, 13 | TransformerParticleSwarmOptimization, 14 | ) 15 | from swarms_torch.structs.firefly import FireflyOptimizer 16 | from queen_bee_transformer_hierarchy import ( 17 | QueenBeeTransformerHierarchy, 18 | GeneticTransformerEvolution, 19 | QueenWorkerCommunication, 20 | WorkerTransformer, 21 | ) 22 | from swarms_torch.structs import * # noqa 23 | 24 | __all__ = [ 25 | "ParticleSwarmOptimization", 26 | "AntColonyOptimization", 27 | "QueenBeeGa", 28 | "NNTransformer", 29 | "CellularSwarm", 30 | "SPO", 31 | "Fish", 32 | "FishSchool", 33 | "MultiSwarmPSO", 34 | "Particle", 35 | "TransformerParticleSwarmOptimization", 36 | "HivemindSwarm", 37 | "MixtureOfMambas", 38 | "FireflyOptimizer", 39 | "QueenBeeTransformerHierarchy", 40 | "GeneticTransformerEvolution", 41 | "QueenWorkerCommunication", 42 | "WorkerTransformer", 43 | ] 44 | -------------------------------------------------------------------------------- /swarms_torch/mergers/__init__.py: -------------------------------------------------------------------------------- 1 | from swarms_torch.mergers.all_new_evo_mergers import ( 2 | hyperslice_merge, 3 | random_subspace_merge, 4 | dimensional_cross_fusion, 5 | weighted_evolutionary_crossover, 6 | permutation_weight_swapping, 7 | ) 8 | from swarms_torch.mergers.mm_mergers import ( 9 | modality_weighted_merge, 10 | modality_specific_layer_swap, 11 | cross_modality_weight_crossover, 12 | hierarchical_modality_fusion, 13 | modality_mutation_merge, 14 | ) 15 | 16 | __all__ = [ 17 | "hyperslice_merge", 18 | "random_subspace_merge", 19 | "dimensional_cross_fusion", 20 | "weighted_evolutionary_crossover", 21 | "permutation_weight_swapping", 22 | "modality_weighted_merge", 23 | "modality_specific_layer_swap", 24 | "cross_modality_weight_crossover", 25 | "hierarchical_modality_fusion", 26 | "modality_mutation_merge", 27 | ] 28 | -------------------------------------------------------------------------------- /swarms_torch/pso/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/swarms-pytorch/0fa9adeb7da3e33ec53dba65b985c606a7ff558a/swarms_torch/pso/__init__.py -------------------------------------------------------------------------------- /swarms_torch/pso/multi_swarm_pso.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | 5 | class MultiSwarmPSO: 6 | """ 7 | Multi-Swarm PSO Algorithm 8 | 9 | Parameters 10 | ---------- 11 | target_string : str 12 | The target string to be generated 13 | num_sub_swarms : int 14 | The number of sub-swarms 15 | num_particles_per_swarm : int 16 | The number of particles per sub-swarm 17 | max_iterations : int 18 | The maximum number of iterations to run the algorithm 19 | 20 | 
Attributes 21 | ---------- 22 | target_string : str 23 | The target string to be generated 24 | num_sub_swarms : int 25 | The number of sub-swarms 26 | num_particles_per_swarm : int 27 | The number of particles per sub-swarm 28 | num_dimensions : int 29 | The number of dimensions in the search space 30 | max_iterations : int 31 | The maximum number of iterations to run the algorithm 32 | 33 | Methods 34 | ------- 35 | generate_random_string() 36 | Generates a random string of length num_dimensions 37 | fitness_function(position) 38 | Calculates the fitness of a given position 39 | diversification_method(sub_swarms) 40 | Adds a new sub-swarm if the number of sub-swarms is less than the maximum 41 | optimize() 42 | Runs the Multi-Swarm PSO algorithm 43 | 44 | References 45 | ---------- 46 | .. [1] https://www.researchgate.net/publication/221172800_Multi-swarm_Particle_Swarm_Optimization 47 | 48 | 49 | Usage: 50 | ------ 51 | target_string = "hello world" 52 | multi_swarm = MultiSwarmPSO(target_string) 53 | multi_swarm.optimize() 54 | 55 | 56 | 57 | """ 58 | 59 | def __init__( 60 | self, 61 | target_string, 62 | num_sub_swarms=5, 63 | num_particles_per_swarm=20, 64 | max_iterations=100, 65 | ): 66 | self.target_string = target_string 67 | self.num_sub_swarms = num_sub_swarms 68 | self.num_particles_per_swarm = num_particles_per_swarm 69 | self.num_dimensions = len(target_string) 70 | self.max_iterations = max_iterations 71 | 72 | def generate_random_string(self): 73 | """ 74 | Generates a random string of length num_dimensions 75 | 76 | """ 77 | return "".join( 78 | random.choice(string.ascii_lowercase + " ") 79 | for _ in range(self.num_dimensions) 80 | ) 81 | 82 | def fitness_function(self, position): 83 | """Fitness function to be maximized""" 84 | fitness = sum(a == b for a, b in zip(position, self.target_string)) 85 | return fitness 86 | 87 | def diversification_method(self, sub_swarms): 88 | """Diversification method to add a new sub-swarm if the number of sub-swarms is less than the maximum""" 89 | if len(sub_swarms) < self.num_sub_swarms: 90 | new_sub_swarm = [ 91 | self.generate_random_string() 92 | for _ in range(self.num_particles_per_swarm) 93 | ] 94 | sub_swarms.append(new_sub_swarm) 95 | 96 | def optimize(self): 97 | """Optimizes the fitness function""" 98 | sub_swarms = [ 99 | [ 100 | self.generate_random_string() 101 | for _ in range(self.num_particles_per_swarm) 102 | ] 103 | for _ in range(self.num_sub_swarms) 104 | ] 105 | 106 | for iteration in range(self.max_iterations): 107 | for sub_swarm in sub_swarms: 108 | for i, particle in enumerate(sub_swarm): 109 | fitness = self.fitness_function(particle) 110 | if fitness > 0: 111 | index_to_change = random.randint( 112 | 0, self.num_dimensions - 1 113 | ) 114 | new_char = random.choice(string.ascii_lowercase + " ") 115 | new_position = list(particle) 116 | new_position[index_to_change] = new_char 117 | new_position = "".join(new_position) 118 | sub_swarm[i] = new_position # write the mutated particle back into the sub-swarm 119 | 120 | self.diversification_method(sub_swarms) 121 | 122 | global_best_fitness = max( 123 | self.fitness_function(particle) 124 | for sub_swarm in sub_swarms 125 | for particle in sub_swarm 126 | ) 127 | global_best_position = [ 128 | particle 129 | for sub_swarm in sub_swarms 130 | for particle in sub_swarm 131 | if self.fitness_function(particle) == global_best_fitness 132 | ][0] 133 | print( 134 | f"Iteration {iteration}: Global Best Fitness =" 135 | f" {global_best_fitness}, Global Best Position =" 136 | f" {global_best_position}" 137 | ) 138 | 139 | global_best_fitness = max( 140 | self.fitness_function(particle) 141 | for sub_swarm in sub_swarms 142 | for particle in sub_swarm 143 | ) 144 | global_best_position = [ 145 | particle 146 | for sub_swarm in sub_swarms 147 | for particle in sub_swarm 148 | if self.fitness_function(particle) == global_best_fitness 149 | ][0] 150 | print( 151 | f"Final Result: Global Best Fitness = {global_best_fitness}, Global" 152 | f" Best Position = {global_best_position}" 153 | ) 154 | 155 | 156 | # Example usage 157 | if __name__ == "__main__": 158 | target_string = "hello world" 159 | multi_swarm = MultiSwarmPSO(target_string) 160 | multi_swarm.optimize() 161 | 
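A quick sanity check of the character-match fitness used above (a sketch, plain Python):

# "hellx world" agrees with "hello world" in 10 of 11 positions:
assert sum(a == b for a, b in zip("hellx world", "hello world")) == 10

so a perfect particle scores exactly len(target_string).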
torch.nn as nn 3 | # from copy import deepcopy 4 | # from swarms_torch.transformer_pso import Particle, TransformerParticleSwarmOptimization 5 | 6 | # class MultiSwarm(nn.Module): 7 | # def __init__( 8 | # self, 9 | # num_swarms, 10 | # *args, 11 | # **kwargs 12 | # ): 13 | # #create multiple instances of the transformerparticle swarm optimization 14 | # self.swarms = [TransformerParticleSwarmOptimization(*args, **kwargs) for _ in range(num_swarms)] 15 | # self.num_swarms = num_swarms 16 | 17 | # def optimize(self, iterations): 18 | # for _ in range(iterations): 19 | # #update each swarm 20 | # for swarm in self.swarms: 21 | # swarm.update() 22 | 23 | # #apply diversification strategy 24 | # self.diversification_method() 25 | 26 | # def diversification_strategy(self): 27 | # for i in range(self.num_swarms): 28 | # for j in range(i + 1, self.num_swarms): 29 | # if self.is_collided(self.swarms[i].global_best, self.swarms[j].global_best): 30 | # #handle collision by launching a new swarm or re init one of the swarms 31 | # self.handle_collision(i, j) 32 | 33 | # def is_collided(self, global_best_1, global_best_2): 34 | # #Check if difference between the global bests or 2 swarms is below a threshold 35 | # diff = sum((global_best_1[key] - global_best_2[key]).abs().sum() for key in global_best_1.keys()) 36 | # COLLISION_THRESHOLD = 0.1 37 | 38 | # return diff < COLLISION_THRESHOLD 39 | 40 | # def handle_collision(self, idx1, idx2): 41 | # #for simplicity re init 2nd swarm 42 | # self.swarms[idx2] = TransformerParticleSwarmOptimization(*self.swarms[idx2].model_args, **self.swarms[idx2].kwargs) 43 | 44 | # import torch 45 | # from torch.utils.data import DataLoader, TensorDataset 46 | 47 | # # Generate random data 48 | # num_samples = 1000 49 | # input_dim = 50 # Length of input sequence 50 | # num_classes = 2 51 | 52 | # inputs = torch.randint(0, 1000, (num_samples, input_dim)) 53 | # targets = torch.randint(0, num_classes, (num_samples,)) 54 | 55 | # dataset = TensorDataset(inputs, targets) 56 | # data_loader = DataLoader(dataset, batch_size=32, shuffle=True) 57 | 58 | # # Define hyperparameters and model arguments 59 | # model_args = (1000, 512, 8, 6, 2) # (input_dim, d_model, nhead, num_layers, output_dim) 60 | # optimizer_args = { 61 | # "model_constructor": Particle, 62 | # "model_args": model_args, 63 | # "device": "cpu", 64 | # "criterion": torch.nn.CrossEntropyLoss(), 65 | # "data_loader": data_loader 66 | # } 67 | 68 | # # Create MultiSwarmOptimizer 69 | # num_swarms = 3 70 | # mso = MultiSwarm(num_swarms, **optimizer_args) 71 | 72 | # # Optimize 73 | # mso.optimize(iterations=10) 74 | 75 | # # Get the best model from the best-performing swarm 76 | # best_swarm = max(mso.swarms, key=lambda s: s.compute_fitness(s.global_best)) 77 | # best_model = best_swarm.get_best_model() 78 | 79 | import torch 80 | import torch.nn as nn 81 | from copy import deepcopy 82 | 83 | 84 | class Particle(nn.Module): 85 | def __init__(self, input_dim, hidden_dim, output_dim): 86 | super(Particle, self).__init__() 87 | self.transformer = nn.Transformer(input_dim, hidden_dim) 88 | self.fc = nn.Linear(hidden_dim, output_dim) 89 | 90 | def forward(self, x): 91 | x = self.transformer(x) 92 | x = self.fc(x) 93 | return x 94 | 95 | 96 | class MultiSwarmOptimizer: 97 | def __init__( 98 | self, 99 | particle, 100 | num_particles, 101 | num_subswarms, 102 | fitness_func, 103 | bounds, 104 | num_epochs, 105 | ): 106 | self.particle = particle 107 | self.num_particles = num_particles 108 | self.num_subswarms = 
num_subswarms 109 | self.fitness_func = fitness_func 110 | self.bounds = bounds 111 | self.num_epochs = num_epochs 112 | 113 | self.subswarms = [] 114 | for _ in range(num_subswarms): 115 | self.subswarms.append( 116 | [deepcopy(particle) for _ in range(num_particles)] 117 | ) 118 | 119 | def optimize(self): 120 | for epoch in range(self.num_epochs): 121 | for subswarm in self.subswarms: 122 | for particle in subswarm: 123 | fitness = self.fitness_func(particle) 124 | if fitness > particle.best_fitness: 125 | particle.best_fitness = fitness 126 | particle.best_position = deepcopy(particle.position) 127 | 128 | best_particle = max(subswarm, key=lambda p: p.best_fitness) 129 | for particle in subswarm: 130 | particle.velocity = ( 131 | particle.velocity 132 | + 0.5 * (particle.best_position - particle.position) 133 | + 0.5 134 | * (best_particle.best_position - particle.position) 135 | ) 136 | particle.position = particle.position + particle.velocity 137 | particle.position = torch.clamp( 138 | particle.position, *self.bounds 139 | ) 140 | 141 | best_subswarm = max( 142 | self.subswarms, key=lambda s: max(p.best_fitness for p in s) 143 | ) 144 | best_particle = max(best_subswarm, key=lambda p: p.best_fitness) 145 | print( 146 | f"Epoch {epoch+1}/{self.num_epochs}, Best Fitness:" 147 | f" {best_particle.best_fitness}" 148 | ) 149 | 150 | best_subswarm = max( 151 | self.subswarms, key=lambda s: max(p.best_fitness for p in s) 152 | ) 153 | best_particle = max(best_subswarm, key=lambda p: p.best_fitness) 154 | return best_particle 155 | 156 | def get_best_model(self): 157 | return self.particle 158 | 159 | 160 | # import torch 161 | # import torch.nn as nn 162 | # from random import random 163 | 164 | 165 | # # Define the fitness function 166 | # def fitness_func(particle): 167 | # # This is a dummy fitness function. Replace it with your own. 168 | # return random() 169 | 170 | 171 | # # Define the bounds for the particle positions 172 | # bounds = (-1.0, 1.0) 173 | 174 | # # Define the number of particles, sub-swarms, and epochs 175 | # num_particles = 10 176 | # num_subswarms = 5 177 | # num_epochs = 100 178 | 179 | # # Define the dimensions for the transformer model 180 | # input_dim = 10 181 | # hidden_dim = 20 182 | # output_dim = 2 183 | 184 | # # Create a particle (transformer model) 185 | # particle = Particle(input_dim, hidden_dim, output_dim) 186 | 187 | # # Create the multi-swarm optimizer 188 | # optimizer = MultiSwarmOptimizer( 189 | # particle, num_particles, num_subswarms, fitness_func, bounds, num_epochs 190 | # ) 191 | 192 | # # Run the optimization 193 | # best_particle = optimizer.optimize() 194 | 195 | # # The best_particle is the model with the highest fitness score 196 | # print(best_particle) 197 | -------------------------------------------------------------------------------- /swarms_torch/pso/transformer_pso.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from copy import deepcopy 4 | 5 | 6 | class Particle(nn.Module): 7 | """ 8 | Simple Transformer model for classification. 9 | 10 | Parameters 11 | ---------- 12 | input_dim : int 13 | The number of expected features in the input (required). 14 | d_model : int 15 | The number of expected features in the encoder/decoder inputs (required). 16 | nhead : int 17 | The number of heads in the multiheadattention models (required). 18 | num_layers : int 19 | The number of sub-encoder-layers in the encoder (required). 
20 | output_dim : int 21 | The number of classes to predict (required). 22 | 23 | Usage: 24 | >>> model = SimpleTransformer(1000, 512, 8, 6, 10) 25 | >>> model(x) 26 | 27 | 28 | """ 29 | 30 | def __init__(self, input_dim, d_model, nhead, num_layers, output_dim): 31 | super(Particle, self).__init__() 32 | self.embedding = nn.Embedding(input_dim, d_model) 33 | self.transformer = nn.Transformer( 34 | d_model, nhead, num_layers, num_layers 35 | ) 36 | self.fc = nn.Linear(d_model, output_dim) 37 | 38 | def forward(self, x): 39 | """ 40 | Forward pass through the model. 41 | 42 | """ 43 | x = self.embedding(x) 44 | x = self.transformer(x, x) 45 | return self.fc(x[-1]) 46 | 47 | 48 | class TransformerParticleSwarmOptimization(nn.Module): 49 | """ 50 | Transformer Particle Swarm Optimization. 51 | 52 | Parameters 53 | ---------- 54 | model_constructor : function 55 | Function to create a new model instance. 56 | model_args : tuple 57 | Arguments for the model constructor. 58 | device : str 59 | 'cuda' or 'cpu'. 60 | criterion : nn.Module 61 | Loss function. 62 | data_loader : torch.utils.data.DataLoader 63 | Data loader. 64 | n_particles : int 65 | Number of particles. 66 | inertia : float 67 | Inertia weight. 68 | personal_best_weight : float 69 | Personal best weight. 70 | global_best_weight : float 71 | Global best weight. 72 | 73 | Usage: 74 | >>> pso = TransformerParticleSwarmOptimization( 75 | ... SimpleTransformer, 76 | ... (1000, 512, 8, 6, 10), 77 | ... device="cuda", 78 | ... criterion=nn.CrossEntropyLoss(), 79 | ... data_loader=your_dataloader 80 | ... ) 81 | 82 | """ 83 | 84 | def __init__( 85 | self, 86 | model_constructor, # Function to create a new model instance 87 | model_args, # Arguments for the model constructor 88 | device, # 'cuda' or 'cpu' 89 | criterion, 90 | data_loader, 91 | n_particles=10, 92 | inertia=0.5, 93 | personal_best_weight=1.5, 94 | global_best_weight=1.5, 95 | ): 96 | super(TransformerParticleSwarmOptimization, self).__init__() 97 | self.model_constructor = model_constructor 98 | self.model_args = model_args 99 | self.criterion = criterion 100 | self.data_loader = data_loader 101 | self.device = device 102 | 103 | self.n_particles = n_particles 104 | self.inertia = inertia 105 | self.personal_best_weight = personal_best_weight 106 | self.global_best_weight = global_best_weight 107 | 108 | # Representing particles using model parameters 109 | param_size = sum( 110 | p.numel() for p in model_constructor(*model_args).parameters() 111 | ) 112 | self.particles = [ 113 | self.model_constructor(*model_args).to(device) 114 | for _ in range(n_particles) 115 | ] 116 | self.velocities = [ 117 | torch.zeros((param_size,)).to(device) for _ in range(n_particles) 118 | ] 119 | self.personal_best = [deepcopy(p.state_dict()) for p in self.particles] 120 | self.global_best = deepcopy(self.particles[0].state_dict()) 121 | 122 | def compute_fitness(self, model_state): 123 | """ 124 | Compute the fitness of a model. 125 | """ 126 | model = self.model_constructor(*self.model_args).to(self.device) 127 | model.load_state_dict(model_state) 128 | model.eval() 129 | 130 | total_loss = 0.0 131 | with torch.no_grad(): 132 | for inputs, targets in self.data_loader: 133 | outputs = model(inputs.to(self.device)) 134 | loss = self.criterion(outputs, targets.to(self.device)) 135 | total_loss += loss.item() 136 | return 1.0 / (1.0 + total_loss) 137 | 138 | def update(self): 139 | """ 140 | Update particles. 
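        The intended rule is the classic PSO update (a sketch; r1 and r2 are
        uniform random tensors, c1 is personal_best_weight and c2 is
        global_best_weight):

            v <- inertia * v + c1 * r1 * (personal_best - x) + c2 * r2 * (global_best - x)
            x <- x + v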
141 | """ 142 | # Update particles 143 | for idx, particle in enumerate(self.particles): 144 | fitness = self.compute_fitness(particle.state_dict()) 145 | 146 | # Update personal best 147 | if fitness > self.compute_fitness(self.personal_best[idx]): 148 | self.personal_best[idx] = deepcopy(particle.state_dict()) 149 | 150 | # Update global best 151 | if fitness > self.compute_fitness(self.global_best): 152 | self.global_best = deepcopy(particle.state_dict()) 153 | 154 | # Update velocities and positions 155 | for name, param in particle.named_parameters(): 156 | delta = self.personal_best_weight * torch.rand_like(param) * ( 157 | self.personal_best[idx][name].to(self.device) - param.data 158 | ) + self.global_best_weight * torch.rand_like(param) * ( 159 | self.global_best[name].to(self.device) - param.data 160 | ) 161 | self.velocities[idx] += ( 162 | self.inertia * self.velocities[idx] + delta 163 | ) 164 | param.data += self.velocities[idx] 165 | 166 | def optimize(self, iterations=1000): 167 | """Optimize the model.""" 168 | for _ in range(iterations): 169 | self.update() 170 | best_particle_score = self.compute_fitness(self.global_best) 171 | print( 172 | f"Iteration {_ + 1}/{iterations} - Best Particle Fitness:" 173 | f" {best_particle_score}" 174 | ) 175 | 176 | def get_best_model(self): 177 | """Get the best model.""" 178 | best_model = self.model_constructor(*self.model_args).to(self.device) 179 | best_model.load_state_dict(self.global_best) 180 | return best_model 181 | 182 | 183 | # # Define model and optimization parameters 184 | # input_dim = 1000 185 | # d_model = 512 186 | # nhead = 8 187 | # num_layers = 3 188 | # output_dim = 10 189 | 190 | # batch_size = 32 191 | # sequence_length = 50 192 | 193 | # # Instantiate the optimizer 194 | # pso = ParticleSwarmOptimization( 195 | # SimpleTransformer, 196 | # (input_dim, d_model, nhead, num_layers, output_dim), 197 | # device="cuda", # or 'cpu' 198 | # criterion=nn.CrossEntropyLoss(), 199 | # # data_loader=your_dataloader # replace with your dataloader 200 | # ) 201 | 202 | # # Run optimization 203 | # pso.optimize(iterations=100) 204 | 205 | # # Get the best model 206 | # best_model = pso.get_best_model() 207 | 208 | # # Generate a random input tensor 209 | # x = torch.randint(0, input_dim, (batch_size, sequence_length)).to( 210 | # "cuda" 211 | # ) # ensure it's on the same device as your model 212 | 213 | # # Pass the tensor through the model 214 | # output = best_model(x) 215 | # print(output.shape) # should be [batch_size, output_dim] 216 | -------------------------------------------------------------------------------- /swarms_torch/structs/__init__.py: -------------------------------------------------------------------------------- 1 | from swarms_torch.structs.parallel_wrapper import ParallelSwarm 2 | from swarms_torch.structs.switch_moe import SwitchGate, SwitchMoE 3 | from swarms_torch.structs.simple_moe import GatingMechanism, SimpleMoE 4 | from queen_bee_transformer_hierarchy import ( 5 | QueenBeeTransformerHierarchy, 6 | GeneticTransformerEvolution, 7 | QueenWorkerCommunication, 8 | WorkerTransformer, 9 | ) 10 | 11 | __all__ = [ 12 | "ParallelSwarm", 13 | "SwitchGate", 14 | "SwitchMoE", 15 | "GatingMechanism", 16 | "SimpleMoE", 17 | "QueenBeeTransformerHierarchy", 18 | "GeneticTransformerEvolution", 19 | "QueenWorkerCommunication", 20 | "WorkerTransformer", 21 | ] 22 | -------------------------------------------------------------------------------- /swarms_torch/structs/ant_colony_swarm.py: 
-------------------------------------------------------------------------------- /swarms_torch/structs/ant_colony_swarm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class AntColonyOptimization(nn.Module): 6 | """ 7 | Ant Colony Optimization 8 | Overview: https://en.wikipedia.org/wiki/Ant_colony_optimization_algorithms 9 | 10 | How does it work? 11 | 1. Initialize pheromone levels for each path 12 | 2. For each ant, choose the next path based on the pheromone levels 13 | 3. Update the pheromone levels 14 | 4. Repeat steps 2 and 3 until the maximum number of iterations is reached 15 | 16 | Parameters 17 | ---------- 18 | goal: str 19 | The goal string to be optimized 20 | num_ants: int 21 | Number of ants 22 | evaporation_rate: float 23 | Evaporation rate 24 | 25 | Usage 26 | ----- 27 | from swarms_torch import AntColonyOptimization 28 | 29 | goal_string = "Hello ACO" 30 | aco = AntColonyOptimization(goal_string, num_iterations=1000) 31 | best_solution = aco.optimize() 32 | 33 | print("Best Matched String:", best_solution) 34 | 35 | Features to implement 36 | -------- 37 | 1. Add a stopping criterion 38 | 2. Add a callback function to track the progress 39 | 3. Add a function to plot the pheromone levels 40 | 4. Add a function to plot the ants 41 | 5. Add a function to plot the best solution 42 | 43 | """ 44 | 45 | def __init__( 46 | self, 47 | goal: str = None, 48 | num_ants: int = 10000, 49 | evaporation_rate: float = 0.1, 50 | alpha: int = 1, 51 | beta: int = 1, 52 | num_iterations: int = 10010, 53 | ): 54 | super().__init__() # required before assigning attributes on an nn.Module 55 | self.goal = torch.tensor([ord(c) for c in goal], dtype=torch.float32) 56 | self.num_ants = num_ants 57 | self.evaporation_rate = evaporation_rate 58 | self.alpha = alpha 59 | self.beta = beta 60 | self.num_iterations = num_iterations 61 | # Pheromone levels can be initialized for different paths (architectures) 62 | self.pheromones = torch.ones(num_ants) 63 | self.solutions = [] 64 | 65 | def fitness(self, solution): 66 | """Fitness of a solution""" 67 | return -torch.norm(solution - self.goal) 68 | 69 | def update_pheromones(self): 70 | """Update pheromone levels""" 71 | for i, solution in enumerate(self.solutions): 72 | self.pheromones[i] = (1 - self.evaporation_rate) * self.pheromones[ 73 | i 74 | ] + self.fitness(solution) 75 | 76 | def choose_next_path(self): 77 | """Choose the next path based on the pheromone levels""" 78 | probabilities = (self.pheromones**self.alpha) * ( 79 | (1.0 / (1 + self.pheromones)) ** self.beta 80 | ) 81 | 82 | probabilities /= probabilities.sum() 83 | 84 | return torch.multinomial(probabilities, num_samples=1).item() 85 | 86 | def optimize(self): 87 | """Optimize the goal string""" 88 | for iteration in range(self.num_iterations): 89 | self.solutions = [] 90 | for _ in range(self.num_ants): 91 | # This is a placeholder. Actual implementation will define how 92 | # ants traverse the search space. 93 | solution = torch.randint( 94 | 32, 127, (len(self.goal),), dtype=torch.float32 95 | ) # Random characters. 
96 | self.solutions.append(solution) 97 | self.update_pheromones() 98 | 99 | best_solution_index = self.pheromones.argmax().item() 100 | best_solution = self.solutions[best_solution_index] 101 | return "".join([chr(int(c)) for c in best_solution]) 102 | -------------------------------------------------------------------------------- /swarms_torch/structs/cellular_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class TransformerCell(nn.Module): 6 | def __init__( 7 | self, 8 | input_dim, 9 | nhead, 10 | num_layers=1, 11 | neighborhood_size=3, 12 | ): 13 | super(TransformerCell, self).__init__() 14 | self.transformer = nn.Transformer( 15 | input_dim, nhead=nhead, num_encoder_layers=num_layers 16 | ) 17 | self.neighborhood_size = neighborhood_size 18 | 19 | def forward(self, x, neigbors): 20 | x = self.transformer(x, x) 21 | 22 | out = torch.cat([x] + neigbors, dim=0) 23 | 24 | return out 25 | 26 | 27 | class CellularSwarm(nn.Module): 28 | """ 29 | CellularSwarm 30 | 31 | Architecture: 32 | - Input -> TransformerCell -> TransformerCell -> ... -> Output 33 | 34 | Overview: 35 | CellularSwarm is a cellular neural network that uses a transformer cell 36 | to process the input. 37 | 38 | Args: 39 | cell_count (int): Number of transformer cells 40 | input_dim (int): Input dimension 41 | nhead (int): Number of heads in the transformer cell 42 | time_steps (int): Number of time steps to run the network 43 | 44 | Returns: 45 | torch.Tensor: Output tensor 46 | 47 | Usage: 48 | >>> x = torch.randn(10, 32, 512) 49 | >>> model = CellularSwarm(cell_count=5, input_dim=512, nhead=8) 50 | >>> output = model(x) 51 | >>> print(output) 52 | 53 | 54 | """ 55 | 56 | def __init__(self, cell_count, input_dim, nhead, time_steps=4): 57 | super(CellularSwarm, self).__init__() 58 | self.cells = nn.ModuleList( 59 | [TransformerCell(input_dim, nhead) for _ in range(cell_count)] 60 | ) 61 | self.time_steps = time_steps 62 | 63 | def forward(self, x): 64 | for _ in range(self.time_steps): 65 | for i, cell in enumerate(self.cells): 66 | # get neighboring cells states 67 | start_idx = max(0, i - cell.neighborhood_size) 68 | 69 | end_idx = min(len(self.cells), i + cell.neighborhood_size + 1) 70 | 71 | neighbors = [ 72 | self.cells[j].transformer(x, x) 73 | for j in range(start_idx, end_idx) 74 | if j != i 75 | ] 76 | 77 | x = cell(x, neighbors) 78 | return x 79 | -------------------------------------------------------------------------------- /swarms_torch/structs/graph_cellular_automa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class GraphCellularAutomata(nn.Module): 6 | def __init__(self, input_dim, hidden_dim, output_dim): 7 | super(GraphCellularAutomata, self).__init__() 8 | 9 | self.mlp = nn.Sequential( 10 | nn.Linear(input_dim, hidden_dim), 11 | nn.ReLU(), 12 | nn.Linear(hidden_dim, output_dim), 13 | ) 14 | 15 | def forward(self, x): 16 | return self.mlp(x) 17 | 18 | 19 | class ReplicationModel(nn.Module): 20 | def __init__(self, input_dim, hidden_dim): 21 | super(ReplicationModel, self).__init__() 22 | 23 | self.mlp = nn.Sequential( 24 | nn.Linear(input_dim, hidden_dim), 25 | nn.ReLU(), 26 | nn.Linear(hidden_dim, 1), 27 | nn.Sigmoid(), # for binary classification 28 | ) 29 | 30 | def forward(self, x): 31 | return self.mlp(x) 32 | 33 | 34 | class WeightUpdateModel(nn.Module): 35 | def __init__(self, input_dim, hidden_dim): 36 
| super(WeightUpdateModel, self).__init__() 37 | 38 | self.mlp = nn.Sequential( 39 | nn.Linear(input_dim, hidden_dim), 40 | nn.ReLU(), 41 | nn.Linear(hidden_dim, 1), 42 | ) 43 | 44 | def forward(self, x): 45 | return self.mlp(x) 46 | 47 | 48 | class NDP(nn.Module): 49 | def __init__(self, embedding_dim, hidden_dim): 50 | super(NDP, self).__init__() 51 | 52 | self.gc_automata = GraphCellularAutomata( 53 | embedding_dim, hidden_dim, embedding_dim 54 | ) 55 | self.replication_model = ReplicationModel(embedding_dim, hidden_dim) 56 | self.weight_update_model = WeightUpdateModel( 57 | 2 * embedding_dim, hidden_dim 58 | ) 59 | 60 | def forward(self, node_embeddings, adjacency_matrix): 61 | # Update node embeddings using Graph Cellular Automata 62 | updated_embeddings = self.gc_automata(node_embeddings) 63 | 64 | # Check which nodes need to replicate 65 | replication_decisions = self.replication_model(updated_embeddings) 66 | 67 | # Weight update (assuming weighted network) 68 | num_nodes = node_embeddings.shape[0] 69 | edge_weights = torch.zeros((num_nodes, num_nodes)) 70 | 71 | for i in range(num_nodes): 72 | for j in range(num_nodes): 73 | combined_embedding = torch.cat( 74 | (updated_embeddings[i], updated_embeddings[j]) 75 | ) 76 | 77 | edge_weights[i, j] = self.weight_update_model( 78 | combined_embedding 79 | ) 80 | 81 | return updated_embeddings, replication_decisions, edge_weights 82 | 83 | 84 | # # Usage examples 85 | # embedding_dim = 16 86 | # hidden_dim = 32 87 | # node_embeddings = torch.rand((10, embedding_dim)) # For 10 nodes 88 | # adjacency_matrix = torch.rand((10, 10)) # Dummy adjacency matrix for 10 89 | # nodes 90 | 91 | # model = NDP(embedding_dim, hidden_dim) 92 | # updated_embeddings, replication_decisions, edge_weights = model( 93 | # node_embeddings, adjacency_matrix 94 | # ) 95 | 96 | # print(updated_embeddings.shape) 97 | # print(replication_decisions.shape) 98 | # print(edge_weights.shape) 99 | -------------------------------------------------------------------------------- /swarms_torch/structs/hivemind_swarm_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from zeta.structs.transformer import ( 4 | Decoder, 5 | Transformer, 6 | ) 7 | 8 | 9 | class HivemindTransformer(nn.Module): 10 | def __init__( 11 | self, 12 | dim: int = None, 13 | max_seq_len: int = None, 14 | depth: int = None, 15 | heads: int = None, 16 | dim_head: int = None, 17 | num_tokens: int = None, 18 | ): 19 | super(HivemindTransformer, self).__init__() 20 | self.dim = dim 21 | self.max_seq_len = max_seq_len 22 | self.depth = depth 23 | self.heads = heads 24 | self.dim_head = dim_head 25 | self.num_tokens = num_tokens 26 | 27 | self.model = Transformer( 28 | num_tokens=num_tokens, 29 | max_seq_len=max_seq_len, 30 | attn_layers=Decoder( 31 | dim=dim, 32 | depth=depth, 33 | dim_head=dim_head, 34 | heads=heads, 35 | ), 36 | ) 37 | 38 | def forward(self, x): 39 | return self.model(x) 40 | 41 | 42 | class HivemindSwarm(nn.Module): 43 | """ 44 | HiveMind Swarm Transformer 45 | 46 | This is a transformer that is composed of a swarm of transformers where each transformer shares the same weights. 
47 | 48 | Args: 49 | dim: dimension of the model 50 | max_seq_len: maximum sequence length 51 | depth: depth of the model 52 | heads: number of heads 53 | dim_head: dimension of each head 54 | num_models: number of models in the swarm 55 | base_transformer: the base transformer to be used in the swarm 56 | 57 | 58 | Example:: 59 | model = HivemindSwarm( 60 | dim=512, 61 | max_seq_len=1024, 62 | depth=6, 63 | heads=8, 64 | dim_head=64, 65 | num_models=4, 66 | ) 67 | 68 | x = torch.randn(1, 1024, 512) 69 | y = model(x) 70 | print(y.shape) 71 | 72 | 73 | """ 74 | 75 | def __init__( 76 | self, 77 | dim: int = None, 78 | max_seq_len: int = None, 79 | num_tokens: int = None, 80 | depth: int = None, 81 | heads: int = None, 82 | dim_head: int = None, 83 | num_models: int = 1, 84 | **kwargs, 85 | ): 86 | super(HivemindSwarm, self).__init__() 87 | 88 | self.dim = dim 89 | self.max_seq_len = max_seq_len 90 | self.depth = depth 91 | self.heads = heads 92 | self.num_tokens = num_tokens 93 | self.dim_head = dim_head 94 | self.num_models = num_models 95 | self.base_transformer = HivemindTransformer( 96 | dim=dim, 97 | num_tokens=num_tokens, 98 | max_seq_len=max_seq_len, 99 | depth=depth, 100 | heads=heads, 101 | dim_head=dim_head, 102 | ) 103 | # Create a list of transformers sharing the same weights 104 | self.experts = nn.ModuleList( 105 | [self.base_transformer for _ in range(num_models)] 106 | ) 107 | 108 | # Gating mechniams allows the model to dynamically weight the contribution of each transformer 109 | # in the swarm. This is done by learning a weight for each transformer and then using a softmax 110 | # to normalize the weights. 111 | self.gate = nn.Linear(num_models, num_models) 112 | self.gate_activation = nn.Softmax(dim=-1) 113 | self.gate_bias = nn.Parameter(torch.zeros(num_models)) 114 | 115 | def forward(self, x): 116 | logits = [] 117 | for expert in self.experts: 118 | output = expert(x) 119 | logits.append(output) 120 | # Run each transformer on the input 121 | # outputs = [expert(x) for expert in self.experts] 122 | 123 | # stack outputs 124 | outputs = torch.stack(logits, dim=1) 125 | 126 | # Compute the gate 127 | gate = self.gate_activation(self.gate_bias + self.gate(outputs)) 128 | 129 | # Weight the outputs 130 | outputs = torch.sum(outputs * gate.unsqueeze(-1), dim=1) 131 | return outputs 132 | 133 | 134 | # model = HivemindSwarm( 135 | # dim=512, 136 | # max_seq_len=1024, 137 | # num_tokens=20000, 138 | # depth=6, 139 | # heads=8, 140 | # dim_head=64, 141 | # num_models=4, 142 | # ) 143 | 144 | # x = torch.randn(1, 1024, 512) 145 | # y = model(x) 146 | # print(y.shape) 147 | -------------------------------------------------------------------------------- /swarms_torch/structs/ma_agent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import gym 5 | 6 | 7 | class MAgent: 8 | class Agent(nn.Module): 9 | def __init__(self, input_dim, output_dim): 10 | super().__init__() 11 | self.policy = nn.Sequential( 12 | nn.Linear(input_dim, 128), 13 | nn.ReLU(), 14 | nn.Linear(128, output_dim), 15 | nn.Softmax(dim=-1), 16 | ) 17 | 18 | def forward(self, state): 19 | return self.policy(state) 20 | 21 | class MultiGymEnvironment: 22 | def __init__(self, env_name, num_agents): 23 | self.envs = [gym.make(env_name) for _ in range(num_agents)] 24 | self.agents = [ 25 | MAgent.Agent( 26 | self.envs[0].observation_space.shape[0], 27 | self.envs[0].action_space.n, 28 | ) 29 | for _ in 
range(num_agents) 30 | ] 31 | self.optimizers = [ 32 | optim.Adam(agent.parameters()) for agent in self.agents 33 | ] 34 | 35 | def step(self, agent_actions): 36 | rewards = [] 37 | for env, action in zip(self.envs, agent_actions): 38 | _, reward, _, _ = env.step(action) 39 | rewards.append(reward) 40 | return rewards 41 | 42 | def get_states(self): 43 | states = [env.reset() for env in self.envs] 44 | return states 45 | 46 | def train(self, epochs=1000): 47 | for epoch in range(epochs): 48 | states = self.get_states() 49 | actions = [ 50 | torch.argmax(agent(torch.FloatTensor(state))).item() 51 | for agent, state in zip(self.agents, states) 52 | ] 53 | rewards = self.step(actions) 54 | 55 | for agent, optimizer, reward in zip( 56 | self.agents, self.optimizers, rewards 57 | ): 58 | loss = ( 59 | -torch.log(agent(torch.FloatTensor(states))) * reward 60 | ) # Example loss function 61 | optimizer.zero_grad() 62 | loss.backward() 63 | optimizer.step() 64 | -------------------------------------------------------------------------------- /swarms_torch/structs/mas_model.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Any 2 | import torch 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | from loguru import logger 6 | 7 | # Set up logger 8 | logger.add("masi_log.log", rotation="500 MB") 9 | 10 | # Define device 11 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 12 | logger.info(f"Using device: {device}") 13 | 14 | 15 | # Agent Base Class 16 | class Agent(nn.Module): 17 | def __init__(self): 18 | super(Agent, self).__init__() 19 | 20 | def forward(self, x: torch.Tensor) -> torch.Tensor: 21 | raise NotImplementedError 22 | 23 | # def backward(self, loss: torch.Tensor) -> None: 24 | # loss.backward() 25 | 26 | def update_parameters( 27 | self, shared_gradients: Dict[str, torch.Tensor] 28 | ) -> None: 29 | with torch.no_grad(): 30 | for name, param in self.named_parameters(): 31 | if param.grad is not None: 32 | param.grad = shared_gradients[name] 33 | self.optimizer.step() 34 | self.optimizer.zero_grad() 35 | 36 | 37 | # MLP Agent 38 | class MLPAgent(Agent): 39 | def __init__(self, input_size: int, hidden_size: int, output_size: int): 40 | super(MLPAgent, self).__init__() 41 | self.model = nn.Sequential( 42 | nn.Flatten(), # Add this line to flatten the input 43 | nn.Linear(input_size, hidden_size), 44 | nn.ReLU(), 45 | nn.Linear(hidden_size, output_size), 46 | ) 47 | self.to(device) 48 | self.optimizer = optim.Adam(self.parameters(), lr=0.001) 49 | 50 | def forward(self, x: torch.Tensor) -> torch.Tensor: 51 | logger.debug(f"MLPAgent input shape: {x.shape}") 52 | output = self.model(x) 53 | logger.debug(f"MLPAgent output shape: {output.shape}") 54 | return output 55 | 56 | 57 | # CNN Agent 58 | class CNNAgent(Agent): 59 | def __init__(self, input_channels: int, num_classes: int): 60 | super(CNNAgent, self).__init__() 61 | self.model = nn.Sequential( 62 | nn.Conv2d(input_channels, 16, kernel_size=3, padding=1), 63 | nn.ReLU(), 64 | nn.Flatten(), 65 | nn.Linear(16 * 28 * 28, num_classes), 66 | ) 67 | self.to(device) 68 | self.optimizer = optim.Adam(self.parameters(), lr=0.001) 69 | 70 | def forward(self, x: torch.Tensor) -> torch.Tensor: 71 | logger.debug(f"CNNAgent input shape: {x.shape}") 72 | output = self.model(x) 73 | logger.debug(f"CNNAgent output shape: {output.shape}") 74 | return output 75 | 76 | 77 | # LSTM Agent 78 | class LSTMAgent(Agent): 79 | def __init__(self, input_size: int, 
hidden_size: int, output_size: int): 80 | super(LSTMAgent, self).__init__() 81 | self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True) 82 | self.fc = nn.Linear(hidden_size, output_size) 83 | self.to(device) 84 | self.optimizer = optim.Adam(self.parameters(), lr=0.001) 85 | 86 | def forward(self, x: torch.Tensor) -> torch.Tensor: 87 | logger.debug(f"LSTMAgent input shape: {x.shape}") 88 | # Reshape input: (batch, channels, height, width) -> (batch, height, width * channels) 89 | x = x.view(x.size(0), x.size(2), -1) 90 | lstm_out, _ = self.lstm(x) 91 | output = self.fc(lstm_out[:, -1, :]) 92 | logger.debug(f"LSTMAgent output shape: {output.shape}") 93 | return output 94 | 95 | 96 | # Transformer Agent 97 | class TransformerAgent(Agent): 98 | def __init__( 99 | self, input_size: int, num_heads: int, num_layers: int, output_size: int 100 | ): 101 | super(TransformerAgent, self).__init__() 102 | self.embedding = nn.Linear(input_size, 128) 103 | encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=num_heads) 104 | self.transformer_encoder = nn.TransformerEncoder( 105 | encoder_layer, num_layers=num_layers 106 | ) 107 | self.fc = nn.Linear(128, output_size) 108 | self.to(device) 109 | self.optimizer = optim.Adam(self.parameters(), lr=0.001) 110 | 111 | def forward(self, x: torch.Tensor) -> torch.Tensor: 112 | logger.debug(f"TransformerAgent input shape: {x.shape}") 113 | # Reshape input: (batch, channels, height, width) -> (batch, height, width * channels) 114 | x = x.view(x.size(0), x.size(2), -1) 115 | x = self.embedding(x) 116 | x = x.permute(1, 0, 2) # (sequence_length, batch_size, embedding_dim) 117 | transformer_out = self.transformer_encoder(x) 118 | transformer_out = transformer_out.permute( 119 | 1, 0, 2 120 | ) # Back to (batch_size, sequence_length, embedding_dim) 121 | output = self.fc(transformer_out[:, -1, :]) 122 | logger.debug(f"TransformerAgent output shape: {output.shape}") 123 | return output 124 | 125 | 126 | # Multi-Architecture Swarm Intelligence (MASI) class 127 | class MultiArchitectureSwarm(nn.Module): 128 | def __init__( 129 | self, 130 | num_mlp_agents: int, 131 | num_cnn_agents: int, 132 | num_lstm_agents: int, 133 | num_transformer_agents: int, 134 | input_sizes: Dict[str, Any], 135 | output_size: int, 136 | ): 137 | super(MultiArchitectureSwarm, self).__init__() 138 | 139 | self.agents: List[Agent] = [] 140 | 141 | # Initialize MLP Agents 142 | for _ in range(num_mlp_agents): 143 | agent = MLPAgent( 144 | input_size=input_sizes["mlp"]["input_size"], 145 | hidden_size=input_sizes["mlp"]["hidden_size"], 146 | output_size=output_size, 147 | ) 148 | self.agents.append(agent) 149 | 150 | # Initialize CNN Agents 151 | for _ in range(num_cnn_agents): 152 | agent = CNNAgent( 153 | input_channels=input_sizes["cnn"]["input_channels"], 154 | num_classes=output_size, 155 | ) 156 | self.agents.append(agent) 157 | 158 | # Initialize LSTM Agents 159 | for _ in range(num_lstm_agents): 160 | agent = LSTMAgent( 161 | input_size=input_sizes["lstm"]["input_size"], 162 | hidden_size=input_sizes["lstm"]["hidden_size"], 163 | output_size=output_size, 164 | ) 165 | self.agents.append(agent) 166 | 167 | # Initialize Transformer Agents 168 | for _ in range(num_transformer_agents): 169 | agent = TransformerAgent( 170 | input_size=input_sizes["transformer"]["input_size"], 171 | num_heads=input_sizes["transformer"]["num_heads"], 172 | num_layers=input_sizes["transformer"]["num_layers"], 173 | output_size=output_size, 174 | ) 175 | self.agents.append(agent) 176 | 177 | 
logger.info(f"Initialized {len(self.agents)} agents.") 178 | 179 | def forward(self, x: torch.Tensor) -> torch.Tensor: 180 | agent_outputs = [] 181 | 182 | for agent in self.agents: 183 | agent_output = agent(x) 184 | agent_outputs.append(agent_output) 185 | 186 | # Aggregate outputs (Simple averaging for now) 187 | global_output = self.aggregate_agent_outputs(agent_outputs) 188 | 189 | return global_output 190 | 191 | def aggregate_agent_outputs( 192 | self, agent_outputs: List[torch.Tensor] 193 | ) -> torch.Tensor: 194 | # Stack outputs and calculate mean 195 | logger.debug(f"Aggregating outputs from {len(agent_outputs)} agents.") 196 | stacked_outputs = torch.stack(agent_outputs) 197 | logger.debug(f"Stacked outputs shape: {stacked_outputs.shape}") 198 | global_output = torch.mean(stacked_outputs, dim=0) 199 | logger.debug(f"Global output shape: {global_output.shape}") 200 | return global_output 201 | -------------------------------------------------------------------------------- /swarms_torch/structs/mixtral_expert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SwiGLU(nn.Module): 7 | def __init__(self, input_dim, hidden_dim, output_dim): 8 | super(SwiGLU, self).__init__() 9 | self.fc1 = nn.Linear(input_dim, hidden_dim) 10 | self.fc2 = nn.Linear(hidden_dim, output_dim) 11 | 12 | def forward(self, x): 13 | return self.fc2(F.silu(self.fc1(x))) 14 | 15 | 16 | class TopKGate(nn.Module): 17 | def __init__(self, model_dim, num_experts, top_k): 18 | super(TopKGate, self).__init__() 19 | self.w_gate = nn.Linear(model_dim, num_experts) 20 | self.top_k = top_k 21 | 22 | def forward(self, x): 23 | gate_logits = self.w_gate(x) 24 | top_logits, top_indices = torch.topk(gate_logits, self.top_k, dim=-1) 25 | top_k_logits = torch.full_like(gate_logits, float("-inf")) 26 | top_k_logits.scatter_(1, top_indices, top_logits) 27 | return F.softmax(top_k_logits, dim=-1) 28 | 29 | 30 | class MoE(nn.Module): 31 | def __init__(self, model_dim, hidden_dim, num_experts, top_k): 32 | super(MoE, self).__init__() 33 | self.experts = nn.ModuleList( 34 | [ 35 | SwiGLU(model_dim, hidden_dim, model_dim) 36 | for _ in range(num_experts) 37 | ] 38 | ) 39 | self.gate = TopKGate(model_dim, num_experts, top_k) 40 | 41 | def forward(self, x): 42 | gate_scores = self.gate(x) 43 | expert_outputs = torch.stack( 44 | [expert(x) for expert in self.experts], dim=2 45 | ) 46 | weighted_expert_outputs = gate_scores.unsqueeze(-1) * expert_outputs 47 | return weighted_expert_outputs.sum(dim=2) 48 | 49 | 50 | # Model architecture parameters 51 | model_dim = 4096 52 | n_layers = 32 53 | head_dim = 128 54 | hidden_dim = 14336 55 | n_heads = 32 56 | context_len = 32768 57 | vocab_size = 32000 58 | num_experts = 8 59 | top_k_experts = 2 60 | 61 | # Create a single MoE layer as a demonstration 62 | moe_layer = MoE(model_dim, hidden_dim, num_experts, top_k_experts) 63 | 64 | # Example input tensor 65 | x = torch.rand(1, context_len, model_dim) # (batch_size, seq_len, model_dim) 66 | 67 | # Forward pass through the MoE layer 68 | output = moe_layer(x) 69 | 70 | print(output) 71 | -------------------------------------------------------------------------------- /swarms_torch/structs/mixture_of_mamba.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from zeta.nn import MambaBlock 4 | 5 | 6 | def router( 7 | x: Tensor, 8 | k: 
int, 9 | largest: bool = True, 10 | experts: nn.ModuleList = None, 11 | limit_of_experts: int = None, 12 | dropout_on: bool = False, 13 | dropout_p: float = 0.2, 14 | dim: int = -1, 15 | *args, 16 | **kwargs, 17 | ): 18 | # If experts is None, then we use the default topk function 19 | topk = torch.topk(x, k, largest=largest, *args, **kwargs) 20 | 21 | # Adaptive log softmax with loss 22 | # softmax = nn.LogSoftmax(dim) 23 | # topk = softmax(x) 24 | 25 | # Dropout 26 | if dropout_on: 27 | dropout = nn.Dropout(dropout_p) 28 | topk = dropout(topk) 29 | 30 | # If limit_of_experts is not None, then we only send the topk to the 31 | # experts. This is useful when we want to limit the number of experts 32 | # that we send the topk to. 33 | if limit_of_experts is not None: 34 | experts = experts[:limit_of_experts] 35 | 36 | # Send the topk to the experts 37 | if experts is not None: 38 | topk = [expert(topk) for expert in experts] 39 | return topk 40 | 41 | 42 | class MixtureOfMambas(nn.Module): 43 | """ 44 | Mixtures of Mamba is a swarm of Mamba models. The swarm can be aggregated 45 | using a weighted average or a simple average. We plan to add more aggregation 46 | methods in the future like a gating mechanism or a neural network or a 47 | transformer. 48 | 49 | Args: 50 | num_mambas (int): _description_ 51 | dim (int): _description_ 52 | d_state (int): _description_ 53 | d_conv (_type_): _description_ 54 | expand (int): _description_ 55 | fusion_method (str, optional): _description_. Defaults to "average". 56 | 57 | Example:: 58 | >>> model = MixtureOfMambas( 59 | ... num_mambas=2, 60 | ... dim=512, 61 | ... d_state=1024, 62 | ... depth=4, 63 | ... d_conv=1024, 64 | ... expand=4, 65 | ... fusion_method="average", 66 | ... ) 67 | >>> x = torch.rand(1, 512, 512) 68 | >>> model(x).shape 69 | torch.Size([1, 512, 512]) 70 | """ 71 | 72 | def __init__( 73 | self, 74 | num_mambas: int, 75 | dim: int, 76 | d_state: int, 77 | depth: int, 78 | d_conv, 79 | expand: int, 80 | fusion_method: str = "average", 81 | custom_fusion_func: callable = None, 82 | *args, 83 | **kwargs, 84 | ): 85 | super(MixtureOfMambas, self).__init__() 86 | self.num_mambas = num_mambas 87 | self.dim = dim 88 | self.d_state = d_state 89 | self.depth = depth 90 | self.d_conv = d_conv 91 | self.expand = expand 92 | self.fusion_method = fusion_method 93 | self.custom_fusion_func = custom_fusion_func 94 | 95 | self.models = nn.ModuleList() 96 | for _ in range(num_mambas): 97 | mamba_model = MambaBlock( 98 | dim, depth, d_state, expand, d_conv, *args, **kwargs 99 | ) 100 | self.models.append(mamba_model) 101 | 102 | def forward(self, x: torch.Tensor, weights=None): 103 | """Forward pass of the swarm 104 | 105 | Args: 106 | x (torch.Tensor): _description_ 107 | weights (_type_, optional): _description_. Defaults to None. 
108 | 109 | Raises: 110 | ValueError: if the fusion method is unknown. 111 | 112 | Returns: 113 | torch.Tensor: the fused output of the swarm. 114 | """ 115 | outputs = [model(x) for model in self.models] 116 | 117 | if self.fusion_method == "average": 118 | return self.average_aggregate(outputs) 119 | elif self.fusion_method == "weighted": 120 | return self.weighted_aggregate(outputs, weights) 121 | elif self.fusion_method == "absmax": 122 | return self.absmax_aggregate(outputs, weights) 123 | elif self.fusion_method == "softmax": 124 | return self.softmax_aggregate(outputs, weights) 125 | elif self.fusion_method == "custom": 126 | if self.custom_fusion_func is None: 127 | raise ValueError( 128 | "custom_fusion_func must be provided if fusion_method is" 129 | " custom" 130 | ) 131 | return self.custom_fusion_func(outputs, weights) 132 | else: 133 | raise ValueError( 134 | f"Unknown aggregation method: {self.fusion_method}" 135 | ) 136 | 137 | def average_aggregate(self, outputs): 138 | """Average the outputs of the models in the swarm 139 | 140 | Args: 141 | outputs (list[torch.Tensor]): outputs of the individual models 142 | 143 | Returns: 144 | torch.Tensor: element-wise mean of the stacked outputs 145 | """ 146 | return torch.mean(torch.stack(outputs), dim=0) 147 | 148 | def weighted_aggregate(self, outputs, weights): 149 | """Weighted sum of the outputs of the models in the swarm 150 | 151 | Args: 152 | outputs (list[torch.Tensor]): outputs of the individual models 153 | weights (list[float]): one weight per model 154 | 155 | Raises: 156 | ValueError: if weights is missing or of the wrong length 157 | 158 | Returns: 159 | torch.Tensor: the weighted sum of the outputs 160 | """ 161 | if weights is None or len(weights) != len(outputs): 162 | raise ValueError("Weights must be the same length as outputs") 163 | weighted_outputs = [ 164 | weight * output for weight, output in zip(weights, outputs) 165 | ] 166 | return sum(weighted_outputs) 167 | 168 | def softmax_aggregate(self, outputs, weights): 169 | """Softmax over the (optionally weighted) sum of the outputs 170 | 171 | Args: 172 | outputs (list[torch.Tensor]): outputs of the individual models 173 | weights (list[float], optional): one weight per model 174 | 175 | Returns: 176 | torch.Tensor: softmax of the fused output along dim 1 177 | """ 178 | if weights: 179 | weighted_outputs = [ 180 | weight * output for weight, output in zip(weights, outputs) 181 | ] 182 | out = sum(weighted_outputs) 183 | else: 184 | # Unweighted: treat every model equally before the softmax 185 | out = torch.stack(outputs).sum(dim=0) 186 | return torch.softmax(out, dim=1) 187 | 188 | def absmax(self, outputs): 189 | """Element-wise absolute maximum of the outputs 190 | 191 | Args: 192 | outputs (list[torch.Tensor]): outputs of the individual models 193 | 194 | Returns: 195 | torch.Tensor: the element-wise absolute maximum 196 | """ 197 | return torch.max(torch.abs(torch.stack(outputs)), dim=0)[0] 198 | 199 | def absmax_aggregate(self, outputs, weights=None): 200 | """Absolute-maximum fusion of the (optionally weighted) outputs 201 | 202 | Args: 203 | outputs (list[torch.Tensor]): outputs of the individual models 204 | weights (list[float], optional): one weight per model 205 | 206 | Returns: 207 | torch.Tensor: the element-wise absolute maximum 208 | """ 209 | if weights: 210 | weighted_outputs = [ 211 | weight * output for weight, output in zip(weights, outputs) 212 | ] 213 | return self.absmax(weighted_outputs) 214 | else: 215 | return self.absmax(outputs) 216 | 
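A minimal usage sketch of the weighted fusion path (assuming the zeta MambaBlock import above resolves; the sizes mirror the class docstring and are illustrative only):

import torch
from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas

swarm = MixtureOfMambas(
    num_mambas=3, dim=512, d_state=1024, depth=4, d_conv=1024,
    expand=4, fusion_method="weighted",
)
x = torch.rand(1, 64, 512)
out = swarm(x, weights=[0.5, 0.3, 0.2])  # weighted sum over the three Mamba blocks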
--------------------------------------------------------------------------------
/swarms_torch/structs/neuronal_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | Cellular neural network
3 | 
4 | Architecture:
5 |     - Input -> (LayerNorm -> SynapseTransformer -> Softmax) per neuron -> Output
6 |     - Neuron states are updated after each synapse
7 |     - Softmax is applied after each synapse
8 |     - Layer normalization is applied before each synapse
9 | 
10 | """
11 | 
12 | import torch
13 | from torch import nn
14 | 
15 | 
16 | class TransformerLayer(nn.Module):
17 |     """
18 |     Transformer Layer
19 | 
20 |     Architecture:
21 |         - Input -> TransformerEncoderLayer -> Linear -> Output
22 | 
23 |     Args:
24 |         input_dim (int): Input dimension
25 |         output_dim (int): Output dimension
26 | 
27 |     Returns:
28 |         torch.Tensor: Output tensor
29 | 
30 |     Usage:
31 |         layer = TransformerLayer(input_dim=512, output_dim=256, nhead=8)
32 |     """
33 | 
34 |     def __init__(
35 |         self,
36 |         input_dim,
37 |         output_dim,
38 |         nhead: int,
39 |     ):
40 |         super(TransformerLayer, self).__init__()
41 |         self.transformer = nn.TransformerEncoderLayer(
42 |             d_model=input_dim,
43 |             nhead=nhead,
44 |         )
45 |         self.fc = nn.Linear(input_dim, output_dim)
46 | 
47 |     def forward(self, x):
48 |         return self.fc(self.transformer(x))
49 | 
50 | 
51 | class Neuron(nn.Module):
52 |     def __init__(self, num_states):
53 |         super(Neuron, self).__init__()
54 |         self.states = nn.Parameter(torch.randn(num_states))
55 | 
56 | 
57 | class SynapseTransformer(nn.Module):
58 |     def __init__(self, input_dim, output_dim, nhead: int):
59 |         super(SynapseTransformer, self).__init__()
60 |         self.transformer = TransformerLayer(input_dim, output_dim, nhead)
61 | 
62 |     def forward(self, x):
63 |         return self.transformer(x)
64 | 
65 | 
66 | class NNTransformer(nn.Module):
67 |     """
68 |     Cellular neural network of transformer synapses (NNTransformer)
69 | 
70 |     Args:
71 |         neuron_count (int): Number of neurons
72 |         num_states (int): Number of states
73 |         input_dim (int): Input dimension
74 |         output_dim (int): Output dimension
75 |         nhead (int): Number of heads in transformer layer
76 | 
77 |     Returns:
78 |         torch.Tensor: Output tensor
79 | 
80 |     Architecture:
81 |         - Input -> (LayerNorm -> SynapseTransformer -> Softmax) per neuron -> Output
82 |         - Neuron states are updated after each synapse
83 |         - Softmax is applied after each synapse
84 |         - Layer normalization is applied before each synapse
85 | 
86 |     Usage:
87 |         network = NNTransformer(5, 10, 10, 10, 2)
88 |         output = network(torch.randn(1, 10))
89 |         print(output)
90 | 
91 | 
92 |     Training:
93 |         network = NNTransformer(5, 10, 10, 10, 2)
94 |         output = network(torch.randn(1, 10))
95 |         print(output)
96 | 
97 | 
98 |         # Test the network
99 |         import torch
100 |         import torch.optim as optim
101 |         import torch.nn.functional as F
102 | 
103 |         # Random dataset
104 |         batch_size = 64
105 |         input_size = 10
106 |         output_size = 10
107 | 
108 |         x = torch.randn(batch_size, input_size)  # Random inputs
109 |         y = torch.randn(batch_size, output_size)  # Random targets
110 | 
111 |         # Hyperparameters
112 |         neuron_count = 5
113 |         num_states = 10
114 |         input_dim = input_size
115 |         output_dim = output_size
116 |         n_head = 2
117 | 
118 |         # Initialize the network
119 |         network = NNTransformer(neuron_count, num_states, input_dim, output_dim, n_head)
120 | 
121 |         # Define the loss function and optimizer
122 |         criterion = nn.MSELoss()
123 |         optimizer = optim.Adam(network.parameters(), lr=0.001)
124 | 
125 |         # Training loop
126 |         epochs = 1000
127 |         for epoch in range(epochs):
128 |             # Forward pass
129 |             outputs = network(x)
130 | 
131 |             # Compute loss
132 |             loss = criterion(outputs, y)
133 | 
134 |             # Backward pass and optimization
135 |             optimizer.zero_grad()
136 |             loss.backward()
137 |             optimizer.step()
138 | 
139 |             # Print loss every 100 epochs
140 |             if (epoch+1) % 100 == 0:
141 |                 print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")
142 | 
143 |         # Test the network with a new random input
144 |         test_input = torch.randn(1, input_size)
145 |         test_output = network(test_input)
146 |         print(test_output)
147 | 
148 | 
149 |     """
150 | 
151 |     def __init__(self, neuron_count, num_states, input_dim, output_dim, nhead):
152 |         super(NNTransformer, self).__init__()
153 | 
154 |         # Initialize neurons and synapses
155 |         self.neurons = nn.ModuleList(
156 |             [Neuron(num_states) for _ in range(neuron_count)]
157 |         )
158 |         self.synapses = nn.ModuleList(
159 |             [
160 |                 SynapseTransformer(input_dim, output_dim, nhead)
161 |                 for _ in range(neuron_count)
162 |             ]
163 |         )
164 | 
165 |         self.norm = nn.LayerNorm(output_dim)
166 |         self.softmax = nn.Softmax(dim=1)
167 | 
168 |     def forward(self, x):
169 |         for neuron, synapse in zip(self.neurons[:-1], self.synapses):
170 |             # norm before synapse
171 |             x = self.norm(x)
172 | 
173 |             # synapse
174 |             x = synapse(x)
175 | 
176 |             # softmax after synapse
177 |             x = self.softmax(x)
178 | 
179 |             neuron.states.data = x
180 |         return self.neurons[-1].states
181 | 
--------------------------------------------------------------------------------
/swarms_torch/structs/parallel_wrapper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from typing import List
4 | 
5 | 
6 | class ParallelSwarm(nn.Module):
7 |     def __init__(
8 |         self,
9 |         models: List[nn.Module],
10 |     ):
11 |         """
12 |         Initializes a parallel swarm of models.
13 | 
14 |         Args:
15 |             models (List[nn.Module]): A list of PyTorch models.
16 | 
17 |         """
18 |         super().__init__()
19 |         # Register the models in an nn.ModuleList so that their parameters
20 |         # are tracked by the wrapper module
21 |         self.models = nn.ModuleList(models)
22 | 
23 |     def forward(self, x: torch.Tensor, *args, **kwargs):
24 |         """Forward pass of the swarm
25 | 
26 |         Args:
27 |             x (torch.Tensor): Input passed to every model in the swarm.
28 | 
29 |         Returns:
30 |             List[torch.Tensor]: One output per model.
31 |         """
32 |         outputs = []
33 |         for model in self.models:
34 |             outputs.append(model(x, *args, **kwargs))
35 |         return outputs
36 | 
--------------------------------------------------------------------------------
/swarms_torch/structs/queen_bee.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | 
4 | 
5 | class QueenBeeGa(nn.Module):
6 |     """
7 |     Queen Bee evolution for genetic algorithms
8 | 
9 |     Inspired by the evolution of bees, the fittest solution is designated
10 |     the queen, and the rest of the population contends to mate with it.
11 | 
12 |     The strong exploitation is balanced by a higher-than-normal mutation rate.
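
    How does it work? (mirroring the _evolve method below)
    1. Score every bee by inverse squared distance to the goal string
    2. Promote the fittest bee to queen; a dethroned queen rejoins the pool
    3. Select drones by deterministic tournament over the remaining bees
    4. Cross each drone with the queen at the gene midpoint
    5. Mutate offspring genes, strongly for a small fraction of the pool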
13 | 
14 |     Reference:
15 |     ---------
16 |     https://www.researchgate.net/publication/228961729_A_Queen_Bee_GA_for_optimization
17 | 
18 |     Usage
19 |     -----
20 |     optimizer = QueenBeeGa(
21 |         goal="Attention is all you need",
22 |         pop_size=100,
23 |         mutation_prob=0.04,
24 |         strong_mutation_rate=0.1,
25 |         strong_mutation_prob=0.25,
26 |         num_tournament_participants=25
27 |     )
28 |     optimizer.run(max_generations=100)
29 |     """
30 | 
31 |     def __init__(
32 |         self,
33 |         goal: str = "Attention is all you need",
34 |         pop_size: int = 100,
35 |         mutation_prob: float = 0.04,
36 |         strong_mutation_rate: float = 0.1,
37 |         strong_mutation_prob: float = 0.25,
38 |         num_tournament_participants: int = 25,
39 |     ):
40 |         """
41 |         QueenBeeGa with params and initial configs
42 | 
43 |         Parameters
44 |         ----------
45 |         goal (str): The target string to be optimized.
46 |         pop_size (int): Population size (number of bees).
47 |         mutation_prob (float): Per-gene mutation probability.
48 |         strong_mutation_rate (float): Fraction of the pool that is strongly mutated.
49 |         strong_mutation_prob (float): Per-gene probability under strong mutation.
50 |         num_tournament_participants (int): Number of tournament participants.
51 |         """
52 |         super().__init__()
53 |         for name, prob in [("mutation_prob", mutation_prob),
54 |                            ("strong_mutation_rate", strong_mutation_rate),
55 |                            ("strong_mutation_prob", strong_mutation_prob)]:
56 |             if not 0.0 <= prob <= 1.0:
57 |                 raise ValueError(f"{name} must be between 0 and 1")
58 |         self.goal = goal
59 |         self.pop_size = pop_size
60 |         self.mutation_prob = mutation_prob
61 |         self.strong_mutation_rate = strong_mutation_rate
62 |         self.strong_mutation_prob = strong_mutation_prob
63 |         self.num_tournament_participants = num_tournament_participants
64 | 
65 |         self.gene_length = len(goal)
66 |         self.gene_midpoint = self.gene_length // 2
67 |         self.target_gene = self.encode(goal)
68 | 
69 |         self.strong_mutate_pool_size = strong_mutation_rate * pop_size
70 |         self.num_code_mutate = mutation_prob * self.gene_length
71 |         self.strong_num_code_mutate = strong_mutation_prob * self.gene_length
72 | 
73 |         self.pool = torch.randint(0, 255, (pop_size, self.gene_length))
74 |         self.queen = None
75 |         self.queen_fitness = None
76 |         self.generation = 0
77 | 
78 |     @staticmethod
79 |     def encode(s):
80 |         """Convert a string to its ASCII values"""
81 |         return torch.tensor([ord(c) for c in s])
82 | 
83 |     @staticmethod
84 |     def decode(t):
85 |         """Convert ASCII values tensor back to string"""
86 |         return "".join([chr(i) for i in t.tolist()])
87 | 
88 |     def run(self, max_generations: int = 1000):
89 |         """
90 |         Run the queen bee genetic algorithm evolution
91 | 
92 |         Parameters:
93 |         -----------
94 |         max_generations: int
95 |             Maximum number of generations
96 |         """
97 |         for _ in range(max_generations):
98 |             self.generation += 1
99 |             print(f"Generation: {self.generation}")
100 |             self._evolve()
101 |             if self._check_convergence():
102 |                 break
103 | 
104 |     def _evolve(self):
105 |         """
106 |         Execute one step of the evolution process.
107 | """ 108 | 109 | # Sort population by fitness 110 | fitnesses = 1.0 / torch.square(self.pool - self.target_gene).sum(dim=-1) 111 | indices = fitnesses.sort(descending=True).indices 112 | self.pool, fitnesses = self.pool[indices], fitnesses[indices] 113 | 114 | # Display every generation 115 | if self.queen is not None: 116 | print("queen:") 117 | print( 118 | f"{self.decode(self.queen)} ({self.queen_fitness.item():.3f})\n" 119 | ) 120 | for gene, fitness in zip(self.pool, fitnesses): 121 | print(f"{self.decode(gene)} ({fitness.item():.3f})") 122 | 123 | # If one of the children has a better fitness than queen, that child becomes the new queen 124 | # and the queen replaces the worst bee in the population 125 | if self.queen is not None and self.queen_fitness < fitnesses[0]: 126 | self.pool = torch.cat((self.pool, self.queen[None, :]), dim=0) 127 | fitnesses = torch.cat((fitnesses, self.queen_fitness[None]), dim=0) 128 | self.queen = self.queen_fitness = None 129 | 130 | # Separate the queen bee from the rest of the population 131 | if self.queen is None: 132 | self.queen, self.pool = self.pool[0], self.pool[1:] 133 | self.queen_fitness, fitnesses = fitnesses[0], fitnesses[1:] 134 | 135 | # Deterministic tournament selection 136 | contender_ids = torch.randn( 137 | (self.pop_size - 1, self.pop_size - 1) 138 | ).argsort(dim=-1)[..., : self.num_tournament_participants] 139 | participants, tournaments = ( 140 | self.pool[contender_ids], 141 | fitnesses[contender_ids], 142 | ) 143 | top_winner = tournaments.topk( 144 | 1, dim=-1, largest=True, sorted=False 145 | ).indices 146 | top_winner = top_winner.unsqueeze(-1).expand(-1, -1, self.gene_length) 147 | parents = participants.gather(1, top_winner).squeeze(1) 148 | 149 | # Cross over all chosen drones with the queen 150 | queen_parents = self.queen.unsqueeze(0).expand( 151 | self.pop_size - 1, self.gene_length 152 | ) 153 | self.pool = torch.cat( 154 | ( 155 | queen_parents[:, : self.gene_midpoint], 156 | parents[:, self.gene_midpoint :], 157 | ), 158 | dim=-1, 159 | ) 160 | 161 | # Mutate genes in population 162 | mutate_mask = ( 163 | torch.randn(self.pool.shape).argsort(dim=-1) < self.num_code_mutate 164 | ) 165 | noise = torch.randint(0, 2, self.pool.shape) * 2 - 1 166 | mutated_pool = torch.where(mutate_mask, self.pool + noise, self.pool) 167 | 168 | strong_mutate_mask = ( 169 | torch.randn(self.pool.shape).argsort(dim=-1) 170 | < self.strong_num_code_mutate 171 | ) 172 | noise = torch.randint(0, 2, self.pool.shape) * 2 - 1 173 | strong_mutated_pool = torch.where( 174 | strong_mutate_mask, self.pool + noise, self.pool 175 | ) 176 | 177 | strong_mutate_pool_mask = ( 178 | torch.randn(self.pop_size - 1).argsort(dim=-1) 179 | < self.strong_mutate_pool_size 180 | ) 181 | self.pool = torch.where( 182 | strong_mutate_pool_mask[:, None], strong_mutated_pool, mutated_pool 183 | ) 184 | self.pool.clamp_(0, 255) 185 | 186 | def _check_convergence(self): 187 | """ 188 | Check if any of the solutions has achieved the goal 189 | """ 190 | fitnesses = 1.0 / torch.square(self.pool - self.target_gene).sum(dim=-1) 191 | return (fitnesses == float("inf")).any().item() 192 | 193 | 194 | # # Usage: 195 | # optimizer = QueenBeeGa() 196 | # optimizer.run(max_generations=100) 197 | -------------------------------------------------------------------------------- /swarms_torch/structs/simple_moe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | import torch.nn.functional 
as F 4 | 5 | 6 | class FeedForward(nn.Module): 7 | """ 8 | Simple FeedForward module. 9 | 10 | Args: 11 | dim (int): Input dimension 12 | hidden_dim (int): Hidden dimension 13 | mult (int): Multiplier for hidden dimension 14 | dropout (float): Dropout rate 15 | """ 16 | 17 | def __init__( 18 | self, 19 | dim: int, 20 | hidden_dim: int = None, 21 | mult: int = 4, 22 | dropout: float = 0.0, 23 | *args, 24 | **kwargs, 25 | ): 26 | super().__init__() 27 | hidden_dim = hidden_dim or dim * mult 28 | 29 | self.net = nn.Sequential( 30 | nn.Linear(dim, hidden_dim), 31 | nn.GELU(), 32 | nn.Dropout(dropout), 33 | nn.Linear(hidden_dim, dim), 34 | nn.Dropout(dropout), 35 | ) 36 | 37 | def forward(self, x: Tensor) -> Tensor: 38 | return self.net(x) 39 | 40 | 41 | class GatingMechanism(nn.Module): 42 | def __init__( 43 | self, 44 | dim: int, 45 | num_experts: int, 46 | ): 47 | """ 48 | GatingMechanism is a class that represents the gating mechanism in a mixture of experts model. 49 | 50 | Args: 51 | dim (int): The input dimension. 52 | num_experts (int): The number of experts in the mixture. 53 | 54 | """ 55 | super().__init__() 56 | self.gate = nn.Linear(dim, num_experts) 57 | 58 | def forward(self, x: Tensor): 59 | """ 60 | Forward pass of the gating mechanism. 61 | 62 | Args: 63 | x (Tensor): The input tensor. 64 | 65 | Returns: 66 | Tensor: The output tensor after applying the gating mechanism. 67 | 68 | """ 69 | return F.softmax(self.gate(x), dim=-1) 70 | 71 | 72 | class SimpleMoE(nn.Module): 73 | """ 74 | Simple Mixture of Experts (MoE) model. 75 | 76 | Args: 77 | dim (int): Input dimension. 78 | hidden_dim (int): Hidden dimension of the feedforward network. 79 | output_dim (int): Output dimension. 80 | num_experts (int): Number of experts in the MoE. 81 | mult (int, optional): Multiplier for the hidden dimension. Defaults to 4. 82 | """ 83 | 84 | def __init__( 85 | self, 86 | dim, 87 | hidden_dim: int, 88 | output_dim: int, 89 | num_experts: int, 90 | mult: int = 4, 91 | ): 92 | super().__init__() 93 | self.dim = dim 94 | self.hidden_dim = hidden_dim 95 | self.output_dim = output_dim 96 | self.num_experts = num_experts 97 | self.mult = mult 98 | 99 | self.experts = nn.ModuleList( 100 | [FeedForward(dim, dim, mult) for _ in range(num_experts)] 101 | ) 102 | self.gate = GatingMechanism(dim, num_experts) 103 | 104 | def forward(self, x: Tensor): 105 | """ 106 | Forward pass of the SimpleMoE model. 107 | 108 | Args: 109 | x (Tensor): Input tensor of shape (batch_size, sequence_length, input_dim). 110 | 111 | Returns: 112 | Tensor: Output tensor of shape (batch_size, sequence_length, output_dim). 113 | """ 114 | gating_scores = self.gate(x) 115 | 116 | expert_outputs = torch.stack( 117 | [expert(x) for expert in self.experts], dim=-1 118 | ) 119 | 120 | output = torch.sum(gating_scores.unsqueeze(2) * expert_outputs, dim=-1) 121 | 122 | return output 123 | -------------------------------------------------------------------------------- /swarms_torch/structs/switch_moe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | import torch.nn.functional as F 4 | 5 | 6 | class FeedForward(nn.Module): 7 | """ 8 | Simple FeedForward module. 
9 | 10 | Args: 11 | dim (int): Input dimension 12 | hidden_dim (int): Hidden dimension 13 | mult (int): Multiplier for hidden dimension 14 | dropout (float): Dropout rate 15 | """ 16 | 17 | def __init__( 18 | self, 19 | dim: int, 20 | hidden_dim: int = None, 21 | mult: int = 4, 22 | dropout: float = 0.0, 23 | *args, 24 | **kwargs, 25 | ): 26 | super().__init__() 27 | hidden_dim = hidden_dim or dim * mult 28 | 29 | self.net = nn.Sequential( 30 | nn.Linear(dim, hidden_dim), 31 | nn.GELU(), 32 | nn.Dropout(dropout), 33 | nn.Linear(hidden_dim, dim), 34 | nn.Dropout(dropout), 35 | ) 36 | 37 | def forward(self, x: Tensor) -> Tensor: 38 | return self.net(x) 39 | 40 | 41 | class SwitchGate(nn.Module): 42 | """ 43 | SwitchGate module for MoE (Mixture of Experts) model. 44 | 45 | Args: 46 | dim (int): Input dimension. 47 | num_experts (int): Number of experts. 48 | capacity_factor (float, optional): Capacity factor for sparsity. Defaults to 1.0. 49 | *args: Variable length argument list. 50 | **kwargs: Arbitrary keyword arguments. 51 | """ 52 | 53 | def __init__( 54 | self, 55 | dim, 56 | num_experts: int, 57 | capacity_factor: float = 1.0, 58 | epsilon: float = 1e-6, 59 | *args, 60 | **kwargs, 61 | ): 62 | super().__init__() 63 | self.dim = dim 64 | self.num_experts = num_experts 65 | self.capacity_factor = capacity_factor 66 | self.epsilon = epsilon 67 | self.w_gate = nn.Linear(dim, num_experts) 68 | 69 | def forward(self, x: Tensor, use_aux_loss=False): 70 | """ 71 | Forward pass of the SwitchGate module. 72 | 73 | Args: 74 | x (Tensor): Input tensor. 75 | 76 | Returns: 77 | Tensor: Gate scores. 78 | """ 79 | # Compute gate scores 80 | gate_scores = F.softmax(self.w_gate(x), dim=-1) 81 | 82 | # Determine the top-1 expert for each token 83 | capacity = int(self.capacity_factor * x.size(0)) 84 | 85 | top_k_scores, top_k_indices = gate_scores.topk(1, dim=-1) 86 | 87 | # Mask to enforce sparsity 88 | mask = torch.zeros_like(gate_scores).scatter_(1, top_k_indices, 1) 89 | 90 | # Combine gating scores with the mask 91 | masked_gate_scores = gate_scores * mask 92 | 93 | # Denominators 94 | denominators = masked_gate_scores.sum(0, keepdim=True) + self.epsilon 95 | 96 | # Norm gate scores to sum to the capacity 97 | gate_scores = (masked_gate_scores / denominators) * capacity 98 | 99 | if use_aux_loss: 100 | # Calculate load balancing loss 101 | # Both metrics should be per-expert (sum over batch dimension) 102 | load = gate_scores.sum( 103 | 0 104 | ) # Sum over all examples - shape: (num_experts,) 105 | importance = gate_scores.mean( 106 | 0 107 | ) # Mean over all examples - shape: (num_experts,) 108 | 109 | # Aux loss encourages load balancing between experts 110 | # Using coefficient from Switch Transformer paper 111 | loss = self.num_experts * ((load * importance).sum()) 112 | 113 | return gate_scores, loss 114 | 115 | return gate_scores, None 116 | 117 | 118 | class SwitchMoE(nn.Module): 119 | """ 120 | A module that implements the Switched Mixture of Experts (MoE) architecture. 121 | 122 | Args: 123 | dim (int): The input dimension. 124 | hidden_dim (int): The hidden dimension of the feedforward network. 125 | output_dim (int): The output dimension. 126 | num_experts (int): The number of experts in the MoE. 127 | capacity_factor (float, optional): The capacity factor that controls the capacity of the MoE. Defaults to 1.0. 128 | mult (int, optional): The multiplier for the hidden dimension of the feedforward network. Defaults to 4. 129 | *args: Variable length argument list. 
130 | **kwargs: Arbitrary keyword arguments. 131 | 132 | Attributes: 133 | dim (int): The input dimension. 134 | hidden_dim (int): The hidden dimension of the feedforward network. 135 | output_dim (int): The output dimension. 136 | num_experts (int): The number of experts in the MoE. 137 | capacity_factor (float): The capacity factor that controls the capacity of the MoE. 138 | mult (int): The multiplier for the hidden dimension of the feedforward network. 139 | experts (nn.ModuleList): The list of feedforward networks representing the experts. 140 | gate (SwitchGate): The switch gate module. 141 | 142 | """ 143 | 144 | def __init__( 145 | self, 146 | dim: int, 147 | hidden_dim: int, 148 | output_dim: int, 149 | num_experts: int, 150 | capacity_factor: float = 1.0, 151 | mult: int = 4, 152 | use_aux_loss: bool = False, 153 | *args, 154 | **kwargs, 155 | ): 156 | super().__init__() 157 | self.dim = dim 158 | self.hidden_dim = hidden_dim 159 | self.output_dim = output_dim 160 | self.num_experts = num_experts 161 | self.capacity_factor = capacity_factor 162 | self.mult = mult 163 | self.use_aux_loss = use_aux_loss 164 | 165 | self.experts = nn.ModuleList( 166 | [ 167 | FeedForward(dim, dim, mult, *args, **kwargs) 168 | for _ in range(num_experts) 169 | ] 170 | ) 171 | 172 | self.gate = SwitchGate( 173 | dim, 174 | num_experts, 175 | capacity_factor, 176 | ) 177 | 178 | def forward(self, x: Tensor): 179 | """ 180 | Forward pass of the SwitchMoE module. 181 | 182 | Args: 183 | x (Tensor): The input tensor. 184 | 185 | Returns: 186 | Tensor: The output tensor of the MoE. 187 | 188 | """ 189 | # (batch_size, seq_len, num_experts) 190 | gate_scores, loss = self.gate(x, use_aux_loss=self.use_aux_loss) 191 | 192 | # Dispatch to experts 193 | expert_outputs = [expert(x) for expert in self.experts] 194 | 195 | # Check if any gate scores are nan and handle 196 | if torch.isnan(gate_scores).any(): 197 | print("NaN in gate scores") 198 | gate_scores[torch.isnan(gate_scores)] = 0 199 | 200 | # Stack and weight outputs 201 | stacked_expert_outputs = torch.stack( 202 | expert_outputs, dim=-1 203 | ) # (batch_size, seq_len, output_dim, num_experts) 204 | if torch.isnan(stacked_expert_outputs).any(): 205 | stacked_expert_outputs[torch.isnan(stacked_expert_outputs)] = 0 206 | 207 | # Combine expert outputs and gating scores 208 | moe_output = torch.sum( 209 | gate_scores.unsqueeze(-2) * stacked_expert_outputs, dim=-1 210 | ) 211 | 212 | return moe_output, loss 213 | -------------------------------------------------------------------------------- /swarms_torch/swarmalators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/swarms-pytorch/0fa9adeb7da3e33ec53dba65b985c606a7ff558a/swarms_torch/swarmalators/__init__.py -------------------------------------------------------------------------------- /swarms_torch/swarmalators/swarmalator_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def pairwise_distances(x): 5 | # Compute pairwise distance matrix 6 | diff = x.unsqueeze(1) - x.unsqueeze(0) 7 | return torch.sqrt((diff**2).sum(2)) 8 | 9 | 10 | def function_for_x( 11 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 12 | ): 13 | dists = pairwise_distances(xi) 14 | mask = (dists < R).float() - torch.eye(N) 15 | 16 | interaction_term = mask.unsqueeze(2) * ( 17 | sigma_i.unsqueeze(0) - sigma_i.unsqueeze(1) 18 | ) 19 | interaction_sum = 
interaction_term.sum(1) 20 | 21 | # Define dynamics for x based on our assumptions 22 | dx = J * interaction_sum + alpha * xi - beta * (xi**3) 23 | return dx 24 | 25 | 26 | def function_for_sigma( 27 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 28 | ): 29 | dists = pairwise_distances(xi) 30 | mask = (dists < R).float() - torch.eye(N) 31 | 32 | interaction_term = mask.unsqueeze(2) * (xi.unsqueeze(0) - xi.unsqueeze(1)) 33 | interaction_sum = interaction_term.sum(1) 34 | 35 | # Define dynamics for sigma based on our assumptions 36 | d_sigma = ( 37 | gamma * interaction_sum + epsilon_a * sigma_i - epsilon_r * (sigma_i**3) 38 | ) 39 | return d_sigma 40 | 41 | 42 | def simulate_swarmalators( 43 | N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=100, dt=0.1 44 | ): 45 | xi = 2 * torch.rand(N, 3) - 1 46 | sigma_i = torch.nn.functional.normalize(torch.randn(N, D), dim=1) 47 | 48 | results_xi = [] 49 | results_sigma_i = [] 50 | 51 | for t in range(T): 52 | for i in range(N): 53 | dx = function_for_x( 54 | xi, 55 | sigma_i, 56 | N, 57 | J, 58 | alpha, 59 | beta, 60 | gamma, 61 | epsilon_a, 62 | epsilon_r, 63 | R, 64 | D, 65 | ) 66 | d_sigma = function_for_sigma( 67 | xi, 68 | sigma_i, 69 | N, 70 | J, 71 | alpha, 72 | beta, 73 | gamma, 74 | epsilon_a, 75 | epsilon_r, 76 | R, 77 | D, 78 | ) 79 | 80 | # RK4 for xi 81 | k1_x = dt * dx 82 | k2_x = dt * function_for_x( 83 | xi + 0.5 * k1_x, 84 | sigma_i, 85 | N, 86 | J, 87 | alpha, 88 | beta, 89 | gamma, 90 | epsilon_a, 91 | epsilon_r, 92 | R, 93 | D, 94 | ) 95 | k3_x = dt * function_for_x( 96 | xi + 0.5 * k2_x, 97 | sigma_i, 98 | N, 99 | J, 100 | alpha, 101 | beta, 102 | gamma, 103 | epsilon_a, 104 | epsilon_r, 105 | R, 106 | D, 107 | ) 108 | k4_x = dt * function_for_x( 109 | xi + k3_x, 110 | sigma_i, 111 | N, 112 | J, 113 | alpha, 114 | beta, 115 | gamma, 116 | epsilon_a, 117 | epsilon_r, 118 | R, 119 | D, 120 | ) 121 | xi = xi + (1 / 6) * (k1_x + 2 * k2_x + 2 * k3_x + k4_x) 122 | 123 | # RK4 for sigma_i 124 | k1_sigma = dt * d_sigma 125 | k2_sigma = dt * function_for_sigma( 126 | xi, 127 | sigma_i + 0.5 * k1_sigma, 128 | N, 129 | J, 130 | alpha, 131 | beta, 132 | gamma, 133 | epsilon_a, 134 | epsilon_r, 135 | R, 136 | D, 137 | ) 138 | k3_sigma = dt * function_for_sigma( 139 | xi, 140 | sigma_i + 0.5 * k2_sigma, 141 | N, 142 | J, 143 | alpha, 144 | beta, 145 | gamma, 146 | epsilon_a, 147 | epsilon_r, 148 | R, 149 | D, 150 | ) 151 | k4_sigma = dt * function_for_sigma( 152 | xi, 153 | sigma_i + k3_sigma, 154 | N, 155 | J, 156 | alpha, 157 | beta, 158 | gamma, 159 | epsilon_a, 160 | epsilon_r, 161 | R, 162 | D, 163 | ) 164 | sigma_i = sigma_i + (1 / 6) * ( 165 | k1_sigma + 2 * k2_sigma + 2 * k3_sigma + k4_sigma 166 | ) 167 | sigma_i = torch.nn.functional.normalize(sigma_i, dim=1) 168 | 169 | results_xi.append(xi.clone()) 170 | results_sigma_i.append(sigma_i.clone()) 171 | 172 | return results_xi, results_sigma_i 173 | -------------------------------------------------------------------------------- /swarms_torch/swarmalators/swarmalator_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Swarmalators with transformer models, SUPER EXPERIMENTAL, NEEDS WORK 3 | """ 4 | 5 | import torch 6 | from torch import nn 7 | 8 | 9 | class SwarmalatorModel(nn.Module): 10 | """ 11 | # Example 12 | N = 100 # number of swarmalators 13 | D = 3 # dimensions 14 | 15 | model = SwarmalatorModel(N, D) 16 | positions, orientations = model() 17 | print(positions, orientations) 18 | """ 19 | 20 | 
def __init__( 21 | self, N, D, nhead=8, num_encoder_layers=6, num_decoder_layers=6 22 | ): 23 | super(SwarmalatorModel, self).__init__() 24 | self.N = N 25 | self.D = D 26 | 27 | self.positions = nn.Parameter(torch.randn(N, D)) 28 | self.orientations = nn.Parameter(torch.randn(N, D)) 29 | 30 | # Transformer encoder to process positions and orientations 31 | encoder_layer = nn.TransformerEncoderLayer(d_model=D, nhead=nhead) 32 | self.transformer_encoder = nn.TransformerEncoder( 33 | encoder_layer, num_layers=num_encoder_layers 34 | ) 35 | 36 | # Transformer decoder to produce updated positions and orientations 37 | decoder_layer = nn.TransformerDecoderLayer(d_model=D, nhead=nhead) 38 | self.transformer_decoder = nn.TransformerDecoder( 39 | decoder_layer, num_layers=num_decoder_layers 40 | ) 41 | 42 | def forward(self, src_mask=None, tgt_mask=None, memory_mask=None): 43 | # Using transformer encoder to get memory 44 | position_memory = self.transformer_encoder( 45 | self.positions.unsqueeze(1), mask=src_mask 46 | ) 47 | orientation_memory = self.transformer_encoder( 48 | self.orientations.unsqueeze(1), mask=src_mask 49 | ) 50 | # Using transformer decoder to get updated positions and orientations 51 | updated_positions = self.transformer_decoder( 52 | self.positions.unsqueeze(1), 53 | position_memory, 54 | tgt_mask=tgt_mask, 55 | memory_mask=memory_mask, 56 | ) 57 | updated_orientations = self.transformer_decoder( 58 | self.orientations.unsqueeze(1), 59 | orientation_memory, 60 | tgt_mask=tgt_mask, 61 | memory_mask=memory_mask, 62 | ) 63 | 64 | return updated_positions.squeeze(1), updated_orientations.squeeze(1) 65 | -------------------------------------------------------------------------------- /swarms_torch/swarmalators/swarmalator_visualize.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib.animation import FuncAnimation 3 | from swarms_torch.swarmalators.swarmalator_base import simulate_swarmalators 4 | 5 | # Example usage: 6 | N = 100 7 | J, alpha, beta, gamma, epsilon_a, epsilon_r, R = [0.1] * 7 8 | D = 3 # Ensure D is an integer 9 | xi, sigma_i = simulate_swarmalators( 10 | N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 11 | ) 12 | print(xi[-1], sigma_i[-1]) 13 | 14 | 15 | def visualize_swarmalators(results_xi): 16 | fig = plt.figure(figsize=(8, 8)) 17 | ax = fig.add_subplot(111, projection="3d") 18 | 19 | ax.set_xlim(-2, 2) 20 | ax.set_ylim(-2, 2) 21 | ax.set_zlim(-2, 2) 22 | 23 | # Initialize the scatter plot 24 | scatter = ax.scatter([], [], []) 25 | 26 | def init(): 27 | scatter._offsets3d = ([], [], []) 28 | return (scatter,) 29 | 30 | def update(num): 31 | ax.view_init(30, 0.3 * num) 32 | x_data, y_data, z_data = results_xi[num].t() 33 | scatter._offsets3d = (x_data, y_data, z_data) 34 | return (scatter,) 35 | 36 | FuncAnimation( 37 | fig, update, frames=len(results_xi), init_func=init, blit=False 38 | ) 39 | 40 | plt.show() 41 | 42 | 43 | # # Call the visualization function 44 | # visualize_swarmalators(xi) 45 | -------------------------------------------------------------------------------- /swarms_torch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyegomez/swarms-pytorch/0fa9adeb7da3e33ec53dba65b985c606a7ff558a/swarms_torch/utils/__init__.py -------------------------------------------------------------------------------- /swarms_torch/utils/particle_swarm.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ParticleSwarmOptimization: 5 | """ 6 | Particle Swarm Optimization 7 | Overview: https://en.wikipedia.org/wiki/Particle_swarm_optimization 8 | 9 | How does it work? 10 | 1. Initialize particles with random positions and velocities 11 | 2. For each particle, compute the fitness value 12 | 3. Update the personal best and global best 13 | 4. Update the velocity and position of each particle 14 | 5. Repeat step 2 to 4 until the maximum number of iterations is reached 15 | 16 | 17 | 18 | Parameters 19 | ---------- 20 | goal: str 21 | The goal string to be optimized 22 | n_particles: int 23 | Number of particles 24 | inertia: float 25 | Inertia weight 26 | personal_best_weight: float 27 | Personal best weight 28 | global_best_weight: float 29 | Global best weight 30 | 31 | Usage 32 | ----- 33 | pso = ParticleSwarmOptimization(goal="Attention is all you need", n_particles=100) 34 | pso.optimize(iterations=1000) 35 | 36 | 37 | Future Improvements 38 | ------------------- 39 | 1. Add a stopping criterion 40 | 2. Add a callback function to track the progress 41 | 3. Add a function to plot the fitness value 42 | 4. Add a function to plot the particles 43 | 5. Add a function to plot the velocity 44 | 6. Add a function to plot the position 45 | 7. Add a function to plot the personal best 46 | 8. Add a function to plot the global best 47 | 9. Add a function to plot the personal best weight 48 | 10. Add a function to plot the global best weight 49 | 50 | 51 | 52 | """ 53 | 54 | def __init__( 55 | self, 56 | goal: str = None, 57 | n_particles: int = 100, 58 | inertia: float = 0.5, 59 | personal_best_weight: float = 1.5, 60 | global_best_weight: float = 1.5, 61 | dim: int = 1, 62 | ): 63 | self.goal = torch.tensor([ord(c) for c in goal]) 64 | self.n_particles = n_particles 65 | self.inertia = inertia 66 | self.personal_best_weight = personal_best_weight 67 | self.global_best_weight = global_best_weight 68 | 69 | self.particles = torch.randint(0, 255, (n_particles, len(goal))) 70 | self.velocities = torch.zeros((n_particles, len(goal))) 71 | 72 | self.personal_best = self.particles.clone() 73 | self.global_best = self.particles[0].clone() 74 | 75 | def compute_fitness( 76 | self, 77 | particle, 78 | ): 79 | return 1.0 / (1.0 + torch.norm((particle - self.goal).float())) 80 | 81 | def update( 82 | self, 83 | ): 84 | """Update the particles""" 85 | for i in range(self.n_particles): 86 | fitness = self.compute_fitness( 87 | self.particles[i], 88 | ) 89 | 90 | personal_best_fitness = self.compute_fitness( 91 | self.personal_best[i], 92 | ) 93 | 94 | if fitness > personal_best_fitness: 95 | self.personal_best[i] = self.particles[i] 96 | 97 | global_best_fitness = self.compute_fitness(self.global_best) 98 | 99 | if fitness > global_best_fitness: 100 | self.global_best = self.particles[i] 101 | 102 | # update velocity 103 | personal_attraction = ( 104 | self.personal_best_weight 105 | * torch.rand(self.goal.size()) 106 | * (self.personal_best[i] - self.particles[i]) 107 | ) 108 | 109 | global_attraction = ( 110 | self.global_best_weight 111 | * torch.rand(self.goal.size()) 112 | * (self.global_best - self.particles[i]) 113 | ) 114 | 115 | self.velocities[i] = ( 116 | self.inertia * self.velocities[i] 117 | + personal_attraction 118 | + global_attraction 119 | ) 120 | 121 | # Update position 122 | self.particles[i] += self.velocities[i].int() 123 | self.particles[i].clamp_(0, 255) 124 | 125 | def 
optimize( 126 | self, 127 | iterations: int = 1000, 128 | ): 129 | """Optimize the goal string""" 130 | for _ in range(iterations): 131 | self.update() 132 | best_particle = self.global_best 133 | print( 134 | "Best Particle: ", "".join([chr(int(i)) for i in best_particle]) 135 | ) 136 | -------------------------------------------------------------------------------- /swarms_torch/utils/sakana.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def fish(dim: int, mult: int = 4): 5 | return nn.Sequential( 6 | nn.Linear(dim, dim * mult), 7 | nn.Softplus(), 8 | nn.Dropout(0.1), 9 | nn.LayerNorm(dim * mult), 10 | nn.Softmax(dim=-1), # change this line 11 | nn.Linear(dim * mult, dim), 12 | ) 13 | -------------------------------------------------------------------------------- /swarms_torch/utils/spiral_optimization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class SPO(nn.Module): 6 | """ 7 | Spiral Optimization (SPO) Algorithm in PyTorch. 8 | 9 | Implements the SPO algorithm for optimization towards a target string. 10 | 11 | How does it work? 12 | ---------- 13 | 1. Initialize the search points randomly 14 | 2. Initialize the center randomly 15 | 3. Update the search points based on the spiral model 16 | 4. Find the best search point and set as the new center 17 | 5. Repeat step 3 to 4 until the maximum number of iterations is reached 18 | 19 | Usage 20 | ----- 21 | from swarms_torch import SPO 22 | 23 | goaling = "Hello SPO" 24 | spo = SPO(goaling, m=100, k_max=1000) 25 | spo.optimize() 26 | 27 | print("Best Matched String:", spo.best_string()) 28 | 29 | Future Features to implement 30 | -------- 31 | 1. Add a stopping criterion 32 | 2. Add a callback function to track the progress 33 | 3. Add a function to plot the search points 34 | 4. Add a function to plot the best solution 35 | 36 | """ 37 | 38 | def __init__(self, goal: str = None, m: int = 10, k_max: int = 1000): 39 | """ 40 | Initialize the SPO class. 41 | 42 | Args: 43 | - goal: The target string. 44 | - m: Number of search points (strings). 45 | - k_max: Maximum number of iterations. 46 | """ 47 | self.goal = torch.tensor( 48 | [ord(c) for c in goal], dtype=torch.float32 49 | ) # ASCII representation 50 | 51 | self.m = m 52 | self.k_max = k_max 53 | self.n_dim = len(goal) 54 | 55 | # Initializing the search points and center randomly 56 | # Note: 32-126 is the ASCII range for all printable characters 57 | self.points = torch.randint( 58 | 32, 127, (self.m, self.n_dim), dtype=torch.float32 59 | ) 60 | self.center = torch.randint(32, 127, (self.n_dim,), dtype=torch.float32) 61 | 62 | def _step_rate(self, k): 63 | """ 64 | Define the step rate function. 65 | 66 | Args: 67 | - k: Current iteration. 68 | 69 | Returns: Step rate for the current iteration. 
70 | """ 71 | return 1 / (1 + k) 72 | 73 | def _update_points(self, k): 74 | """Update the search points based on the spiral model.""" 75 | r = self._step_rate(k) 76 | R = torch.eye(self.n_dim) # Identity for simplicity in n-dimensions 77 | for i in range(self.m): 78 | self.points[i] = self.center + r * torch.mv( 79 | R, (self.points[i] - self.center) 80 | ) 81 | 82 | def _update_center(self): 83 | """Find the best search point and set as the new center.""" 84 | fitnesses = torch.norm(self.points - self.goal, dim=1) 85 | best_idx = torch.argmin(fitnesses) 86 | self.center = self.points[best_idx] 87 | 88 | def optimize(self): 89 | """Run the optimization loop.""" 90 | for k in range(self.k_max): 91 | self._update_points(k) 92 | self._update_center() 93 | if ( 94 | torch.norm(self.center - self.goal) < 1e-5 95 | ): # Example convergence condition 96 | break 97 | 98 | def best_string(self): 99 | """Convert the best found point to its string representation""" 100 | return "".join([chr(int(c)) for c in self.center.round()]) 101 | 102 | 103 | # # Example Usage 104 | # goal = "Attention is all you need" 105 | # optimizer = SPO(goal) 106 | # optimizer.optimize() 107 | # print(f"Optimized String: {optimizer.best_string()}") 108 | -------------------------------------------------------------------------------- /test_switch_moe_fix.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.structs.switch_moe import SwitchMoE 3 | 4 | 5 | def test_switch_moe_aux_loss(): 6 | """Test that SwitchMoE works with auxiliary loss enabled.""" 7 | 8 | # Set up test parameters 9 | batch_size = 32 10 | seq_len = 128 11 | dim = 512 12 | num_experts = 8 13 | 14 | # Create model with auxiliary loss enabled 15 | model = SwitchMoE( 16 | dim=dim, 17 | hidden_dim=dim, 18 | output_dim=dim, 19 | num_experts=num_experts, 20 | use_aux_loss=True, 21 | ) 22 | 23 | # Create test input 24 | x = torch.randn(batch_size, dim) 25 | 26 | try: 27 | # Forward pass 28 | output, loss = model(x) 29 | 30 | print("✅ Success! 
No runtime error occurred.") 31 | print(f"Input shape: {x.shape}") 32 | print(f"Output shape: {output.shape}") 33 | print(f"Auxiliary loss: {loss.item() if loss is not None else 'None'}") 34 | 35 | # Verify shapes 36 | assert ( 37 | output.shape == x.shape 38 | ), f"Output shape {output.shape} doesn't match input shape {x.shape}" 39 | assert ( 40 | loss is not None 41 | ), "Loss should not be None when use_aux_loss=True" 42 | assert torch.isfinite(loss), "Loss should be finite" 43 | 44 | print("✅ All assertions passed!") 45 | 46 | except Exception as e: 47 | print(f"❌ Error occurred: {e}") 48 | raise e 49 | 50 | 51 | if __name__ == "__main__": 52 | test_switch_moe_aux_loss() 53 | -------------------------------------------------------------------------------- /tests/ant_colony.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | 4 | from swarms_torch import AntColonyOptimization # Import your class 5 | 6 | 7 | class TestAntColonyOptimization(unittest.TestCase): 8 | def setUp(self): 9 | self.aco = AntColonyOptimization( 10 | goal="Hello ACO", num_ants=1000, num_iterations=10 11 | ) 12 | 13 | def test_initialization(self): 14 | self.assertEqual(self.aco.goal.tolist(), [ord(c) for c in "Hello ACO"]) 15 | self.assertEqual(self.aco.pheromones.size(), torch.Size([1000])) 16 | self.assertEqual(self.aco.pheromones.tolist(), [1.0] * 1000) 17 | 18 | def test_fitness(self): 19 | solution = torch.tensor( 20 | [ord(c) for c in "Hello ACO"], dtype=torch.float32 21 | ) 22 | self.assertEqual( 23 | self.aco.fitness(solution).item(), 0 24 | ) # Should be maximum fitness 25 | 26 | def test_update_pheromones(self): 27 | initial_pheromones = self.aco.pheromones.clone() 28 | self.aco.solutions = [ 29 | torch.tensor([ord(c) for c in "Hello ACO"], dtype=torch.float32) 30 | for _ in range(1000) 31 | ] 32 | self.aco.update_pheromones() 33 | # After updating, pheromones should not remain the same 34 | self.assertFalse(torch.equal(initial_pheromones, self.aco.pheromones)) 35 | 36 | def test_choose_next_path(self): 37 | path = self.aco.choose_next_path() 38 | # Path should be an integer index within the number of ants 39 | self.assertIsInstance(path, int) 40 | self.assertGreaterEqual(path, 0) 41 | self.assertLess(path, 1000) 42 | 43 | def test_optimize(self): 44 | solution = self.aco.optimize() 45 | self.assertIsInstance(solution, str) 46 | # Given enough iterations and ants, the solution should approach the goal. For short runs, this might not hold. 
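        # A looser, hedged check for short runs (assumes optimize() returns
        # a decoded string of the same length as the goal):
        # self.assertEqual(len(solution), len("Hello ACO"))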
47 | # self.assertEqual(solution, "Hello ACO") 48 | 49 | def test_invalid_parameters(self): 50 | with self.assertRaises(ValueError): 51 | _ = AntColonyOptimization(num_ants=-5) 52 | with self.assertRaises(ValueError): 53 | _ = AntColonyOptimization(evaporation_rate=1.5) 54 | 55 | 56 | if __name__ == "__main__": 57 | unittest.main() 58 | -------------------------------------------------------------------------------- /tests/cellular_swarm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch import TransformerCell, CellularSwarm 3 | 4 | 5 | def test_transformercell_initialization(): 6 | transformercell = TransformerCell(input_dim=512, nhead=8) 7 | assert isinstance(transformercell, TransformerCell) 8 | assert transformercell.neighborhood_size == 3 9 | 10 | 11 | def test_transformercell_forward(): 12 | transformercell = TransformerCell(input_dim=512, nhead=8) 13 | x = torch.randn(10, 32, 512) 14 | neighbors = [torch.randn(10, 32, 512)] 15 | output = transformercell(x, neighbors) 16 | assert output.shape == torch.Size([20, 32, 512]) 17 | 18 | 19 | def test_cellularswarm_initialization(): 20 | cellularswarm = CellularSwarm(cell_count=5, input_dim=512, nhead=8) 21 | assert isinstance(cellularswarm, CellularSwarm) 22 | assert len(cellularswarm.cells) == 5 23 | assert cellularswarm.time_steps == 4 24 | 25 | 26 | def test_cellularswarm_forward(): 27 | cellularswarm = CellularSwarm(cell_count=5, input_dim=512, nhead=8) 28 | x = torch.randn(10, 32, 512) 29 | output = cellularswarm(x) 30 | assert output.shape == torch.Size([10, 32, 512]) 31 | -------------------------------------------------------------------------------- /tests/fish_school.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.structs.fish_school import Fish, FishSchool 3 | 4 | 5 | def test_fish_initialization(): 6 | fish = Fish(dim=512, heads=8, depth=6) 7 | assert isinstance(fish, Fish) 8 | 9 | 10 | def test_fish_train(): 11 | fish = Fish(dim=512, heads=8, depth=6) 12 | src = torch.randn(10, 32, 512) 13 | tgt = torch.randn(10, 32, 512) 14 | labels = torch.randint(0, 512, (10, 32)) 15 | fish.train(src, tgt, labels) 16 | assert isinstance(fish.food, float) 17 | 18 | 19 | def test_fishschool_initialization(): 20 | fishschool = FishSchool( 21 | num_fish=10, dim=512, heads=8, depth=6, num_iter=100 22 | ) 23 | assert isinstance(fishschool, FishSchool) 24 | assert len(fishschool.fish) == 10 25 | 26 | 27 | def test_fishschool_forward(): 28 | fishschool = FishSchool( 29 | num_fish=10, dim=512, heads=8, depth=6, num_iter=100 30 | ) 31 | src = torch.randn(10, 32, 512) 32 | tgt = torch.randn(10, 32, 512) 33 | labels = torch.randint(0, 512, (10, 32)) 34 | fishschool.forward(src, tgt, labels) 35 | assert isinstance(fishschool.fish[0].food, float) 36 | -------------------------------------------------------------------------------- /tests/neuronal_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.structs.neuronal_transformer import ( 3 | TransformerLayer, 4 | Neuron, 5 | SynapseTransformer, 6 | NNTransformer, 7 | ) 8 | 9 | 10 | def test_transformerlayer_initialization(): 11 | transformerlayer = TransformerLayer(input_dim=512, output_dim=256, nhead=8) 12 | assert isinstance(transformerlayer, TransformerLayer) 13 | 14 | 15 | def test_transformerlayer_forward(): 16 | transformerlayer = TransformerLayer(input_dim=512, 
output_dim=256, nhead=8) 17 | x = torch.randn(10, 32, 512) 18 | output = transformerlayer(x) 19 | assert output.shape == torch.Size([10, 32, 256]) 20 | 21 | 22 | def test_neuron_initialization(): 23 | neuron = Neuron(num_states=10) 24 | assert isinstance(neuron, Neuron) 25 | assert neuron.states.shape == torch.Size([10]) 26 | 27 | 28 | def test_synapsetransformer_initialization(): 29 | synapsetransformer = SynapseTransformer( 30 | input_dim=512, output_dim=256, nhead=8 31 | ) 32 | assert isinstance(synapsetransformer, SynapseTransformer) 33 | 34 | 35 | def test_synapsetransformer_forward(): 36 | synapsetransformer = SynapseTransformer( 37 | input_dim=512, output_dim=256, nhead=8 38 | ) 39 | x = torch.randn(10, 32, 512) 40 | output = synapsetransformer(x) 41 | assert output.shape == torch.Size([10, 32, 256]) 42 | 43 | 44 | def test_nntransformer_initialization(): 45 | nntransformer = NNTransformer( 46 | neuron_count=5, num_states=10, input_dim=512, output_dim=256, nhead=8 47 | ) 48 | assert isinstance(nntransformer, NNTransformer) 49 | assert len(nntransformer.neurons) == 5 50 | assert len(nntransformer.synapses) == 5 51 | 52 | 53 | def test_nntransformer_forward(): 54 | nntransformer = NNTransformer( 55 | neuron_count=5, num_states=10, input_dim=512, output_dim=256, nhead=8 56 | ) 57 | x = torch.randn(1, 10) 58 | output = nntransformer(x) 59 | assert output.shape == torch.Size([10]) 60 | -------------------------------------------------------------------------------- /tests/particle_swarm.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | 4 | from swarms_torch import ParticleSwarmOptimization # Import your class here 5 | 6 | 7 | class TestParticleSwarmOptimization(unittest.TestCase): 8 | def setUp(self): 9 | self.pso = ParticleSwarmOptimization(goal="Hello", n_particles=10) 10 | 11 | def test_initialization(self): 12 | self.assertEqual(self.pso.goal.tolist(), [ord(c) for c in "Hello"]) 13 | self.assertEqual(self.pso.particles.size(), (10, 5)) 14 | self.assertEqual(self.pso.velocities.size(), (10, 5)) 15 | 16 | def test_compute_fitness(self): 17 | particle = torch.tensor([ord(c) for c in "Hello"]) 18 | fitness = self.pso.compute_fitness(particle) 19 | self.assertEqual(fitness.item(), 1.0) 20 | 21 | def test_update(self): 22 | initial_particle = self.pso.particles.clone() 23 | self.pso.update() 24 | # After updating, particles should not remain the same (in most cases) 25 | self.assertFalse(torch.equal(initial_particle, self.pso.particles)) 26 | 27 | def test_optimize(self): 28 | initial_best_particle = self.pso.global_best.clone() 29 | self.pso.optimize(iterations=10) 30 | # After optimization, global best should be closer to the goal 31 | initial_distance = torch.norm( 32 | (initial_best_particle - self.pso.goal).float() 33 | ).item() 34 | final_distance = torch.norm( 35 | (self.pso.global_best - self.pso.goal).float() 36 | ).item() 37 | self.assertLess(final_distance, initial_distance) 38 | 39 | 40 | if __name__ == "__main__": 41 | unittest.main() 42 | -------------------------------------------------------------------------------- /tests/queen_bee.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | from swarms_torch.structs.queen_bee import QueenBeeGa # Import the class 4 | 5 | 6 | class TestQueenBeeGa(unittest.TestCase): 7 | def setUp(self): 8 | self.optimizer = QueenBeeGa(goal="Hello QBGA", pop_size=50) 9 | 10 | def 
test_initialization(self): 11 | self.assertEqual(self.optimizer.goal, "Hello QBGA") 12 | self.assertEqual(self.optimizer.gene_length, len("Hello QBGA")) 13 | self.assertIsNone(self.optimizer.queen) 14 | self.assertIsNone(self.optimizer.queen_fitness) 15 | 16 | def test_encode_decode(self): 17 | encoded = QueenBeeGa.encode("Hello") 18 | decoded = QueenBeeGa.decode(encoded) 19 | self.assertEqual(decoded, "Hello") 20 | 21 | def test_evolution(self): 22 | initial_population = self.optimizer.pool.clone() 23 | self.optimizer._evolve() 24 | self.assertFalse(torch.equal(initial_population, self.optimizer.pool)) 25 | 26 | def test_run(self): 27 | initial_population = self.optimizer.pool.clone() 28 | self.optimizer.run(max_generations=10) 29 | self.assertNotEqual( 30 | QueenBeeGa.decode(self.optimizer.queen), 31 | QueenBeeGa.decode(initial_population[0]), 32 | ) 33 | 34 | def test_check_convergence(self): 35 | self.optimizer.pool = torch.stack([self.optimizer.target_gene] * 50) 36 | self.assertTrue(self.optimizer._check_convergence()) 37 | 38 | def test_invalid_parameters(self): 39 | with self.assertRaises(ValueError): 40 | _ = QueenBeeGa(mutation_prob=1.5) 41 | with self.assertRaises(ValueError): 42 | _ = QueenBeeGa(strong_mutation_rate=-0.5) 43 | 44 | 45 | if __name__ == "__main__": 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /tests/spiral_optimization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from swarms_torch.utils.spiral_optimization import SPO 3 | 4 | 5 | def test_spo_initialization(): 6 | spo = SPO(goal="Hello SPO", m=100, k_max=1000) 7 | assert isinstance(spo, SPO) 8 | assert spo.goal.shape == torch.Size([9]) 9 | assert spo.points.shape == torch.Size([100, 9]) 10 | assert spo.center.shape == torch.Size([9]) 11 | 12 | 13 | def test_spo_step_rate(): 14 | spo = SPO(goal="Hello SPO", m=100, k_max=1000) 15 | step_rate = spo._step_rate(1) 16 | assert step_rate == 0.5 17 | 18 | 19 | def test_spo_update_points(): 20 | spo = SPO(goal="Hello SPO", m=100, k_max=1000) 21 | spo._update_points(1) 22 | assert spo.points.shape == torch.Size([100, 9]) 23 | 24 | 25 | def test_spo_update_center(): 26 | spo = SPO(goal="Hello SPO", m=100, k_max=1000) 27 | spo._update_center() 28 | assert spo.center.shape == torch.Size([9]) 29 | 30 | 31 | def test_spo_optimize(): 32 | spo = SPO(goal="Hello SPO", m=100, k_max=1000) 33 | spo.optimize() 34 | assert spo.center.shape == torch.Size([9]) 35 | 36 | 37 | def test_spo_best_string(): 38 | spo = SPO(goal="Hello SPO", m=100, k_max=1000) 39 | spo.optimize() 40 | best_string = spo.best_string() 41 | assert isinstance(best_string, str) 42 | assert len(best_string) == 9 43 | -------------------------------------------------------------------------------- /tests/swarmalator_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pytest 3 | from swarms_torch.swarmalators.swarmalator_base import ( 4 | pairwise_distances, 5 | function_for_x, 6 | function_for_sigma, 7 | simulate_swarmalators, 8 | ) 9 | 10 | # Define global constants for testing 11 | N = 10 12 | J = 1.0 13 | alpha = 0.1 14 | beta = 0.2 15 | gamma = 0.3 16 | epsilon_a = 0.01 17 | epsilon_r = 0.02 18 | R = 0.5 19 | D = 3 20 | T = 100 21 | dt = 0.1 22 | 23 | # === Test pairwise_distances === 24 | 25 | 26 | def test_pairwise_distances_shape(): 27 | x = torch.randn(N, D) 28 | distances = pairwise_distances(x) 29 | assert distances.shape 
== (N, N) 30 | 31 | 32 | def test_pairwise_distances_identity(): 33 | x = torch.randn(N, D) 34 | distances = pairwise_distances(x) 35 | for i in range(N): 36 | assert distances[i, i] == pytest.approx(0.0, abs=1e-6) 37 | 38 | 39 | def test_pairwise_distances_symmetry(): 40 | x = torch.randn(N, D) 41 | distances = pairwise_distances(x) 42 | for i in range(N): 43 | for j in range(i + 1, N): 44 | assert distances[i, j] == pytest.approx(distances[j, i], abs=1e-6) 45 | 46 | 47 | # === Test function_for_x === 48 | 49 | 50 | def test_function_for_x_shape(): 51 | xi = torch.randn(N, D) 52 | sigma_i = torch.randn(N, D) 53 | dx = function_for_x( 54 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 55 | ) 56 | assert dx.shape == (N, D) 57 | 58 | 59 | def test_function_for_x_output_range(): 60 | xi = torch.randn(N, D) 61 | sigma_i = torch.randn(N, D) 62 | dx = function_for_x( 63 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 64 | ) 65 | assert (dx >= -1.0).all() and (dx <= 1.0).all() 66 | 67 | 68 | def test_function_for_x_zero_at_equilibrium(): 69 | xi = torch.zeros(N, D) 70 | sigma_i = torch.zeros(N, D) 71 | dx = function_for_x( 72 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 73 | ) 74 | assert (dx == 0.0).all() 75 | 76 | 77 | # === Test function_for_sigma === 78 | 79 | 80 | def test_function_for_sigma_shape(): 81 | xi = torch.randn(N, D) 82 | sigma_i = torch.randn(N, D) 83 | d_sigma = function_for_sigma( 84 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 85 | ) 86 | assert d_sigma.shape == (N, D) 87 | 88 | 89 | def test_function_for_sigma_output_range(): 90 | xi = torch.randn(N, D) 91 | sigma_i = torch.randn(N, D) 92 | d_sigma = function_for_sigma( 93 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 94 | ) 95 | assert (d_sigma >= -1.0).all() and (d_sigma <= 1.0).all() 96 | 97 | 98 | def test_function_for_sigma_zero_at_equilibrium(): 99 | xi = torch.zeros(N, D) 100 | sigma_i = torch.zeros(N, D) 101 | d_sigma = function_for_sigma( 102 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D 103 | ) 104 | assert (d_sigma == 0.0).all() 105 | 106 | 107 | # === Test simulate_swarmalators === 108 | 109 | 110 | def test_simulate_swarmalators_output_shape(): 111 | results_xi, results_sigma_i = simulate_swarmalators( 112 | N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt 113 | ) 114 | assert len(results_xi) == T 115 | assert len(results_sigma_i) == T 116 | assert results_xi[0].shape == (N, D) 117 | assert results_sigma_i[0].shape == (N, D) 118 | 119 | 120 | def test_simulate_swarmalators_convergence(): 121 | results_xi, results_sigma_i = simulate_swarmalators( 122 | N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt 123 | ) 124 | for i in range(1, T): 125 | assert torch.allclose(results_xi[i], results_xi[i - 1], atol=1e-6) 126 | assert torch.allclose( 127 | results_sigma_i[i], results_sigma_i[i - 1], atol=1e-6 128 | ) 129 | 130 | 131 | def test_simulate_swarmalators_non_zero_initial_condition(): 132 | xi = torch.randn(N, D) 133 | sigma_i = torch.randn(N, D) 134 | results_xi, results_sigma_i = simulate_swarmalators( 135 | N, 136 | J, 137 | alpha, 138 | beta, 139 | gamma, 140 | epsilon_a, 141 | epsilon_r, 142 | R, 143 | D, 144 | T=T, 145 | dt=dt, 146 | xi=xi, 147 | sigma_i=sigma_i, 148 | ) 149 | assert not torch.allclose(results_xi[0], xi, atol=1e-6) 150 | assert not torch.allclose(results_sigma_i[0], sigma_i, atol=1e-6) 151 | 152 | 153 | # Add more tests as needed... 
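# One more sketch in that spirit: simulate_swarmalators re-normalizes
# sigma_i on every step (see swarmalator_base.py), so unit row norms are
# an invariant worth pinning down. The tolerance below is an assumption.
def test_simulate_swarmalators_sigma_unit_norm():
    _, results_sigma_i = simulate_swarmalators(
        N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt
    )
    for sigma in results_sigma_i:
        assert torch.allclose(sigma.norm(dim=1), torch.ones(N), atol=1e-5)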
154 | 
--------------------------------------------------------------------------------
/tests/test_mixture_of_mamba.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 | from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas
4 | 
5 | 
6 | @pytest.fixture
7 | def mixture():
8 |     num_mambas = 5
9 |     dim = 10
10 |     d_state = 20
11 |     d_conv = 30
12 |     expand = 40
13 |     return MixtureOfMambas(num_mambas, dim, d_state, 2, d_conv, expand)  # depth=2
14 | 
15 | 
16 | def test_init(mixture):
17 |     assert mixture.num_mambas == 5
18 |     assert mixture.dim == 10
19 |     assert mixture.d_state == 20
20 |     assert mixture.d_conv == 30
21 |     assert mixture.expand == 40
22 |     assert len(mixture.models) == 5
23 | 
24 | 
25 | def test_forward_average(mixture):
26 |     x = torch.rand((1, 10))
27 |     output = mixture.forward(x)
28 |     assert output.shape == (1, 10)
29 | 
30 | 
31 | def test_forward_weighted(mixture):
32 |     x = torch.rand((1, 10))
33 |     weights = torch.ones(5)
34 |     mixture.fusion_method = "weighted"
35 |     output = mixture.forward(x, weights)
36 |     assert output.shape == (1, 10)
37 | 
38 | 
39 | def test_forward_invalid_aggregation(mixture):
40 |     x = torch.rand((1, 10))
41 |     mixture.fusion_method = "invalid"
42 |     with pytest.raises(ValueError):
43 |         mixture.forward(x)
44 | 
45 | 
46 | def test_average_aggregate(mixture):
47 |     outputs = [torch.rand((1, 10)) for _ in range(5)]
48 |     output = mixture.average_aggregate(outputs)
49 |     assert output.shape == (1, 10)
50 | 
51 | 
52 | def test_weighted_aggregate(mixture):
53 |     outputs = [torch.rand((1, 10)) for _ in range(5)]
54 |     weights = torch.ones(5)
55 |     output = mixture.weighted_aggregate(outputs, weights)
56 |     assert output.shape == (1, 10)
57 | 
58 | 
59 | def test_weighted_aggregate_invalid_weights(mixture):
60 |     outputs = [torch.rand((1, 10)) for _ in range(5)]
61 |     weights = torch.ones(4)
62 |     with pytest.raises(ValueError):
63 |         mixture.weighted_aggregate(outputs, weights)
64 | 
65 | 
66 | def test_forward_different_dimensions(mixture):
67 |     x = torch.rand((2, 10))
68 |     with pytest.raises(ValueError):
69 |         mixture.forward(x)
70 | 
71 | 
72 | def test_forward_no_weights(mixture):
73 |     x = torch.rand((1, 10))
74 |     mixture.fusion_method = "weighted"
75 |     with pytest.raises(ValueError):
76 |         mixture.forward(x)
77 | 
78 | 
79 | def test_forward_extra_weights(mixture):
80 |     x = torch.rand((1, 10))
81 |     weights = torch.ones(6)
82 |     mixture.fusion_method = "weighted"
83 |     with pytest.raises(ValueError):
84 |         mixture.forward(x, weights)
85 | 
--------------------------------------------------------------------------------
/tests/transformer_hive.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 | from swarms_torch.structs.hivemind_swarm_transformer import HivemindSwarm
4 | 
5 | 
6 | # Create a fixture for the HivemindSwarm model
7 | @pytest.fixture
8 | def swarm_model():
9 |     return HivemindSwarm(
10 |         dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=3
11 |     )
12 | 
13 | 
14 | # Test the basic functionality of HivemindSwarm
15 | def test_hivemind_swarm_forward(swarm_model):
16 |     x = torch.randint(0, 20000, (1, 32))
17 |     y = swarm_model(x)
18 |     assert y.shape == (1, 32, 512)
19 | 
20 | 
21 | # Test if the swarm consists of the correct number of transformers
22 | def test_num_transformers_in_swarm(swarm_model):
23 |     assert len(list(swarm_model.experts)) == 3
24 | 
25 | 
26 | # Test if the gate mechanism works as expected
27 | def 
--------------------------------------------------------------------------------
/tests/transformer_hive.py:
--------------------------------------------------------------------------------
import pytest
import torch
from swarms_torch.structs.hivemind_swarm_transformer import HivemindSwarm


# Create a fixture for the HivemindSwarm model
@pytest.fixture
def swarm_model():
    return HivemindSwarm(
        dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=3
    )


# Test the basic functionality of HivemindSwarm
def test_hivemind_swarm_forward(swarm_model):
    x = torch.randint(0, 20000, (1, 32))
    y = swarm_model(x)
    assert y.shape == (1, 32, 512)


# Test if the swarm consists of the correct number of transformers
def test_num_transformers_in_swarm(swarm_model):
    assert len(list(swarm_model.experts)) == 3


# Test if the gate mechanism works as expected
def test_gate_mechanism(swarm_model):
    x = torch.randint(0, 20000, (1, 32))
    outputs = torch.stack([expert(x) for expert in swarm_model.experts], dim=1)
    gate = swarm_model.gate_activation(
        swarm_model.gate_bias + swarm_model.gate(outputs)
    )

    # Check if the gate values sum to 1 along the transformer dimension
    assert torch.allclose(gate.sum(dim=-1), torch.ones(1, 3))


# Test if the model can handle different input shapes
def test_different_input_shapes(swarm_model):
    x1 = torch.randint(0, 20000, (1, 32))
    x2 = torch.randint(0, 20000, (1, 16))
    y1 = swarm_model(x1)
    y2 = swarm_model(x2)
    assert y1.shape == (1, 32, 512)
    assert y2.shape == (1, 16, 512)


# Test if the model can handle different numbers of models in the swarm
def test_different_num_models():
    swarm_model_1 = HivemindSwarm(
        dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=1
    )
    swarm_model_2 = HivemindSwarm(
        dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=5
    )

    x = torch.randint(0, 20000, (1, 32))
    y1 = swarm_model_1(x)
    y2 = swarm_model_2(x)

    assert y1.shape == (1, 32, 512)
    assert y2.shape == (1, 32, 512)


# Test if the model works with different configurations
def test_different_configurations():
    model_1 = HivemindSwarm(
        dim=256, max_seq_len=16, depth=4, heads=4, dim_head=64, num_models=2
    )
    model_2 = HivemindSwarm(
        dim=1024, max_seq_len=64, depth=8, heads=16, dim_head=128, num_models=4
    )

    x = torch.randint(0, 20000, (1, 16))
    y1 = model_1(x)
    y2 = model_2(x)

    assert y1.shape == (1, 16, 256)
    assert y2.shape == (1, 16, 1024)
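

# A hedged determinism check (a sketch, assuming HivemindSwarm is an
# ordinary nn.Module, so that .eval() disables any dropout): two forward
# passes over the same input should then agree exactly.
def test_forward_deterministic_in_eval(swarm_model):
    swarm_model.eval()
    x = torch.randint(0, 20000, (1, 32))
    with torch.no_grad():
        y1 = swarm_model(x)
        y2 = swarm_model(x)
    assert torch.allclose(y1, y2)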
--------------------------------------------------------------------------------
/tests/transformer_pso.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import DataLoader
from swarms_torch.pso.transformer_pso import (
    SimpleTransformer,
    TransformerParticleSwarmOptimization,
)


def test_simpletransformer_initialization():
    simpletransformer = SimpleTransformer(
        input_dim=10, d_model=512, nhead=8, num_layers=1, output_dim=2
    )
    assert isinstance(simpletransformer, SimpleTransformer)


def test_simpletransformer_forward():
    simpletransformer = SimpleTransformer(
        input_dim=10, d_model=512, nhead=8, num_layers=1, output_dim=2
    )
    x = torch.randint(0, 10, (10, 32))
    output = simpletransformer(x)
    assert output.shape == torch.Size([32, 2])


def test_TransformerParticleSwarmOptimization_initialization():
    model_constructor = SimpleTransformer
    model_args = (10, 512, 8, 1, 2)
    device = "cpu"
    criterion = torch.nn.CrossEntropyLoss()
    data_loader = DataLoader(
        [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
        batch_size=32,
    )
    pso = TransformerParticleSwarmOptimization(
        model_constructor, model_args, device, criterion, data_loader
    )
    assert isinstance(pso, TransformerParticleSwarmOptimization)
    assert len(pso.particles) == 10
    assert len(pso.velocities) == 10
    assert len(pso.personal_best) == 10


def test_TransformerParticleSwarmOptimization_compute_fitness():
    model_constructor = SimpleTransformer
    model_args = (10, 512, 8, 1, 2)
    device = "cpu"
    criterion = torch.nn.CrossEntropyLoss()
    data_loader = DataLoader(
        [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
        batch_size=32,
    )
    pso = TransformerParticleSwarmOptimization(
        model_constructor, model_args, device, criterion, data_loader
    )
    fitness = pso.compute_fitness(pso.particles[0].state_dict())
    assert isinstance(fitness, float)


def test_TransformerParticleSwarmOptimization_update():
    model_constructor = SimpleTransformer
    model_args = (10, 512, 8, 1, 2)
    device = "cpu"
    criterion = torch.nn.CrossEntropyLoss()
    data_loader = DataLoader(
        [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
        batch_size=32,
    )
    pso = TransformerParticleSwarmOptimization(
        model_constructor, model_args, device, criterion, data_loader
    )
    pso.update()
    assert len(pso.particles) == 10
    assert len(pso.velocities) == 10
    assert len(pso.personal_best) == 10


def test_TransformerParticleSwarmOptimization_optimize():
    model_constructor = SimpleTransformer
    model_args = (10, 512, 8, 1, 2)
    device = "cpu"
    criterion = torch.nn.CrossEntropyLoss()
    data_loader = DataLoader(
        [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
        batch_size=32,
    )
    pso = TransformerParticleSwarmOptimization(
        model_constructor, model_args, device, criterion, data_loader
    )
    pso.optimize(iterations=10)
    assert len(pso.particles) == 10
    assert len(pso.velocities) == 10
    assert len(pso.personal_best) == 10


def test_TransformerParticleSwarmOptimization_get_best_model():
    model_constructor = SimpleTransformer
    model_args = (10, 512, 8, 1, 2)
    device = "cpu"
    criterion = torch.nn.CrossEntropyLoss()
    data_loader = DataLoader(
        [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
        batch_size=32,
    )
    pso = TransformerParticleSwarmOptimization(
        model_constructor, model_args, device, criterion, data_loader
    )
    pso.optimize(iterations=10)
    best_model = pso.get_best_model()
    assert isinstance(best_model, SimpleTransformer)
--------------------------------------------------------------------------------
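
# A hedged end-to-end usage sketch for the PSO trainer above, reusing only
# the constructor, optimize, and get_best_model calls exercised in
# tests/transformer_pso.py; the iteration count and synthetic data are
# illustrative, not a recommended training setup.
import torch
from torch.utils.data import DataLoader
from swarms_torch.pso.transformer_pso import (
    SimpleTransformer,
    TransformerParticleSwarmOptimization,
)

criterion = torch.nn.CrossEntropyLoss()
data_loader = DataLoader(
    [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
    batch_size=32,
)
pso = TransformerParticleSwarmOptimization(
    SimpleTransformer, (10, 512, 8, 1, 2), "cpu", criterion, data_loader
)
pso.optimize(iterations=2)  # short run; raise for a real search
best_model = pso.get_best_model()
logits = best_model(torch.randint(0, 10, (10, 32)))
assert logits.shape == torch.Size([32, 2])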