├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── code_quality_control.yml
│       ├── cos_integration.yml
│       ├── docs.yml
│       ├── docs_test.yml
│       ├── label.yml
│       ├── lints.yml
│       ├── pr_request_checks.yml
│       ├── pull-request-links.yml
│       ├── pylint.yml
│       ├── python-publish.yml
│       ├── quality.yml
│       ├── ruff.yml
│       ├── run_test.yml
│       ├── stale.yml
│       ├── test.yml
│       ├── testing.yml
│       ├── unit-test.yml
│       └── welcome.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs
│   ├── .DS_Store
│   ├── .readthedocs.yaml
│   ├── 20_swarm_model_ideas.md
│   ├── README.md
│   ├── assets
│   │   ├── css
│   │   │   └── extra.css
│   │   └── img
│   │       ├── SwarmsLogoIcon.png
│   │       ├── swarmsbanner.png
│   │       ├── tools
│   │       │   ├── output.png
│   │       │   ├── poetry_setup.png
│   │       │   └── toml.png
│   │       └── zetascale.png
│   ├── contributing.md
│   ├── index.md
│   ├── mkdocs.yml
│   ├── overrides
│   │   └── main.html
│   ├── requirements.txt
│   ├── stylesheets
│   │   └── extra.css
│   ├── swarms
│   │   ├── aco.md
│   │   ├── firefly.md
│   │   ├── index.md
│   │   ├── pso.md
│   │   ├── qb.md
│   │   └── so.md
│   └── vision.md
├── example.py
├── examples
│   ├── ant_colony.py
│   ├── fire_fly_example.py
│   ├── fish_school_example.py
│   ├── mixture_of_mambas.py
│   ├── new_mergers.py
│   ├── nnt.py
│   ├── queen_bee_transformer_hierarchy_example.py
│   ├── silu_visualization.py
│   ├── simple_moe.py
│   ├── sop.py
│   ├── swarmalator_example.py
│   └── switch_moe.py
├── multi_modal_mergers.py
├── pyproject.toml
├── queen_bee_transformer_hierarchy.py
├── requirements.txt
├── scripts
│   ├── code_quality.sh
│   ├── get_package_requirements.py
│   ├── requirementstxt_to_pyproject.py
│   ├── test_name.sh
│   └── tests.sh
├── swarms_torch
│   ├── __init__.py
│   ├── mergers
│   │   ├── __init__.py
│   │   ├── all_new_evo_mergers.py
│   │   └── mm_mergers.py
│   ├── pso
│   │   ├── __init__.py
│   │   ├── multi_swarm_pso.py
│   │   ├── multi_swarm_pso2.py
│   │   ├── multi_swarm_pso_transformer.py
│   │   └── transformer_pso.py
│   ├── structs
│   │   ├── __init__.py
│   │   ├── ant_colony_swarm.py
│   │   ├── cellular_transformer.py
│   │   ├── firefly.py
│   │   ├── fish_school.py
│   │   ├── graph_cellular_automa.py
│   │   ├── hivemind_swarm_transformer.py
│   │   ├── ma_agent.py
│   │   ├── mas_model.py
│   │   ├── mixtral_expert.py
│   │   ├── mixture_of_mamba.py
│   │   ├── neuronal_transformer.py
│   │   ├── parallel_wrapper.py
│   │   ├── queen_bee.py
│   │   ├── simple_moe.py
│   │   └── switch_moe.py
│   ├── swarmalators
│   │   ├── __init__.py
│   │   ├── swarmalator_base.py
│   │   ├── swarmalator_transformer.py
│   │   └── swarmalator_visualize.py
│   └── utils
│       ├── __init__.py
│       ├── particle_swarm.py
│       ├── sakana.py
│       └── spiral_optimization.py
├── test.py
├── test_switch_moe_fix.py
└── tests
    ├── ant_colony.py
    ├── cellular_swarm.py
    ├── fish_school.py
    ├── neuronal_transformer.py
    ├── particle_swarm.py
    ├── queen_bee.py
    ├── spiral_optimization.py
    ├── swarmalator_base.py
    ├── test.py
    ├── test_mixture_of_mamba.py
    ├── transformer_hive.py
    └── transformer_pso.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [kyegomez]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: #Nothing
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a detailed report on the bug and its root cause. Conduct a root-cause error analysis
4 | title: "[BUG] "
5 | labels: bug
6 | assignees: kyegomez
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is and its root cause. Test thoroughly before submitting.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Additional context**
27 | Add any other context about the problem here.
28 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: 'kyegomez'
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.yml:
--------------------------------------------------------------------------------
1 |
4 |
5 | {% block announce %}
6 |
9 | {% endblock %}
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs
2 | mkdocs-material
3 | mkdocs-glightbox
4 | mkdocs-git-authors-plugin
5 | mkdocs-git-revision-date-plugin
6 | mkdocs-git-committers-plugin
7 | mkdocstrings
8 | mike
9 | mkdocs-jupyter
10 | mkdocs-git-committers-plugin-2
11 | mkdocs-git-revision-date-localized-plugin
12 | mkdocs-redirects
13 | mkdocs-material-extensions
14 | mkdocs-simple-hooks
15 | mkdocs-awesome-pages-plugin
16 | mkdocs-versioning
17 | mkdocs-mermaid2-plugin
18 | mkdocs-include-markdown-plugin
19 | mkdocs-enumerate-headings-plugin
20 | mkdocs-autolinks-plugin
21 | mkdocs-minify-html-plugin
22 | mkdocs-autolinks-plugin
23 |
24 | # Requirements for core
25 | jinja2~=3.1
26 | markdown~=3.7
27 | mkdocs-material-extensions~=1.3
28 | pygments~=2.18
29 | pymdown-extensions~=10.9
30 |
31 | # Requirements for plugins
32 | babel~=2.16
33 | colorama~=0.4
34 | paginate~=0.5
35 | regex>=2022.4
--------------------------------------------------------------------------------
/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --md-primary-fg-color: #8315F9;
3 | --md-accent-fg-color: #00FFCE;
4 | }
--------------------------------------------------------------------------------
/docs/swarms/aco.md:
--------------------------------------------------------------------------------
1 | # `AntColonyOptimization` Class
2 |
3 | The `AntColonyOptimization` class implements the Ant Colony Optimization (ACO) algorithm. ACO is a probabilistic technique for solving computational problems which can be reduced to finding good paths through graphs.
4 |
5 | ## Attributes
6 |
7 | - `goal` (str): The goal string to be optimized.
8 | - `num_ants` (int): Number of ants.
9 | - `evaporation_rate` (float): Evaporation rate.
10 | - `alpha` (int): The relative importance of the pheromone.
11 | - `beta` (int): The relative importance of the heuristic information.
12 | - `num_iterations` (int): The number of iterations.
13 | - `pheromones` (torch.Tensor): The pheromone levels.
14 | - `solutions` (list): The solutions found by the ants.
15 |
16 | ## Methods
17 |
18 |
19 | ### `__init__(self, goal: str = None, num_ants: int = 10000, evaporation_rate: float = 0.1, alpha: int = 1, beta: int = 1, num_iterations: int = 10010)`
20 |
21 | The constructor for the `AntColonyOptimization` class. Initializes the pheromone levels and the solutions.
22 |
23 | #### Parameters
24 |
25 | - `goal` (str, optional): The goal string to be optimized.
26 | - `num_ants` (int, optional): Number of ants. Default is 10000.
27 | - `evaporation_rate` (float, optional): Evaporation rate. Default is 0.1.
28 | - `alpha` (int, optional): The relative importance of the pheromone. Default is 1.
29 | - `beta` (int, optional): The relative importance of the heuristic information. Default is 1.
30 | - `num_iterations` (int, optional): The number of iterations. Default is 10010.
31 |
32 | #### Example
33 |
34 | ```python
35 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=10000, num_iterations=1000)
36 | ```
37 |
38 |
39 | ### `fitness(self, solution)`
40 |
41 | Computes the fitness of a solution. The fitness is the negative of the Euclidean distance between the solution and the goal.
42 |
43 | #### Parameters
44 |
45 | - `solution` (torch.Tensor): The solution to compute the fitness for.
46 |
47 | #### Returns
48 |
49 | - `fitness` (float): The fitness of the solution.
50 |
51 | #### Example
52 |
53 | ```python
54 | fitness = aco.fitness(solution)
55 | ```
56 |
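As a hedged sketch of this definition (assuming, as in the usage examples below, that the goal string is encoded as a tensor of ASCII codes):

```python
import torch

# Illustrative only: fitness as the negative Euclidean distance between a
# candidate solution and the ASCII-encoded goal. Higher (closer to zero) is better.
goal = torch.tensor([ord(c) for c in "Hello ACO"], dtype=torch.float32)
solution = torch.randint(32, 127, goal.shape).float()  # random printable codes
fitness = -torch.norm(solution - goal)
```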
57 |
58 | ### `update_pheromones(self)`
59 |
60 | Updates the pheromone levels based on the fitness of the solutions.
61 |
62 | #### Example
63 |
64 | ```python
65 | aco.update_pheromones()
66 | ```
67 |
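A minimal sketch of what such an update typically looks like, using the `evaporation_rate` attribute above (illustrative, not the exact library internals):

```python
import torch

# Pheromone decays by the evaporation rate, then each solution deposits an
# amount that grows with its fitness (softmax keeps deposits positive).
evaporation_rate = 0.1
pheromones = torch.ones(10)
fitnesses = torch.randn(10)  # one fitness value per solution
pheromones = (1 - evaporation_rate) * pheromones + torch.softmax(fitnesses, dim=0)
```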
68 |
69 | ### `choose_next_path(self)`
70 |
71 | Chooses the next path based on the pheromone levels. The probability of choosing a path is proportional to the pheromone level of the path.
72 |
73 | #### Returns
74 |
75 | - `path` (int): The chosen path.
76 |
77 | #### Example
78 |
79 | ```python
80 | path = aco.choose_next_path()
81 | ```
82 |
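The proportional rule can be sketched as follows (illustrative only; the real method works over the class's `pheromones` tensor):

```python
import torch

# Sample an index with probability proportional to its pheromone level.
pheromones = torch.rand(10)
path = torch.multinomial(pheromones / pheromones.sum(), num_samples=1).item()
```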
83 |
84 | ### `optimize(self)`
85 |
86 | Optimizes the goal string. Updates the solutions and the pheromone levels for a given number of iterations and returns the best solution.
87 |
88 | #### Returns
89 |
90 | - `best_solution` (str): The best solution.
91 |
92 | #### Example
93 |
94 | ```python
95 | best_solution = aco.optimize()
96 | print("Best Matched String:", best_solution)
97 | ```
98 |
99 |
100 | ## Usage Examples
101 |
102 |
103 | ### Example 1: Optimize a String
104 |
105 | In this example, we will optimize the string "Hello ACO" using 10000 ants and 1000 iterations.
106 |
107 | ```python
108 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=10000, num_iterations=1000)
109 | best_solution = aco.optimize()
110 | print("Best Matched String:", best_solution)
111 | ```
112 |
113 |
114 | ### Example 2: Using a Different Number of Ants
115 |
116 | In this example, we will optimize the string "Hello ACO" using 5000 ants and 1000 iterations.
117 |
118 | ```python
119 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=5000, num_iterations=1000)
120 | best_solution = aco.optimize()
121 | print("Best Matched String:", best_solution)
122 | ```
123 |
124 |
125 | ### Example 3: Using a Different Evaporation Rate
126 |
127 | In this example, we will optimize the string "Hello ACO" using 10000 ants, an evaporation rate of 0.2, and 1000 iterations.
128 |
129 | ```python
130 | aco = AntColonyOptimization(goal="Hello ACO", num_ants=10000, evaporation_rate=0.2, num_iterations=1000)
131 | best_solution = aco.optimize()
132 | print("Best Matched String:", best_solution)
133 | ```
134 |
--------------------------------------------------------------------------------
/docs/swarms/firefly.md:
--------------------------------------------------------------------------------
1 | # FireflyOptimizer
2 |
3 | ```python
4 | class FireflyOptimizer(cost_function, steps=5000, species=4, population_size=1000, dimensions=10, lower_bound=-4.0, upper_bound=4.0, mix_species_every=25, beta0=2.0, gamma=1.0, alpha=0.1, alpha_decay=0.995, use_genetic_algorithm=False, breed_every=10, tournament_size=100, num_children=500, use_cuda=True, verbose=True)
5 | ```
6 |
7 | The `FireflyOptimizer` class implements the Firefly Algorithm to minimize a given objective function. It simulates the flashing behavior of fireflies to explore the search space efficiently.
8 |
9 | ## Parameters
10 |
11 | - **cost_function** (callable):
12 | The objective function to minimize. Should accept a `torch.Tensor` and return a `torch.Tensor` of costs.
13 |
14 | - **steps** (int, optional):
15 | Number of optimization steps. Default: `5000`.
16 |
17 | - **species** (int, optional):
18 | Number of species in the population. Default: `4`.
19 |
20 | - **population_size** (int, optional):
21 | Number of fireflies in each species. Default: `1000`.
22 |
23 | - **dimensions** (int, optional):
24 | Dimensionality of the search space. Default: `10`.
25 |
26 | - **lower_bound** (float, optional):
27 | Lower bound of the search space. Default: `-4.0`.
28 |
29 | - **upper_bound** (float, optional):
30 | Upper bound of the search space. Default: `4.0`.
31 |
32 | - **mix_species_every** (int, optional):
33 | Interval (in steps) to mix species. Default: `25`.
34 |
35 | - **beta0** (float, optional):
36 | Base attractiveness coefficient. Default: `2.0`.
37 |
38 | - **gamma** (float, optional):
39 | Light absorption coefficient controlling intensity decay. Default: `1.0`.
40 |
41 | - **alpha** (float, optional):
42 | Randomness scaling factor. Default: `0.1`.
43 |
44 | - **alpha_decay** (float, optional):
45 | Decay rate of `alpha` per step. Default: `0.995`.
46 |
47 | - **use_genetic_algorithm** (bool, optional):
48 | Whether to include genetic algorithm operations. Default: `False`.
49 |
50 | - **breed_every** (int, optional):
51 | Steps between breeding operations when using genetic algorithm. Default: `10`.
52 |
53 | - **tournament_size** (int, optional):
54 | Number of participants in each tournament selection. Default: `100`.
55 |
56 | - **num_children** (int, optional):
57 | Number of offspring produced during breeding. Default: `500`.
58 |
59 | - **use_cuda** (bool, optional):
60 | Use CUDA for computations if available. Default: `True`.
61 |
62 | - **verbose** (bool, optional):
63 | Print progress messages during optimization. Default: `True`.
64 |
65 | ## Attributes
66 |
67 | | Attribute | Type | Description |
68 | |--------------------|-----------------|--------------------------------------------------------|
69 | | `fireflies` | `torch.Tensor` | Positions of the fireflies in the search space. |
70 | | `device` | `torch.device` | Device used for computations (`cpu` or `cuda`). |
71 | | `current_alpha` | `float` | Current value of `alpha` during optimization. |
72 |
73 | ## Methods
74 |
75 | ### `optimize()`
76 |
77 | Runs the optimization loop for the specified number of steps.
78 |
79 | **Example:**
80 |
81 | ```python
82 | optimizer.optimize()
83 | ```
84 |
85 | ### `get_best_solution()`
86 |
87 | Retrieves the best solution found by the optimizer.
88 |
89 | **Returns:**
90 |
91 | - **best_firefly** (`torch.Tensor`):
92 | The best solution vector found.
93 |
94 | **Example:**
95 |
96 | ```python
97 | best_solution = optimizer.get_best_solution()
98 | print(f"Best solution: {best_solution}")
99 | ```
100 |
101 | ### `generate()`
102 |
103 | Generates a new set of fireflies, reinitializing their positions.
104 |
105 | **Returns:**
106 |
107 | - **fireflies** (`torch.Tensor`):
108 | The new set of fireflies.
109 |
110 | **Example:**
111 |
112 | ```python
113 | optimizer.generate()
114 | ```
115 |
116 | ### `reset()`
117 |
118 | Resets the optimizer to its initial state, including `alpha` and firefly positions.
119 |
120 | **Example:**
121 |
122 | ```python
123 | optimizer.reset()
124 | ```
125 |
126 | ---
127 |
128 | **Note:** The Firefly Algorithm is inspired by the flashing behavior of fireflies and is suitable for continuous optimization problems. This implementation allows for customization and includes optional genetic algorithm operations for enhanced performance.
129 |
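For intuition, the core movement rule that `beta0`, `gamma`, and `alpha` control can be sketched as follows (a simplified, illustrative form, not the library's internal implementation):

```python
import torch

# Firefly i moves toward a brighter firefly j with attractiveness
# beta0 * exp(-gamma * r^2), plus alpha-scaled random noise.
x_i, x_j = torch.randn(10), torch.randn(10)
beta0, gamma, alpha = 2.0, 1.0, 0.1
r_sq = ((x_j - x_i) ** 2).sum()
x_i = x_i + beta0 * torch.exp(-gamma * r_sq) * (x_j - x_i) + alpha * torch.randn(10)
```
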
130 | **Example Usage:**
131 |
132 | ```python
133 | from swarms_torch.firefly import FireflyOptimizer
134 | from torch import Tensor
135 |
136 |
137 | def rosenbrock(x: Tensor) -> Tensor:
138 | return (
139 | 100 * (x[..., 1:] - x[..., :-1] ** 2) ** 2 + (1 - x[..., :-1]) ** 2
140 | ).sum(dim=-1)
141 |
142 |
143 | if __name__ == "__main__":
144 | optimizer = FireflyOptimizer(
145 | cost_function=rosenbrock,
146 | steps=100,
147 | species=10,
148 | population_size=100,
149 | dimensions=10,
150 | lower_bound=-4,
151 | upper_bound=4,
152 | # Many more parameters can be set, see the documentation for more details
153 | )
154 | optimizer.optimize()
155 | best_solution = optimizer.get_best_solution()
156 | print(f"Best solution: {best_solution}")
157 | ```
--------------------------------------------------------------------------------
/docs/swarms/index.md:
--------------------------------------------------------------------------------
1 | # Swarms Installation Guide
2 |
3 |
13 |
14 | You can install `swarms` with pip in a
15 | [**Python>=3.10**](https://www.python.org/) environment.
16 |
17 | ## Prerequisites
18 |
19 | Before you begin, ensure you have the following installed:
20 |
21 | - Python 3.10 or higher: [Download Python](https://www.python.org/)
22 | - pip (version 21.0 or newer): `pip >= 21.0`
23 | - git (for cloning the repository): [Download Git](https://git-scm.com/)
24 |
25 | ## Installation Options
26 |
27 | === "pip (Recommended)"
28 |
29 | #### Headless Installation
30 |
31 | The headless installation of `swarms` is designed for environments where graphical user interfaces (GUIs) are not needed, making it more lightweight and suitable for server-side applications.
32 |
33 | ```bash
34 | pip3 install swarms-torch
35 | ```
36 |
37 | === "Development Installation"
38 |
39 | === "Using virtualenv"
40 |
41 | 1. **Clone the repository and navigate to the root directory:**
42 |
43 | ```bash
44 | git clone https://github.com/kyegomez/swarms-pytorch
45 | cd swarms-pytorch
46 | ```
47 |
48 | 2. **Setup Python environment and activate it:**
49 |
50 | ```bash
51 | python3 -m venv venv
52 | source venv/bin/activate
53 | pip install --upgrade pip
54 | ```
55 |
56 | 3. **Install Swarms:**
57 |
58 | - Headless install:
59 |
60 | ```bash
61 | pip install -e .
62 | ```
63 |
64 | - Desktop install:
65 |
66 | ```bash
67 | pip install -e .[desktop]
68 | ```
69 |
70 | === "Using Anaconda"
71 |
72 | 1. **Create and activate an Anaconda environment:**
73 |
74 | ```bash
75 | conda create -n swarms python=3.10
76 | conda activate swarms
77 | ```
78 |
79 | 2. **Clone the repository and navigate to the root directory:**
80 |
81 | ```bash
82 | git clone https://github.com/kyegomez/swarms-pytorch
83 | cd swarms-pytorch
84 | ```
85 |
86 | 3. **Install Swarms:**
87 |
88 | - Headless install:
89 |
90 | ```bash
91 | pip install -e .
92 | ```
93 |
94 | - Desktop install:
95 |
96 | ```bash
97 | pip install -e .[desktop]
98 | ```
99 |
100 | === "Using Poetry"
101 |
102 | 1. **Clone the repository and navigate to the root directory:**
103 |
104 | ```bash
105 | git clone https://github.com/kyegomez/swarms-pytorch
106 | cd swarms-pytorch
107 | ```
108 |
109 | 2. **Setup Python environment and activate it:**
110 |
111 | ```bash
112 | poetry env use python3.10
113 | poetry shell
114 | ```
115 |
116 | 3. **Install Swarms:**
117 |
118 | - Headless install:
119 |
120 | ```bash
121 | poetry install
122 | ```
123 |
124 | - Desktop install:
125 |
126 | ```bash
127 | poetry install --extras "desktop"
128 | ```
129 |
130 | === "CI/CD Pipelines"
131 |
132 | Integrating Swarms into your CI/CD pipeline ensures automated testing and deployment.
133 |
134 | #### Using GitHub Actions
135 |
136 | ```yaml
137 | # .github/workflows/ci.yml
138 | name: CI
139 |
140 | on:
141 | push:
142 | branches: [ main ]
143 | pull_request:
144 | branches: [ main ]
145 |
146 | jobs:
147 | build:
148 |
149 | runs-on: ubuntu-latest
150 |
151 | steps:
152 | - uses: actions/checkout@v2
153 | - name: Set up Python
154 | uses: actions/setup-python@v2
155 | with:
156 | python-version: "3.10"
157 | - name: Install dependencies
158 | run: |
159 | python -m venv venv
160 | source venv/bin/activate
161 | pip install --upgrade pip
162 | pip install -e .
163 | - name: Run tests
164 | run: |
165 | source venv/bin/activate
166 | pytest
167 | ```
168 |
169 | #### Using Jenkins
170 |
171 | ```groovy
172 | pipeline {
173 | agent any
174 |
175 | stages {
176 | stage('Clone repository') {
177 | steps {
178 | git 'https://github.com/kyegomez/swarms-pytorch'
179 | }
180 | }
181 | stage('Setup Python') {
182 | steps {
183 | sh 'python3 -m venv venv'
184 | sh '. venv/bin/activate && pip install --upgrade pip'
185 | }
186 | }
187 | stage('Install dependencies') {
188 | steps {
189 | sh '. venv/bin/activate && pip install -e .'
190 | }
191 | }
192 | stage('Run tests') {
193 | steps {
194 | sh '. venv/bin/activate && pytest'
195 | }
196 | }
197 | }
198 | }
199 | ```
200 |
--------------------------------------------------------------------------------
/docs/swarms/pso.md:
--------------------------------------------------------------------------------
1 | # `ParticleSwarmOptimization` Class
2 |
3 | The `ParticleSwarmOptimization` class implements the Particle Swarm Optimization (PSO) algorithm. PSO is a computational method that optimizes a problem by iteratively trying to improve a candidate solution with regard to a given measure of quality. It solves a problem by having a population of candidate solutions, here dubbed particles, and moving these particles around in the search-space according to simple mathematical formulae over the particle's position and velocity.
4 |
5 | ## Attributes
6 |
7 | - `goal` (str): The goal string to be optimized.
8 | - `n_particles` (int): Number of particles.
9 | - `inertia` (float): Inertia weight.
10 | - `personal_best_weight` (float): Personal best weight.
11 | - `global_best_weight` (float): Global best weight.
12 | - `particles` (torch.Tensor): The particles' positions.
13 | - `velocities` (torch.Tensor): The particles' velocities.
14 | - `personal_best` (torch.Tensor): The personal best positions of each particle.
15 | - `global_best` (torch.Tensor): The global best position.
16 |
17 | ## Methods
18 |
19 | ### `__init__(self, goal: str = None, n_particles: int = 100, inertia: float = 0.5, personal_best_weight: float = 1.5, global_best_weight: float = 1.5, dim: int = 1)`
20 |
21 | The constructor for the `ParticleSwarmOptimization` class. Initializes the particles with random positions and velocities, and the personal best and global best with the initial positions of the particles.
22 |
23 | #### Parameters
24 |
25 | - `goal` (str, optional): The goal string to be optimized.
26 | - `n_particles` (int, optional): Number of particles. Default is 100.
27 | - `inertia` (float, optional): Inertia weight. Default is 0.5.
28 | - `personal_best_weight` (float, optional): Personal best weight. Default is 1.5.
29 | - `global_best_weight` (float, optional): Global best weight. Default is 1.5.
30 | - `dim` (int, optional): The dimension of the problem. Default is 1.
31 |
32 | #### Example
33 |
34 | ```python
35 | pso = ParticleSwarmOptimization(goal="Attention is all you need", n_particles=100)
36 | ```
37 |
38 |
39 | ### `compute_fitness(self, particle)`
40 |
41 | Computes the fitness value of a particle. The fitness value is the inverse of the Euclidean distance between the particle and the goal.
42 |
43 | #### Parameters
44 |
45 | - `particle` (torch.Tensor): The particle to compute the fitness value for.
46 |
47 | #### Returns
48 |
49 | - `fitness` (float): The fitness value of the particle.
50 |
51 | #### Example
52 |
53 | ```python
54 | fitness = pso.compute_fitness(particle)
55 | ```
56 |
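A hedged sketch of this definition (assuming the goal is encoded as ASCII codes and a small epsilon guards against division by zero):

```python
import torch

# Inverse Euclidean distance to the encoded goal; closer particles score higher.
goal = torch.tensor([ord(c) for c in "Hello"], dtype=torch.float32)
particle = torch.rand(goal.shape[0]) * 95 + 32  # random printable ASCII codes
fitness = 1.0 / (torch.norm(particle - goal) + 1e-10)
```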
57 |
58 | ### `update(self)`
59 |
60 | Updates the personal best and global best, and the velocity and position of each particle.
61 |
62 | #### Example
63 |
64 | ```python
65 | pso.update()
66 | ```
67 |
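The update follows the canonical PSO equations. The sketch below is illustrative only, with shapes `(n_particles, dim)` and weight names mirroring the constructor parameters above:

```python
import torch

n_particles, dim = 100, 26
particles = torch.rand(n_particles, dim)
velocities = torch.zeros(n_particles, dim)
personal_best = particles.clone()
global_best = particles[0].clone()
inertia, personal_best_weight, global_best_weight = 0.5, 1.5, 1.5

# Random coefficients re-drawn every step keep the search stochastic.
r1, r2 = torch.rand(n_particles, dim), torch.rand(n_particles, dim)
velocities = (
    inertia * velocities
    + personal_best_weight * r1 * (personal_best - particles)
    + global_best_weight * r2 * (global_best - particles)
)
particles = particles + velocities
```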
68 |
69 | ### `optimize(self, iterations: int = 1000)`
70 |
71 | Optimizes the goal string. Updates the particles for a given number of iterations and prints the best particle at each iteration.
72 |
73 | #### Parameters
74 |
75 | - `iterations` (int, optional): The maximum number of iterations. Default is 1000.
76 |
77 | #### Example
78 |
79 | ```python
80 | pso.optimize(iterations=1000)
81 | ```
82 |
83 |
84 | ## Usage Examples
85 |
86 |
87 | ### Example 1: Optimize a String
88 |
89 | In this example, we will optimize the string "Attention is all you need" using 100 particles.
90 |
91 | ```python
92 | pso = ParticleSwarmOptimization(goal="Attention is all you need", n_particles=100)
93 | pso.optimize(iterations=1000)
94 | ```
95 | ### Example 2: Optimize a Different String
96 |
97 | In this example, we will optimize the string "Hello, World!" using 200 particles.
98 |
99 | ```python
100 | pso = ParticleSwarmOptimization(goal="Hello, World!", n_particles=200)
101 | pso.optimize(iterations=1000)
102 | ```
103 |
104 |
105 | ### Example 3: Using Different Weights
106 |
107 | In this example, we will optimize the string "Particle Swarm Optimization" using 100 particles, an inertia weight of 0.8, a personal best weight of 2.0, and a global best weight of 2.0.
108 |
109 | ```python
110 | pso = ParticleSwarmOptimization(goal="Particle Swarm Optimization", n_particles=100, inertia=0.8, personal_best_weight=2.0, global_best_weight=2.0)
111 | pso.optimize(iterations=1000)
112 | ```
113 |
114 |
115 | ### Example 4: Using a Large Number of Particles
116 |
117 | In this example, we will optimize the string "Large number of particles" using 1000 particles.
118 |
119 | ```python
120 | pso = ParticleSwarmOptimization(goal="Large number of particles", n_particles=1000)
121 | pso.optimize(iterations=1000)
122 | ```
123 |
124 |
125 | ### Example 5: Using a Small Number of Iterations
126 |
127 | In this example, we will optimize the string "Small number of iterations" using 100 particles and 100 iterations.
128 |
129 | ```python
130 | pso = ParticleSwarmOptimization(goal="Small number of iterations", n_particles=100)
131 | pso.optimize(iterations=100)
132 | ```
133 |
134 |
135 | ### Example 6: Using a Large Number of Iterations
136 |
137 | In this example, we will optimize the string "Large number of iterations" using 100 particles and 10000 iterations.
138 |
139 | ```python
140 | pso = ParticleSwarmOptimization(goal="Large number of iterations", n_particles=100)
141 | pso.optimize(iterations=10000)
142 | ```
143 |
144 |
145 | ### Example 7: Using Different Characters
146 |
147 | In this example, we will optimize the string "1234567890" using 100 particles.
148 |
149 | ```python
150 | pso = ParticleSwarmOptimization(goal="1234567890", n_particles=100)
151 | pso.optimize(iterations=1000)
152 | ```
153 |
154 |
155 | ### Example 8: Using Special Characters
156 |
157 | In this example, we will optimize the string "!@#$%^&*()" using 100 particles.
158 |
159 | ```python
160 | pso = ParticleSwarmOptimization(goal="!@#$%^&*()", n_particles=100)
161 | pso.optimize(iterations=1000)
162 | ```
163 |
164 |
165 | ### Example 9: Using a Long String
166 |
167 | In this example, we will optimize a long string using 100 particles.
168 |
169 | ```python
170 | pso = ParticleSwarmOptimization(goal="This is a very long string that we want to optimize using Particle Swarm Optimization.", n_particles=100)
171 | pso.optimize(iterations=1000)
172 | ```
173 |
174 |
175 | ### Example 10: Using a Short String
176 |
177 | In this example, we will optimize a short string using 100 particles.
178 |
179 | ```python
180 | pso = ParticleSwarmOptimization(goal="Short", n_particles=100)
181 | pso.optimize(iterations=1000)
182 | ```
--------------------------------------------------------------------------------
/docs/swarms/qb.md:
--------------------------------------------------------------------------------
1 | # `QueenBeeGa` Class
2 |
3 | The `QueenBeeGa` class implements the Queen Bee Genetic Algorithm (GA). This GA is inspired by the evolution of bees, where the fittest solution is designated as the queen and the rest of the population competes to mate with it. The strong exploitation is balanced by a higher-than-normal mutation rate.
4 |
5 | ## Attributes
6 |
7 | - `goal` (str): The goal string to be optimized.
8 | - `pop_size` (int): Population size.
9 | - `mutation_prob` (float): Mutation probability.
10 | - `strong_mutation_rate` (float): Strong mutation rate.
11 | - `strong_mutation_prob` (float): Strong mutation probability.
12 | - `num_tournament_participants` (int): Number of tournament participants.
13 | - `gene_length` (int): Length of the gene.
14 | - `gene_midpoint` (int): Midpoint of the gene.
15 | - `target_gene` (torch.Tensor): The target gene.
16 | - `strong_mutate_pool_size` (float): Size of the strong mutate pool.
17 | - `num_code_mutate` (float): Number of code mutations.
18 | - `strong_num_code_mutate` (float): Number of strong code mutations.
19 | - `pool` (torch.Tensor): The pool of genes.
20 | - `queen` (torch.Tensor): The queen gene.
21 | - `queen_fitness` (float): The fitness of the queen.
22 | - `generation` (int): The current generation.
23 |
24 | ## Methods
25 |
26 |
27 | ### `__init__(self, goal: str = "Attention is all you need", pop_size: int = 100, mutation_prob: float = 0.04, strong_mutation_rate: float = 0.1, strong_mutation_prob: float = 0.25, num_tournament_participants: int = 25)`
28 |
29 | The constructor for the `QueenBeeGa` class. Initializes the pool of genes, the queen, and the queen's fitness.
30 |
31 | #### Parameters
32 |
33 | - `goal` (str, optional): The goal string to be optimized. Default is "Attention is all you need".
34 | - `pop_size` (int, optional): Population size. Default is 100.
35 | - `mutation_prob` (float, optional): Mutation probability. Default is 0.04.
36 | - `strong_mutation_rate` (float, optional): Strong mutation rate. Default is 0.1.
37 | - `strong_mutation_prob` (float, optional): Strong mutation probability. Default is 0.25.
38 | - `num_tournament_participants` (int, optional): Number of tournament participants. Default is 25.
39 |
40 | #### Example
41 |
42 | ```python
43 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25)
44 | ```
45 |
46 |
47 | ### `encode(s)`
48 |
49 | Converts a string to its ASCII values.
50 |
51 | #### Parameters
52 |
53 | - `s` (str): The string to encode.
54 |
55 | #### Returns
56 |
57 | - `encoded` (torch.Tensor): The encoded string.
58 |
59 | #### Example
60 |
61 | ```python
62 | encoded = QueenBeeGa.encode("Hello")
63 | ```
64 |
65 |
66 | ### `decode(t)`
67 |
68 | Converts a tensor of ASCII values back to a string.
69 |
70 | #### Parameters
71 |
72 | - `t` (torch.Tensor): The tensor to decode.
73 |
74 | #### Returns
75 |
76 | - `decoded` (str): The decoded string.
77 |
78 | #### Example
79 |
80 | ```python
81 | decoded = QueenBeeGa.decode(encoded)
82 | ```
83 |
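The round trip the two helpers describe can be sketched directly (assuming plain ASCII encoding, as the method descriptions state):

```python
import torch

encoded = torch.tensor([ord(c) for c in "Hello"])  # string -> ASCII tensor
decoded = "".join(chr(int(v)) for v in encoded)    # ASCII tensor -> string
assert decoded == "Hello"
```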
84 |
85 | ### `run(self, max_generations: int = 1000)`
86 |
87 | Runs the Queen Bee GA. Evolves the population for a given number of generations.
88 |
89 | #### Parameters
90 |
91 | - `max_generations` (int, optional): The maximum number of generations. Default is 1000.
92 |
93 | #### Example
94 |
95 | ```python
96 | optimizer.run(max_generations=100)
97 | ```
98 |
99 |
100 | ### `_evolve(self)`
101 |
102 | Executes one step of the evolution process. Sorts the population by fitness, displays the queen and the population, and updates the queen and the population.
103 |
104 | #### Example
105 |
106 | ```python
107 | optimizer._evolve()
108 | ```
109 |
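A toy sketch of the queen-selection step described above (illustrative only, with a made-up fitness for demonstration):

```python
import torch

pool = torch.randint(32, 127, (100, 26)).float()  # population of genes
target = torch.randint(32, 127, (26,)).float()    # target gene
fitnesses = -(pool - target).abs().sum(dim=-1)    # higher is better
order = fitnesses.argsort(descending=True)
queen, pool = pool[order[0]], pool[order[1:]]     # fittest gene becomes the queen
```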
110 |
111 | ### `_check_convergence(self)`
112 |
113 | Checks if any of the solutions has achieved the goal.
114 |
115 | #### Returns
116 |
117 | - `converged` (bool): Whether any of the solutions has achieved the goal.
118 |
119 | #### Example
120 |
121 | ```python
122 | converged = optimizer._check_convergence()
123 | ```
124 |
125 |
126 | ## Usage Examples
127 |
128 |
129 | ### Example 1: Optimize a String
130 |
131 | In this example, we will optimize the string "Attention is all you need" using a population size of 100, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 25 tournament participants.
132 |
133 | ```python
134 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25)
135 | optimizer.run(max_generations=100)
136 | ```
137 |
138 |
139 | ### Example 2: Using a Different Goal String
140 |
141 | In this example, we will optimize the string "Hello, World!" using a population size of 100, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 25 tournament participants.
142 |
143 | ```python
144 | optimizer = QueenBeeGa(goal="Hello, World!", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25)
145 | optimizer.run(max_generations=100)
146 | ```
147 |
148 |
149 | ### Example 3: Using a Different Population Size
150 |
151 | In this example, we will optimize the string "Attention is all you need" using a population size of 200, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 25 tournament participants.
152 |
153 | ```python
154 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=200, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=25)
155 | optimizer.run(max_generations=100)
156 | ```
157 |
158 |
159 | ### Example 4: Using Different Mutation Probabilities
160 |
161 | In this example, we will optimize the string "Attention is all you need" using a population size of 100, a mutation probability of 0.05, a strong mutation rate of 0.1, a strong mutation probability of 0.3, and 25 tournament participants.
162 |
163 | ```python
164 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.05, strong_mutation_rate=0.1, strong_mutation_prob=0.3, num_tournament_participants=25)
165 | optimizer.run(max_generations=100)
166 | ```
167 |
168 |
169 | ### Example 5: Using a Different Number of Tournament Participants
170 |
171 | In this example, we will optimize the string "Attention is all you need" using a population size of 100, a mutation probability of 0.04, a strong mutation rate of 0.1, a strong mutation probability of 0.25, and 30 tournament participants.
172 |
173 | ```python
174 | optimizer = QueenBeeGa(goal="Attention is all you need", pop_size=100, mutation_prob=0.04, strong_mutation_rate=0.1, strong_mutation_prob=0.25, num_tournament_participants=30)
175 | optimizer.run(max_generations=100)
176 | ```
--------------------------------------------------------------------------------
/docs/swarms/so.md:
--------------------------------------------------------------------------------
1 | # `SPO` Class
2 |
3 |
4 | The `SPO` class implements the Spiral Optimization (SPO) algorithm, which searches by rotating candidate points around a common center; this implementation optimizes toward a target string.
5 |
6 | ## Attributes
7 |
8 |
9 | - `goal` (torch.Tensor): The goal string to be optimized.
10 | - `m` (int): Number of search points.
11 | - `k_max` (int): Maximum number of iterations.
12 | - `n_dim` (int): Length of the goal string.
13 | - `points` (torch.Tensor): The search points.
14 | - `center` (torch.Tensor): The center point.
15 |
16 | ## Methods
17 |
18 |
19 | ### `__init__(self, goal: str = None, m: int = 10, k_max: int = 1000)`
20 |
21 | The constructor for the `SPO` class. Initializes the search points and the center.
22 |
23 | #### Parameters
24 |
25 | - `goal` (str, optional): The goal string to be optimized.
26 | - `m` (int, optional): Number of search points. Default is 10.
27 | - `k_max` (int, optional): Maximum number of iterations. Default is 1000.
28 |
29 | #### Example
30 |
31 | ```python
32 | spo = SPO(goal="Hello SPO", m=100, k_max=1000)
33 | ```
34 |
35 |
36 | ### `_step_rate(self, k)`
37 |
38 | Defines the step rate function.
39 |
40 | #### Parameters
41 |
42 | - `k` (int): Current iteration.
43 |
44 | #### Returns
45 |
46 | - `step_rate` (float): Step rate for the current iteration.
47 |
48 | #### Example
49 |
50 | ```python
51 | step_rate = spo._step_rate(k)
52 | ```
53 |
54 |
55 | ### `_update_points(self, k)`
56 |
57 | Updates the search points based on the spiral model.
58 |
59 | #### Parameters
60 |
61 | - `k` (int): Current iteration.
62 |
63 | #### Example
64 |
65 | ```python
66 | spo._update_points(k)
67 | ```
68 |
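The spiral model contracts and rotates each point around the current center. A two-dimensional sketch (illustrative only; the library generalizes this to the full gene length):

```python
import math
import torch

theta = math.pi / 4   # rotation angle
r_k = 0.95            # step rate for iteration k, from _step_rate(k)
rotation = torch.tensor(
    [[math.cos(theta), -math.sin(theta)],
     [math.sin(theta), math.cos(theta)]]
)
center = torch.zeros(2)
points = torch.randn(10, 2)
points = center + r_k * (points - center) @ rotation.T
```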
69 |
70 | ### `_update_center(self)`
71 |
72 | Finds the best search point and sets it as the new center.
73 |
74 | #### Example
75 |
76 | ```python
77 | spo._update_center()
78 | ```
79 |
80 |
81 | ### `optimize(self)`
82 |
83 | Runs the optimization loop. Updates the search points and the center for a given number of iterations.
84 |
85 | #### Example
86 |
87 | ```python
88 | spo.optimize()
89 | ```
90 |
91 |
92 | ### `best_string(self)`
93 |
94 | Converts the best found point to its string representation.
95 |
96 | #### Returns
97 |
98 | - `best_string` (str): The best string.
99 |
100 | #### Example
101 |
102 | ```python
103 | best_string = spo.best_string()
104 | print("Best Matched String:", best_string)
105 | ```
106 |
107 |
108 | ## Usage Examples
109 |
110 |
111 | ### Example 1: Optimize a String
112 |
113 | In this example, we will optimize the string "Attention is all you need" using 100 search points and 1000 iterations.
114 |
115 | ```python
116 | spo = SPO(goal="Attention is all you need", m=100, k_max=1000)
117 | spo.optimize()
118 | print("Best Matched String:", spo.best_string())
119 | ```
120 |
121 |
122 | ### Example 2: Using a Different Goal String
123 |
124 | In this example, we will optimize the string "Hello, World!" using 100 search points and 1000 iterations.
125 |
126 | ```python
127 | spo = SPO(goal="Hello, World!", m=100, k_max=1000)
128 | spo.optimize()
129 | print("Best Matched String:", spo.best_string())
130 | ```
131 |
132 |
133 | ### Example 3: Using a Different Number of Search Points
134 |
135 | In this example, we will optimize the string "Attention is all you need" using 200 search points and 1000 iterations.
136 |
137 | ```python
138 | spo = SPO(goal="Attention is all you need", m=200, k_max=1000)
139 | spo.optimize()
140 | print("Best Matched String:", spo.best_string())
141 | ```
142 |
--------------------------------------------------------------------------------
/docs/vision.md:
--------------------------------------------------------------------------------
1 | # Advancing Deep Learning Through Novel Swarm Intelligence Model Architectures
2 |
3 | The rapid evolution of deep learning has undeniably revolutionized numerous facets of technology and society. However, the prevailing approach—scaling up models using vast amounts of data and computational power—is reaching its practical limits. To transcend these boundaries and usher in the next epoch of artificial intelligence (AI), we are embarking on a mission to develop **Novel Swarm Intelligence Model Architectures**. This initiative is predicated on the belief that the future of deep learning hinges not on the quantity of data or compute, but on innovative architectural paradigms that can emulate and surpass natural intelligence systems.
4 |
5 | ### **The Need for Alternate Model Architectures**
6 |
7 | Current deep learning models predominantly rely on increasing layers, parameters, and training data to achieve marginal improvements. This methodology is akin to building taller towers on shaky foundations—it is unsustainable and inefficient. The challenges are multifold:
8 |
9 | - **Data Saturation**: High-quality, labeled data is becoming scarce and expensive to procure. Moreover, models trained on massive datasets often fail to generalize well to unseen scenarios.
10 | - **Computational Constraints**: The energy consumption and computational requirements for training colossal models are exorbitant, leading to environmental and economic concerns.
11 | - **Diminishing Returns**: Simply scaling existing architectures yields progressively smaller performance gains, indicating a plateau in this trajectory.
12 |
13 | To overcome these hurdles, we must pivot towards creating novel model architectures that can achieve superior performance without reliance on data or compute scaling.
14 |
15 | ### **Swarm Intelligence: A Paradigm Shift**
16 |
17 | Nature offers profound insights into efficient and intelligent systems. Swarm intelligence, observed in colonies of ants, flocks of birds, and schools of fish, exemplifies how simple agents can collectively perform complex tasks through local interactions and without centralized control.
18 |
19 | Applying swarm intelligence to AI involves developing architectures where numerous smaller models (agents) collaborate, communicate, and adapt to achieve a common goal. This approach offers several advantages:
20 |
21 | - **Scalability**: Systems can be scaled horizontally by adding more agents without exponentially increasing computational demands.
22 | - **Robustness**: The decentralized nature ensures that the failure of individual agents does not compromise the entire system.
23 | - **Adaptability**: Agents can adapt to new information and environments dynamically, enhancing the system's ability to generalize.
24 |
25 | ### **Artificial Superintelligence Through Swarms**
26 |
27 | Artificial Superintelligence (ASI) represents AI that surpasses human intelligence across all domains. Achieving ASI through a swarm of models rather than a singular entity offers a more feasible and resilient path:
28 |
29 | - **Diversity of Thought**: Multiple agents with varied specializations can approach problems from different perspectives, leading to more creative and effective solutions.
30 | - **Collective Learning**: Agents can share knowledge and learn from each other's experiences, accelerating the overall learning process.
31 | - **Emergent Behavior**: Complex and intelligent behaviors can emerge from the interactions of simple agents, potentially leading to capabilities beyond programmed instructions.
32 |
33 | ### **Our Mission Objectives**
34 |
35 | 1. **Innovate Model Architectures**: Design and develop novel swarm-based model architectures that can learn and perform tasks more efficiently than traditional models.
36 | 2. **Reduce Reliance on Data and Compute**: Create systems that require less data and computational power by leveraging the collective intelligence of agent swarms.
37 | 3. **Enhance Generalization and Adaptability**: Build models capable of adapting to new and unforeseen situations through decentralized learning and collaboration.
38 | 4. **Pave the Way for ASI**: Establish foundational architectures that can evolve into artificial superintelligence through emergent behaviors and continuous learning.
39 |
40 | ### **Conclusion**
41 |
42 | The future of deep learning and AI advancement lies not in the augmentation of data and computational resources but in the fundamental reimagining of model architectures. By embracing swarm intelligence, we aim to break through the current limitations and unlock new potentials in AI capabilities.
43 |
44 | Our mission to develop **Novel Swarm Intelligence Model Architectures** is more than an academic pursuit; it is a strategic imperative to ensure that AI continues to evolve sustainably and beneficially. We are committed to pioneering this paradigm shift, confident that it will lead to breakthroughs not just in technology, but in how intelligence—artificial or otherwise—is understood and harnessed.
45 |
46 | ---
47 |
48 | Together, we will forge a path toward an AI future that is efficient, adaptable, and intelligent beyond the sum of its parts.
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch import MixtureOfMambas
3 |
4 | # Create a 3D tensor for text
5 | x = torch.rand(1, 512, 512)
6 |
7 | # Create an instance of the MixtureOfMambas model
8 | model = MixtureOfMambas(
9 | num_mambas=2, # Number of Mambas in the model
10 | dim=512, # Dimension of the input tensor
11 | d_state=1024, # Dimension of the hidden state
12 | depth=4, # Number of layers in the model
13 | d_conv=1024, # Dimension of the convolutional layers
14 | expand=4, # Expansion factor for the model
15 | fusion_method="absmax", # Fusion method for combining Mambas' outputs
16 | custom_fusion_func=None, # Custom fusion function (if any)
17 | )
18 |
19 | # Pass the input tensor through the model and print the output shape
20 | print(model(x).shape)
21 |
--------------------------------------------------------------------------------
/examples/ant_colony.py:
--------------------------------------------------------------------------------
1 | from swarms_torch.structs.ant_colony_swarm import AntColonyOptimization
2 |
3 | # Usage:
4 | goal_string = "Hello ACO"
5 | aco = AntColonyOptimization(goal_string, num_iterations=1000)
6 | best_solution = aco.optimize()
7 | print("Best Matched String:", best_solution)
8 |
--------------------------------------------------------------------------------
/examples/fire_fly_example.py:
--------------------------------------------------------------------------------
1 | from swarms_torch.structs.firefly import FireflyOptimizer
2 | from torch import Tensor
3 |
4 |
5 | def rosenbrock(x: Tensor) -> Tensor:
6 | return (
7 | 100 * (x[..., 1:] - x[..., :-1] ** 2) ** 2 + (1 - x[..., :-1]) ** 2
8 | ).sum(dim=-1)
9 |
10 |
11 | if __name__ == "__main__":
12 | optimizer = FireflyOptimizer(
13 | cost_function=rosenbrock,
14 | steps=100,
15 | species=10,
16 | population_size=100,
17 | dimensions=10,
18 | lower_bound=-4,
19 | upper_bound=4,
20 | # Many more parameters can be set, see the documentation for more details
21 | )
22 | optimizer.optimize()
23 | best_solution = optimizer.get_best_solution()
24 | print(f"Best solution: {best_solution}")
25 |
--------------------------------------------------------------------------------
/examples/fish_school_example.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.structs.fish_school import Fish, FishSchool
3 |
4 | # Create random source and target sequences
5 | src = torch.randn(10, 32, 512)
6 | tgt = torch.randn(10, 32, 512)
7 |
8 | # Create random labels
9 | labels = torch.randint(0, 512, (10, 32))
10 |
11 | # Create a fish and train it on the random data
12 | fish = Fish(512, 8, 6)
13 | fish.train(src, tgt, labels)
14 | print(fish.food) # Print the fish's food
15 |
16 | # Create a fish school and optimize it on the random data
17 | school = FishSchool(10, 512, 8, 6, 100)
18 | school.forward(src, tgt, labels)
19 | print(school.fish[0].food) # Print the first fish's food
20 |
--------------------------------------------------------------------------------
/examples/mixture_of_mambas.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas
3 |
4 | # Example Usage
5 | num_models = 3
6 | dim = 16
7 | state_range = (1, 20)
8 | conv_range = (1, 10)
9 | expand_range = (1, 5)
10 |
11 | mixture_model = MixtureOfMambas(
12 | num_models, dim, state_range, conv_range, expand_range
13 | )
14 | x = torch.randn(2, 64, dim).to("cuda")
15 | output = mixture_model(
16 | x, fusion_method="average"
17 | ) # Or use 'weighted' with weights
18 |
--------------------------------------------------------------------------------
/examples/new_mergers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.mergers.all_new_evo_mergers import (
3 | hyperslice_merge,
4 | random_subspace_merge,
5 | dimensional_cross_fusion,
6 | weighted_evolutionary_crossover,
7 | permutation_weight_swapping,
8 | )
9 |
10 | # Example of how to use the evolutionary merge methods
11 | if __name__ == "__main__":
12 | # Example models, replace with actual model instances
13 | model_1 = torch.nn.Linear(10, 10)
14 | model_2 = torch.nn.Linear(10, 10)
15 | model_3 = torch.nn.Linear(10, 10)
16 |
17 | # Perform HyperSlice merge
18 | merged_model_hs = hyperslice_merge(
19 | [model_1, model_2, model_3], slice_indices=[0, 2, 4]
20 | )
21 |
22 | # Perform Random Subspace merge
23 | merged_model_rs = random_subspace_merge(
24 | [model_1, model_2, model_3], subspace_fraction=0.5
25 | )
26 |
27 | # Perform Dimensional Cross-fusion merge
28 | merged_model_dc = dimensional_cross_fusion([model_1, model_2], cross_axis=0)
29 |
30 | # Perform Weighted Evolutionary Crossover merge
31 | merged_model_wc = weighted_evolutionary_crossover(
32 | [model_1, model_2, model_3], performance_scores=[0.7, 0.85, 0.65]
33 | )
34 |
35 | # Perform Permutation-based Weight Swapping
36 | merged_model_pw = permutation_weight_swapping(
37 | [model_1, model_2], permutation_seed=42
38 | )
39 |
--------------------------------------------------------------------------------
/examples/nnt.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.structs.neuronal_transformer import NNTransformer
3 |
4 | x = torch.randn(1, 10)
5 |
6 | network = NNTransformer(
7 | # transformer cells
8 | neuron_count=5,
9 | # num states
10 | num_states=10,
11 | # input dim
12 | input_dim=10,
13 | # output dim
14 | output_dim=10,
15 | # nhead
16 | nhead=2,
17 | )
18 |
19 |
20 | output = network(x)
21 | print(output)
22 |
--------------------------------------------------------------------------------
/examples/silu_visualization.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 |
5 | # SiLU (Sigmoid-weighted Linear Unit) activation function
6 | def silu(x):
7 | return x * (1 / (1 + np.exp(-x)))
8 |
9 |
10 | # Generate inputs and calculate SiLU outputs
11 | input_values = np.linspace(-10, 10, 100)
12 | output_values = silu(input_values)
13 |
14 | # Create 3D plot
15 | fig = plt.figure()
16 | ax = fig.add_subplot(111, projection="3d")
17 |
18 | # Scatter plot of SiLU outputs
19 | ax.scatter(
20 | input_values, output_values, input_values, c=output_values, cmap="viridis"
21 | )
22 | ax.set_xlabel("Input")
23 | ax.set_ylabel("Output")
24 | ax.set_zlabel("Input")
25 | ax.set_title("3D Visualization of SiLU Activation Function")
26 |
27 | plt.show()
28 |
--------------------------------------------------------------------------------
/examples/simple_moe.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch import SimpleMoE
3 |
4 | # Example usage:
5 | input_dim = 512 # Dimension of input tokens
6 | hidden_dim = 1024 # Hidden dimension of experts
7 | output_dim = 512 # Output dimension, should match input dimension for residual connection
8 | num_experts = 4 # Number of experts
9 |
10 | moe = SimpleMoE(input_dim, hidden_dim, output_dim, num_experts)
11 |
12 | # Create a sample input tensor (batch_size, seq_len, input_dim)
13 | x = torch.rand(10, 16, input_dim)
14 |
15 | # Forward pass through the MoE layer
16 | output = moe(x)
17 | print(output)
18 |
--------------------------------------------------------------------------------
/examples/sop.py:
--------------------------------------------------------------------------------
1 | from swarms_torch import SPO
2 |
3 | # Example Usage
4 | goal_str = "Attention is all you need"
5 | optimizer = SPO(goal_str)
6 | optimizer.optimize()
7 | print(f"Optimized String: {optimizer.best_string()}")
8 |
--------------------------------------------------------------------------------
/examples/swarmalator_example.py:
--------------------------------------------------------------------------------
1 | from swarms_torch import visualize_swarmalators, simulate_swarmalators
2 |
3 | # Init for Swarmalator
4 | # Example usage:
5 | N = 100
6 | J, alpha, beta, gamma, epsilon_a, epsilon_r, R = [0.1] * 7
7 | D = 3 # Ensure D is an integer
8 | xi, sigma_i = simulate_swarmalators(
9 | N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
10 | )
11 |
12 | # Call the visualization function
13 | visualize_swarmalators(xi)
14 |
--------------------------------------------------------------------------------
/examples/switch_moe.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch import SwitchMoE
3 |
4 | # Example usage:
5 | input_dim = 768 # Dimension of input tokens
6 | hidden_dim = 2048 # Hidden dimension of experts
7 | output_dim = 768 # Output dimension, should match input dimension for residual connection
8 | num_experts = 16 # Number of experts
9 |
10 | moe_layer = SwitchMoE(
11 | dim=input_dim,
12 | hidden_dim=hidden_dim,
13 | output_dim=output_dim,
14 | num_experts=num_experts,
15 | use_aux_loss=False,
16 | )
17 |
18 | # Create a sample input tensor (batch_size, seq_len, input_dim)
19 | x = torch.rand(32, 128, input_dim)
20 |
21 | # Forward pass through the MoE layer
22 | output, auxiliary_loss = moe_layer(x)
23 |
24 | # 'output' contains the MoE output; when use_aux_loss=True, 'auxiliary_loss'
25 | # holds the load-balancing loss, which should be added to the main loss during training.
26 |
27 | print(output)
28 | print(auxiliary_loss)
29 |
--------------------------------------------------------------------------------
/multi_modal_mergers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.mergers.mm_mergers import (
3 | modality_weighted_merge,
4 | modality_specific_layer_swap,
5 | cross_modality_weight_crossover,
6 | hierarchical_modality_fusion,
7 | modality_mutation_merge,
8 | )
9 |
10 | if __name__ == "__main__":
11 | # Example models, replace with actual multi-modal model instances
12 | model_1 = torch.nn.Linear(
13 | 100, 50
14 | ) # Assume multi-modal model (e.g., image + text)
15 | model_2 = torch.nn.Linear(100, 50)
16 | model_3 = torch.nn.Linear(100, 50)
17 |
18 | # Perform Modality-Weighted Merge
19 | merged_model_wm = modality_weighted_merge(
20 | [model_1, model_2, model_3], modality_weights=[0.6, 0.3, 0.1]
21 | )
22 |
23 | # Perform Modality-Specific Layer Swap
24 | merged_model_ls = modality_specific_layer_swap(
25 | [model_1, model_2], modality_layer_map=["image", "text"]
26 | )
27 |
28 | # Perform Cross-Modality Weight Crossover
29 | merged_model_cm = cross_modality_weight_crossover(
30 | [model_1, model_2], modality_pairs=[(0, 1)], crossover_fraction=0.5
31 | )
32 |
33 | # Perform Hierarchical Modality Fusion
34 | merged_model_hf = hierarchical_modality_fusion(
35 | [model_1, model_2, model_3], modality_hierarchy=[[0], [1, 2]]
36 | )
37 |
38 | # Perform Modality Mutation Merge
39 | merged_model_mm = modality_mutation_merge(
40 | [model_1, model_2, model_3], mutation_rate=0.01
41 | )
42 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry-core>=1.0.0"]
3 | build-backend = "poetry.core.masonry.api"
4 |
5 | [tool.poetry]
6 | name = "swarms-torch"
7 | version = "0.2.3"
8 | description = "swarms-torch - Pytorch"
9 | license = "MIT"
10 | authors = ["Kye Gomez "]
11 | homepage = "https://github.com/kyegomez/swarms-pytorch"
12 | documentation = "https://github.com/kyegomez/swarms-pytorch" # Add this if you have documentation.
13 | readme = "README.md" # Assuming you have a README.md
14 | repository = "https://github.com/kyegomez/swarms-pytorch"
15 | keywords = ["artificial intelligence", "deep learning", "optimizers", "Prompt Engineering"]
16 | classifiers = [
17 | "Development Status :: 4 - Beta",
18 | "Intended Audience :: Developers",
19 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
20 | "License :: OSI Approved :: MIT License",
21 | "Programming Language :: Python :: 3.6"
22 | ]
23 | packages = [
24 | { include = "swarms_torch" },
25 | { include = "swarms_torch/**/*.py" },
26 | ]
27 |
28 |
29 |
30 | [tool.poetry.dependencies]
31 | python = "^3.6"
32 | torch = "*"
33 | einops = "*"
34 | zetascale = "*"
35 | pytest = "*"
36 | torchvision = "*"
37 | loguru = "*"
38 | einx = "*"
39 |
40 |
41 |
42 |
43 | [tool.poetry.group.lint.dependencies]
44 | ruff = ">=0.0.249,<0.1.10"
45 | types-toml = "^0.10.8.1"
46 | types-redis = "^4.3.21.6"
47 | types-pytz = "^2023.3.0.0"
48 | black = "^23.1.0"
49 | types-chardet = "^5.0.4.6"
50 | mypy-protobuf = "^3.0.0"
51 |
52 |
53 | [tool.autopep8]
54 | max_line_length = 80
55 | ignore = "E501,W6" # or ["E501", "W6"]
56 | in-place = true
57 | recursive = true
58 | aggressive = 3
59 |
60 | [tool.ruff]
61 | line-length = 80
62 |
63 | [tool.black]
64 | line-length = 80
65 | target-version = ['py38']
66 | preview = true
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | einops
3 | zetascale
4 | pytest
5 | torchvision
6 | loguru
7 | einx
8 |
--------------------------------------------------------------------------------
/scripts/code_quality.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Navigate to the directory containing the 'swarms_torch' folder
4 | # cd /path/to/your/code/directory
5 |
6 | # Run autopep8 with max aggressiveness and in-place modification
7 | # on all Python files (*.py) under the 'swarms_torch' package directory.
8 | autopep8 --in-place --aggressive --aggressive --recursive --experimental swarms_torch/
9 |
10 | # Run black with the project settings from pyproject.toml (preview mode is
11 | # enabled there, replacing the removed --experimental-string-processing flag).
12 | black swarms_torch/
13 |
14 | # Run ruff on the 'swarms_torch' directory and apply autofixes.
15 | # Add any additional flags if needed according to your version of ruff.
16 | ruff swarms_torch/ --fix
17 |
18 | # YAPF
19 | yapf --recursive --in-place --verbose --style=google --parallel swarms_torch
20 |
--------------------------------------------------------------------------------
/scripts/get_package_requirements.py:
--------------------------------------------------------------------------------
1 | import pkg_resources
2 |
3 |
4 | def get_package_versions(requirements_path, output_path):
5 | try:
6 | with open(requirements_path, "r") as file:
7 | requirements = file.readlines()
8 | except FileNotFoundError:
9 | print(f"Error: The file '{requirements_path}' was not found.")
10 | return
11 |
12 | package_versions = []
13 |
14 | for requirement in requirements:
15 | # Skip empty lines and comments
16 | if requirement.strip() == "" or requirement.strip().startswith("#"):
17 | continue
18 |
19 | # Extract package name
20 | package_name = requirement.split("==")[0].strip()
21 | try:
22 | version = pkg_resources.get_distribution(package_name).version
23 | package_versions.append(f"{package_name}=={version}")
24 | except pkg_resources.DistributionNotFound:
25 | package_versions.append(f"{package_name}: not installed")
26 |
27 | with open(output_path, "w") as file:
28 | for package_version in package_versions:
29 | file.write(package_version + "\n")
30 | print(f"Versions written to {output_path}")
31 |
32 |
33 | # Usage
34 | get_package_versions("requirements.txt", "installed_versions.txt")
35 |
--------------------------------------------------------------------------------
/scripts/requirementstxt_to_pyproject.py:
--------------------------------------------------------------------------------
1 | import toml
2 | import pkg_resources
3 |
4 |
5 | def update_pyproject_versions(pyproject_path):
6 | try:
7 | with open(pyproject_path, "r") as file:
8 | data = toml.load(file)
9 | except FileNotFoundError:
10 | print(f"Error: The file '{pyproject_path}' was not found.")
11 | return
12 | except toml.TomlDecodeError:
13 | print(f"Error: The file '{pyproject_path}' is not a valid TOML file.")
14 | return
15 |
16 | dependencies = (
17 | data.get("tool", {}).get("poetry", {}).get("dependencies", {})
18 | )
19 |
20 | for package in dependencies:
21 | if package.lower() == "python":
22 | continue # Skip the Python version dependency
23 |
24 | try:
25 | version = pkg_resources.get_distribution(package).version
26 | dependencies[package] = version
27 | except pkg_resources.DistributionNotFound:
28 | print(f"Warning: Package '{package}' not installed.")
29 |
30 | with open(pyproject_path, "w") as file:
31 | toml.dump(data, file)
32 |
33 | print(f"Updated versions written to {pyproject_path}")
34 |
35 |
36 | # Usage
37 | update_pyproject_versions("pyproject.toml")
38 |
--------------------------------------------------------------------------------
/scripts/test_name.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Rename test files under ./tests to the "test_*" prefix that pytest discovers.
3 | find ./tests -name "*.py" -type f | while read -r file
4 | do
5 |     filename=$(basename "$file")
6 |     dir=$(dirname "$file")
7 |     if [[ $filename != test_* ]]; then
8 |         mv "$file" "$dir/test_$filename"
9 |     fi
10 | done
--------------------------------------------------------------------------------
/scripts/tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run pytest on every Python file under ./tests, one invocation per file.
3 | find ./tests -name '*.py' -exec pytest {} \;
--------------------------------------------------------------------------------
/swarms_torch/__init__.py:
--------------------------------------------------------------------------------
1 | from swarms_torch.structs.ant_colony_swarm import AntColonyOptimization
2 | from swarms_torch.structs.cellular_transformer import CellularSwarm
3 | from swarms_torch.structs.fish_school import Fish, FishSchool
4 | from swarms_torch.structs.hivemind_swarm_transformer import HivemindSwarm
5 | from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas
6 | from swarms_torch.pso.multi_swarm_pso import MultiSwarmPSO
7 | from swarms_torch.structs.neuronal_transformer import NNTransformer
8 | from swarms_torch.utils.particle_swarm import ParticleSwarmOptimization
9 | from swarms_torch.structs.queen_bee import QueenBeeGa
10 | from swarms_torch.utils.spiral_optimization import SPO
11 | from swarms_torch.pso.transformer_pso import (
12 | Particle,
13 | TransformerParticleSwarmOptimization,
14 | )
15 | from swarms_torch.structs.firefly import FireflyOptimizer
16 | from queen_bee_transformer_hierarchy import (
17 | QueenBeeTransformerHierarchy,
18 | GeneticTransformerEvolution,
19 | QueenWorkerCommunication,
20 | WorkerTransformer,
21 | )
22 | from swarms_torch.structs import * # noqa
23 |
24 | __all__ = [
25 | "ParticleSwarmOptimization",
26 | "AntColonyOptimization",
27 | "QueenBeeGa",
28 | "NNTransformer",
29 | "CellularSwarm",
30 | "SPO",
31 | "Fish",
32 | "FishSchool",
33 | "MultiSwarmPSO",
34 | "Particle",
35 | "TransformerParticleSwarmOptimization",
36 | "HivemindSwarm",
37 | "MixtureOfMambas",
38 | "FireflyOptimizer",
39 | "QueenBeeTransformerHierarchy",
40 | "GeneticTransformerEvolution",
41 | "QueenWorkerCommunication",
42 | "WorkerTransformer",
43 | ]
44 |
--------------------------------------------------------------------------------
/swarms_torch/mergers/__init__.py:
--------------------------------------------------------------------------------
1 | from swarms_torch.mergers.all_new_evo_mergers import (
2 | hyperslice_merge,
3 | random_subspace_merge,
4 | dimensional_cross_fusion,
5 | weighted_evolutionary_crossover,
6 | permutation_weight_swapping,
7 | )
8 | from swarms_torch.mergers.mm_mergers import (
9 | modality_weighted_merge,
10 | modality_specific_layer_swap,
11 | cross_modality_weight_crossover,
12 | hierarchical_modality_fusion,
13 | modality_mutation_merge,
14 | )
15 |
16 | __all__ = [
17 | "hyperslice_merge",
18 | "random_subspace_merge",
19 | "dimensional_cross_fusion",
20 | "weighted_evolutionary_crossover",
21 | "permutation_weight_swapping",
22 | "modality_weighted_merge",
23 | "modality_specific_layer_swap",
24 | "cross_modality_weight_crossover",
25 | "hierarchical_modality_fusion",
26 | "modality_mutation_merge",
27 | ]
28 |
--------------------------------------------------------------------------------
/swarms_torch/pso/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/swarms-pytorch/0fa9adeb7da3e33ec53dba65b985c606a7ff558a/swarms_torch/pso/__init__.py
--------------------------------------------------------------------------------
/swarms_torch/pso/multi_swarm_pso.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 |
4 |
5 | class MultiSwarmPSO:
6 | """
7 | Multi-Swarm PSO Algorithm
8 |
9 | Parameters
10 | ----------
11 | target_string : str
12 | The target string to be generated
13 | num_sub_swarms : int
14 | The number of sub-swarms
15 | num_particles_per_swarm : int
16 | The number of particles per sub-swarm
17 | max_iterations : int
18 | The maximum number of iterations to run the algorithm
19 |
20 | Attributes
21 | ----------
22 | target_string : str
23 | The target string to be generated
24 | num_sub_swarms : int
25 | The number of sub-swarms
26 | num_particles_per_swarm : int
27 | The number of particles per sub-swarm
28 | num_dimensions : int
29 | The number of dimensions in the search space
30 | max_iterations : int
31 | The maximum number of iterations to run the algorithm
32 |
33 | Methods
34 | -------
35 | generate_random_string()
36 | Generates a random string of length num_dimensions
37 | fitness_function(position)
38 | Calculates the fitness of a given position
39 | diversification_method(sub_swarms)
40 | Adds a new sub-swarm if the number of sub-swarms is less than the maximum
41 | optimize()
42 | Runs the Multi-Swarm PSO algorithm
43 |
44 | References
45 | ----------
46 | .. [1] https://www.researchgate.net/publication/221172800_Multi-swarm_Particle_Swarm_Optimization
47 |
48 |
49 | Usage:
50 | ------
51 | target_string = "hello world"
52 |     multi_swarm = MultiSwarmPSO(target_string)
53 | multi_swarm.optimize()
54 |
55 |
56 |
57 | """
58 |
59 | def __init__(
60 | self,
61 | target_string,
62 | num_sub_swarms=5,
63 | num_particles_per_swarm=20,
64 | max_iterations=100,
65 | ):
66 | self.target_string = target_string
67 | self.num_sub_swarms = num_sub_swarms
68 | self.num_particles_per_swarm = num_particles_per_swarm
69 | self.num_dimensions = len(target_string)
70 | self.max_iterations = max_iterations
71 |
72 | def generate_random_string(self):
73 | """
74 | Generates a random string of length num_dimensions
75 |
76 | """
77 | return "".join(
78 | random.choice(string.ascii_lowercase + " ")
79 | for _ in range(self.num_dimensions)
80 | )
81 |
82 | def fitness_function(self, position):
83 | """Fitness function to be maximized"""
84 | fitness = sum(a == b for a, b in zip(position, self.target_string))
85 | return fitness
86 |
87 | def diversification_method(self, sub_swarms):
88 | """Diversification method to add a new sub-swarm if the number of sub-swarms is less than the maximum"""
89 | if len(sub_swarms) < self.num_sub_swarms:
90 | new_sub_swarm = [
91 | self.generate_random_string()
92 | for _ in range(self.num_particles_per_swarm)
93 | ]
94 | sub_swarms.append(new_sub_swarm)
95 |
96 | def optimize(self):
97 | """Optimizes the fitness function"""
98 | sub_swarms = [
99 | [
100 | self.generate_random_string()
101 | for _ in range(self.num_particles_per_swarm)
102 | ]
103 | for _ in range(self.num_sub_swarms)
104 | ]
105 |
106 | for iteration in range(self.max_iterations):
107 | for sub_swarm in sub_swarms:
108 |                 for i, particle in enumerate(sub_swarm):
109 |                     fitness = self.fitness_function(particle)
110 |                     if fitness > 0:
111 |                         index_to_change = random.randint(
112 |                             0, self.num_dimensions - 1
113 |                         )
114 |                         new_char = random.choice(string.ascii_lowercase + " ")
115 |                         new_position = list(particle)
116 |                         new_position[index_to_change] = new_char
117 |                         # Write back so the mutation actually persists
118 |                         sub_swarm[i] = "".join(new_position)
119 |
120 | self.diversification_method(sub_swarms)
121 |
122 | global_best_fitness = max(
123 | self.fitness_function(particle)
124 | for sub_swarm in sub_swarms
125 | for particle in sub_swarm
126 | )
127 | global_best_position = [
128 | particle
129 | for sub_swarm in sub_swarms
130 | for particle in sub_swarm
131 | if self.fitness_function(particle) == global_best_fitness
132 | ][0]
133 | print(
134 | f"Iteration {iteration}: Global Best Fitness ="
135 | f" {global_best_fitness}, Global Best Position ="
136 | f" {global_best_position}"
137 | )
138 |
139 | global_best_fitness = max(
140 | self.fitness_function(particle)
141 | for sub_swarm in sub_swarms
142 | for particle in sub_swarm
143 | )
144 | global_best_position = [
145 | particle
146 | for sub_swarm in sub_swarms
147 | for particle in sub_swarm
148 | if self.fitness_function(particle) == global_best_fitness
149 | ][0]
150 | print(
151 | f"Final Result: Global Best Fitness = {global_best_fitness}, Global"
152 | f" Best Position = {global_best_position}"
153 | )
154 |
155 |
156 | # Example usage
157 | if __name__ == "__main__":
158 | target_string = "hello world"
159 | multi_swarm = MultiSwarmPSO(target_string)
160 | multi_swarm.optimize()
161 |
--------------------------------------------------------------------------------
/swarms_torch/pso/multi_swarm_pso2.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class Particle:
5 | def __init__(self, dim, minx, maxx):
6 | self.position = torch.rand(dim) * (maxx - minx) + minx
7 | self.velocity = torch.rand(dim) * (maxx - minx) + minx
8 | self.best_position = self.position.clone()
9 | self.best_score = float("inf")
10 |
11 | def update_velocity(self, global_best, w=0.7, c1=1.5, c2=1.5):
12 | r1 = torch.rand(self.position.size())
13 | r2 = torch.rand(self.position.size())
14 | self.velocity = (
15 | w * self.velocity
16 | + c1 * r1 * (self.best_position - self.position)
17 | + c2 * r2 * (global_best - self.position)
18 | )
19 |
20 | def update_position(self, minx, maxx):
21 | self.position += self.velocity
22 | self.position = torch.clamp(self.position, minx, maxx)
23 |
24 |
25 | class Swarm:
26 | def __init__(self, num_particles, dim, minx, maxx):
27 | self.particles = [
28 | Particle(dim, minx, maxx) for _ in range(num_particles)
29 | ]
30 | self.global_best = None
31 | self.global_best_score = float("inf")
32 |
33 | def update_global_best(self):
34 | for particle in self.particles:
35 | if particle.best_score < self.global_best_score:
36 | self.global_best = particle.best_position.clone()
37 | self.global_best_score = particle.best_score
38 |
39 | def move_particles(self, minx, maxx):
40 | for particle in self.particles:
41 | particle.update_velocity(self.global_best)
42 | particle.update_position(minx, maxx)
43 |
44 |
45 | class MultiSwarm:
46 | def __init__(self, num_swarms, num_particles, dim, minx, maxx):
47 | self.swarms = [
48 | Swarm(num_particles, dim, minx, maxx) for _ in range(num_swarms)
49 | ]
50 | self.minx = minx
51 | self.maxx = maxx
52 |
53 |     def optimize(self, func, max_iter):
54 |         for _ in range(max_iter):
55 |             for swarm in self.swarms:
56 |                 # Score each particle first; without this step best_score
57 |                 # stays at +inf and global_best is never set
58 |                 for particle in swarm.particles:
59 |                     score = func(particle.position)
60 |                     if score < particle.best_score:
61 |                         particle.best_score = score
62 |                         particle.best_position = particle.position.clone()
63 |                 swarm.update_global_best()
64 |                 swarm.move_particles(self.minx, self.maxx)
65 |         best_swarm = min(self.swarms, key=lambda s: s.global_best_score)
66 |         return best_swarm.global_best, best_swarm.global_best_score
67 |
68 |
69 | def rosenbrock(x, a=1, b=100):
70 |     return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2
71 |
72 |
73 | # num_swarms = 5
74 | # num_particles = 20
75 | # dim = 2
76 | # minx = -5
77 | # maxx = 5
78 | # max_iter = 100
79 |
80 | # multi_swarm = MultiSwarm(num_swarms, num_particles, dim, minx, maxx)
81 |
82 | # best_position, best_score = multi_swarm.optimize(rosenbrock, max_iter)
83 |
84 | # print(f"Best position: {best_position}")
85 | # print(f"Best score: {best_score}")
86 |
--------------------------------------------------------------------------------
/swarms_torch/pso/multi_swarm_pso_transformer.py:
--------------------------------------------------------------------------------
1 | # import torch
2 | # import torch.nn as nn
3 | # from copy import deepcopy
4 | # from swarms_torch.transformer_pso import Particle, TransformerParticleSwarmOptimization
5 |
6 | # class MultiSwarm(nn.Module):
7 | # def __init__(
8 | # self,
9 | # num_swarms,
10 | # *args,
11 | # **kwargs
12 | # ):
13 | # #create multiple instances of the transformerparticle swarm optimization
14 | # self.swarms = [TransformerParticleSwarmOptimization(*args, **kwargs) for _ in range(num_swarms)]
15 | # self.num_swarms = num_swarms
16 |
17 | # def optimize(self, iterations):
18 | # for _ in range(iterations):
19 | # #update each swarm
20 | # for swarm in self.swarms:
21 | # swarm.update()
22 |
23 | # #apply diversification strategy
24 | #                 self.diversification_strategy()
25 |
26 | # def diversification_strategy(self):
27 | # for i in range(self.num_swarms):
28 | # for j in range(i + 1, self.num_swarms):
29 | # if self.is_collided(self.swarms[i].global_best, self.swarms[j].global_best):
30 | # #handle collision by launching a new swarm or re init one of the swarms
31 | # self.handle_collision(i, j)
32 |
33 | # def is_collided(self, global_best_1, global_best_2):
34 | # #Check if difference between the global bests or 2 swarms is below a threshold
35 | # diff = sum((global_best_1[key] - global_best_2[key]).abs().sum() for key in global_best_1.keys())
36 | # COLLISION_THRESHOLD = 0.1
37 |
38 | # return diff < COLLISION_THRESHOLD
39 |
40 | # def handle_collision(self, idx1, idx2):
41 | # #for simplicity re init 2nd swarm
42 | # self.swarms[idx2] = TransformerParticleSwarmOptimization(*self.swarms[idx2].model_args, **self.swarms[idx2].kwargs)
43 |
44 | # import torch
45 | # from torch.utils.data import DataLoader, TensorDataset
46 |
47 | # # Generate random data
48 | # num_samples = 1000
49 | # input_dim = 50 # Length of input sequence
50 | # num_classes = 2
51 |
52 | # inputs = torch.randint(0, 1000, (num_samples, input_dim))
53 | # targets = torch.randint(0, num_classes, (num_samples,))
54 |
55 | # dataset = TensorDataset(inputs, targets)
56 | # data_loader = DataLoader(dataset, batch_size=32, shuffle=True)
57 |
58 | # # Define hyperparameters and model arguments
59 | # model_args = (1000, 512, 8, 6, 2) # (input_dim, d_model, nhead, num_layers, output_dim)
60 | # optimizer_args = {
61 | # "model_constructor": Particle,
62 | # "model_args": model_args,
63 | # "device": "cpu",
64 | # "criterion": torch.nn.CrossEntropyLoss(),
65 | # "data_loader": data_loader
66 | # }
67 |
68 | # # Create MultiSwarmOptimizer
69 | # num_swarms = 3
70 | # mso = MultiSwarm(num_swarms, **optimizer_args)
71 |
72 | # # Optimize
73 | # mso.optimize(iterations=10)
74 |
75 | # # Get the best model from the best-performing swarm
76 | # best_swarm = max(mso.swarms, key=lambda s: s.compute_fitness(s.global_best))
77 | # best_model = best_swarm.get_best_model()
78 |
79 | import torch
80 | import torch.nn as nn
81 | from copy import deepcopy
82 |
83 |
84 | class Particle(nn.Module):
85 |     def __init__(self, d_model, nhead, output_dim):
86 |         super(Particle, self).__init__()
87 |         self.transformer = nn.Transformer(d_model, nhead)
88 |         self.fc = nn.Linear(d_model, output_dim)
89 |         # PSO state: flattened parameters, velocity, and personal best
90 |         self.position = nn.utils.parameters_to_vector(self.parameters()).detach()
91 |         self.velocity = torch.zeros_like(self.position)
92 |         self.best_position, self.best_fitness = self.position.clone(), float("-inf")
93 |
94 |     def forward(self, x):
95 |         return self.fc(self.transformer(x, x))
96 | class MultiSwarmOptimizer:
97 | def __init__(
98 | self,
99 | particle,
100 | num_particles,
101 | num_subswarms,
102 | fitness_func,
103 | bounds,
104 | num_epochs,
105 | ):
106 | self.particle = particle
107 | self.num_particles = num_particles
108 | self.num_subswarms = num_subswarms
109 | self.fitness_func = fitness_func
110 | self.bounds = bounds
111 | self.num_epochs = num_epochs
112 |
113 | self.subswarms = []
114 | for _ in range(num_subswarms):
115 | self.subswarms.append(
116 | [deepcopy(particle) for _ in range(num_particles)]
117 | )
118 |
119 | def optimize(self):
120 | for epoch in range(self.num_epochs):
121 | for subswarm in self.subswarms:
122 | for particle in subswarm:
123 | fitness = self.fitness_func(particle)
124 | if fitness > particle.best_fitness:
125 | particle.best_fitness = fitness
126 | particle.best_position = deepcopy(particle.position)
127 |
128 | best_particle = max(subswarm, key=lambda p: p.best_fitness)
129 | for particle in subswarm:
130 | particle.velocity = (
131 | particle.velocity
132 | + 0.5 * (particle.best_position - particle.position)
133 | + 0.5
134 | * (best_particle.best_position - particle.position)
135 | )
136 | particle.position = particle.position + particle.velocity
137 | particle.position = torch.clamp(
138 | particle.position, *self.bounds
139 | )
140 |
141 | best_subswarm = max(
142 | self.subswarms, key=lambda s: max(p.best_fitness for p in s)
143 | )
144 | best_particle = max(best_subswarm, key=lambda p: p.best_fitness)
145 | print(
146 | f"Epoch {epoch+1}/{self.num_epochs}, Best Fitness:"
147 | f" {best_particle.best_fitness}"
148 | )
149 |
150 | best_subswarm = max(
151 | self.subswarms, key=lambda s: max(p.best_fitness for p in s)
152 | )
153 | best_particle = max(best_subswarm, key=lambda p: p.best_fitness)
154 | return best_particle
155 |
156 | def get_best_model(self):
157 | return self.particle
158 |
159 |
160 | # import torch
161 | # import torch.nn as nn
162 | # from random import random
163 |
164 |
165 | # # Define the fitness function
166 | # def fitness_func(particle):
167 | # # This is a dummy fitness function. Replace it with your own.
168 | # return random()
169 |
170 |
171 | # # Define the bounds for the particle positions
172 | # bounds = (-1.0, 1.0)
173 |
174 | # # Define the number of particles, sub-swarms, and epochs
175 | # num_particles = 10
176 | # num_subswarms = 5
177 | # num_epochs = 100
178 |
179 | # # Define the dimensions for the transformer model
180 | # d_model = 16
181 | # nhead = 4
182 | # output_dim = 2
183 |
184 | # # Create a particle (transformer model)
185 | # particle = Particle(d_model, nhead, output_dim)
186 |
187 | # # Create the multi-swarm optimizer
188 | # optimizer = MultiSwarmOptimizer(
189 | # particle, num_particles, num_subswarms, fitness_func, bounds, num_epochs
190 | # )
191 |
192 | # # Run the optimization
193 | # best_particle = optimizer.optimize()
194 |
195 | # # The best_particle is the model with the highest fitness score
196 | # print(best_particle)
197 |
--------------------------------------------------------------------------------
/swarms_torch/pso/transformer_pso.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from copy import deepcopy
4 |
5 |
6 | class Particle(nn.Module):
7 | """
8 | Simple Transformer model for classification.
9 |
10 | Parameters
11 | ----------
12 | input_dim : int
13 | The number of expected features in the input (required).
14 | d_model : int
15 | The number of expected features in the encoder/decoder inputs (required).
16 | nhead : int
17 | The number of heads in the multiheadattention models (required).
18 | num_layers : int
19 | The number of sub-encoder-layers in the encoder (required).
20 | output_dim : int
21 | The number of classes to predict (required).
22 |
23 | Usage:
24 |     >>> model = Particle(1000, 512, 8, 6, 10)
25 | >>> model(x)
26 |
27 |
28 | """
29 |
30 | def __init__(self, input_dim, d_model, nhead, num_layers, output_dim):
31 | super(Particle, self).__init__()
32 | self.embedding = nn.Embedding(input_dim, d_model)
33 | self.transformer = nn.Transformer(
34 | d_model, nhead, num_layers, num_layers
35 | )
36 | self.fc = nn.Linear(d_model, output_dim)
37 |
38 | def forward(self, x):
39 | """
40 | Forward pass through the model.
41 |
42 | """
43 | x = self.embedding(x)
44 | x = self.transformer(x, x)
45 | return self.fc(x[-1])
46 |
47 |
48 | class TransformerParticleSwarmOptimization(nn.Module):
49 | """
50 | Transformer Particle Swarm Optimization.
51 |
52 | Parameters
53 | ----------
54 | model_constructor : function
55 | Function to create a new model instance.
56 | model_args : tuple
57 | Arguments for the model constructor.
58 | device : str
59 | 'cuda' or 'cpu'.
60 | criterion : nn.Module
61 | Loss function.
62 | data_loader : torch.utils.data.DataLoader
63 | Data loader.
64 | n_particles : int
65 | Number of particles.
66 | inertia : float
67 | Inertia weight.
68 | personal_best_weight : float
69 | Personal best weight.
70 | global_best_weight : float
71 | Global best weight.
72 |
73 | Usage:
74 | >>> pso = TransformerParticleSwarmOptimization(
75 | ... SimpleTransformer,
76 | ... (1000, 512, 8, 6, 10),
77 | ... device="cuda",
78 | ... criterion=nn.CrossEntropyLoss(),
79 | ... data_loader=your_dataloader
80 | ... )
81 |
82 | """
83 |
84 | def __init__(
85 | self,
86 | model_constructor, # Function to create a new model instance
87 | model_args, # Arguments for the model constructor
88 | device, # 'cuda' or 'cpu'
89 | criterion,
90 | data_loader,
91 | n_particles=10,
92 | inertia=0.5,
93 | personal_best_weight=1.5,
94 | global_best_weight=1.5,
95 | ):
96 | super(TransformerParticleSwarmOptimization, self).__init__()
97 | self.model_constructor = model_constructor
98 | self.model_args = model_args
99 | self.criterion = criterion
100 | self.data_loader = data_loader
101 | self.device = device
102 |
103 | self.n_particles = n_particles
104 | self.inertia = inertia
105 | self.personal_best_weight = personal_best_weight
106 | self.global_best_weight = global_best_weight
107 |
108 |         # Each particle is a full model instance
109 |         self.particles = [
110 |             self.model_constructor(*model_args).to(device)
111 |             for _ in range(n_particles)
112 |         ]
113 |         # One zero velocity tensor per named parameter, per particle, so
114 |         # shapes always line up during the velocity update below
115 |         self.velocities = [
116 |             {n: torch.zeros_like(p) for n, p in m.named_parameters()}
117 |             for m in self.particles
118 |         ]
119 | self.personal_best = [deepcopy(p.state_dict()) for p in self.particles]
120 | self.global_best = deepcopy(self.particles[0].state_dict())
121 |
122 | def compute_fitness(self, model_state):
123 | """
124 | Compute the fitness of a model.
125 | """
126 | model = self.model_constructor(*self.model_args).to(self.device)
127 | model.load_state_dict(model_state)
128 | model.eval()
129 |
130 | total_loss = 0.0
131 | with torch.no_grad():
132 | for inputs, targets in self.data_loader:
133 | outputs = model(inputs.to(self.device))
134 | loss = self.criterion(outputs, targets.to(self.device))
135 | total_loss += loss.item()
136 | return 1.0 / (1.0 + total_loss)
137 |
138 | def update(self):
139 | """
140 | Update particles.
141 | """
142 | # Update particles
143 | for idx, particle in enumerate(self.particles):
144 | fitness = self.compute_fitness(particle.state_dict())
145 |
146 | # Update personal best
147 | if fitness > self.compute_fitness(self.personal_best[idx]):
148 | self.personal_best[idx] = deepcopy(particle.state_dict())
149 |
150 | # Update global best
151 | if fitness > self.compute_fitness(self.global_best):
152 | self.global_best = deepcopy(particle.state_dict())
153 |
154 | # Update velocities and positions
155 | for name, param in particle.named_parameters():
156 | delta = self.personal_best_weight * torch.rand_like(param) * (
157 | self.personal_best[idx][name].to(self.device) - param.data
158 | ) + self.global_best_weight * torch.rand_like(param) * (
159 | self.global_best[name].to(self.device) - param.data
160 | )
161 |                 # Standard PSO update: v <- inertia * v + delta, then move by v
162 |                 v = self.inertia * self.velocities[idx][name] + delta
163 |                 self.velocities[idx][name] = v
164 |                 param.data += v
165 |
166 | def optimize(self, iterations=1000):
167 | """Optimize the model."""
168 | for _ in range(iterations):
169 | self.update()
170 | best_particle_score = self.compute_fitness(self.global_best)
171 | print(
172 | f"Iteration {_ + 1}/{iterations} - Best Particle Fitness:"
173 | f" {best_particle_score}"
174 | )
175 |
176 | def get_best_model(self):
177 | """Get the best model."""
178 | best_model = self.model_constructor(*self.model_args).to(self.device)
179 | best_model.load_state_dict(self.global_best)
180 | return best_model
181 |
182 |
183 | # # Define model and optimization parameters
184 | # input_dim = 1000
185 | # d_model = 512
186 | # nhead = 8
187 | # num_layers = 3
188 | # output_dim = 10
189 |
190 | # batch_size = 32
191 | # sequence_length = 50
192 |
193 | # # Instantiate the optimizer
194 | # pso = TransformerParticleSwarmOptimization(
195 | #     Particle,
196 | # (input_dim, d_model, nhead, num_layers, output_dim),
197 | # device="cuda", # or 'cpu'
198 | # criterion=nn.CrossEntropyLoss(),
199 | # # data_loader=your_dataloader # replace with your dataloader
200 | # )
201 |
202 | # # Run optimization
203 | # pso.optimize(iterations=100)
204 |
205 | # # Get the best model
206 | # best_model = pso.get_best_model()
207 |
208 | # # Generate a random input tensor
209 | # x = torch.randint(0, input_dim, (batch_size, sequence_length)).to(
210 | # "cuda"
211 | # ) # ensure it's on the same device as your model
212 |
213 | # # Pass the tensor through the model
214 | # output = best_model(x)
215 | # print(output.shape) # should be [batch_size, output_dim]
216 |
--------------------------------------------------------------------------------
/swarms_torch/structs/__init__.py:
--------------------------------------------------------------------------------
1 | from swarms_torch.structs.parallel_wrapper import ParallelSwarm
2 | from swarms_torch.structs.switch_moe import SwitchGate, SwitchMoE
3 | from swarms_torch.structs.simple_moe import GatingMechanism, SimpleMoE
4 | from queen_bee_transformer_hierarchy import (
5 | QueenBeeTransformerHierarchy,
6 | GeneticTransformerEvolution,
7 | QueenWorkerCommunication,
8 | WorkerTransformer,
9 | )
10 |
11 | __all__ = [
12 | "ParallelSwarm",
13 | "SwitchGate",
14 | "SwitchMoE",
15 | "GatingMechanism",
16 | "SimpleMoE",
17 | "QueenBeeTransformerHierarchy",
18 | "GeneticTransformerEvolution",
19 | "QueenWorkerCommunication",
20 | "WorkerTransformer",
21 | ]
22 |
--------------------------------------------------------------------------------
/swarms_torch/structs/ant_colony_swarm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class AntColonyOptimization(nn.Module):
6 | """
7 | Ant Colony Optimization
8 | Overview: https://en.wikipedia.org/wiki/Ant_colony_optimization_algorithms
9 |
10 | How does it work?
11 | 1. Initialize pheromone levels for each path
12 | 2. For each ant, choose the next path based on the pheromone levels
13 | 3. Update the pheromone levels
14 | 4. Repeat step 2 to 3 until the maximum number of iterations is reached
15 |
16 | Parameters
17 | ----------
18 | goal: str
19 | The goal string to be optimized
20 | num_ants: int
21 | Number of ants
22 | evaporation_rate: float
23 | Evaporation rate
24 |
25 | Usage
26 | -----
27 | from swarms_torch import AntColonyOptimization
28 |
29 | goal_string = "Hello ACO"
30 | aco = AntColonyOptimization(goal_string, num_iterations=1000)
31 | best_solution = aco.optimize()
32 |
33 | print("Best Matched String:", best_solution)
34 |
35 | Features to implement
36 | --------
37 | 1. Add a stopping criterion
38 | 2. Add a callback function to track the progress
39 | 3. Add a function to plot the pheromone levels
40 | 4. Add a function to plot the ants
41 | 5. Add a function to plot the best solution
42 |
43 | """
44 |
45 | def __init__(
46 | self,
47 | goal: str = None,
48 | num_ants: int = 10000,
49 | evaporation_rate: float = 0.1,
50 | alpha: int = 1,
51 | beta: int = 1,
52 | num_iterations: int = 10010,
53 |     ):
54 |         super().__init__()
55 |         self.goal = torch.tensor([ord(c) for c in goal], dtype=torch.float32)
56 |         self.num_ants = num_ants
57 |         self.evaporation_rate = evaporation_rate
58 |         self.alpha = alpha
59 |         self.beta = beta
60 |         self.num_iterations = num_iterations
61 |         # Pheromone levels can be initialized per path (architecture)
62 |         self.pheromones = torch.ones(num_ants)
63 |         self.solutions = []
64 |
65 | def fitness(self, solution):
66 | """Fitness of a solution"""
67 | return -torch.norm(solution - self.goal)
68 |
69 | def update_pheromones(self):
70 | """Update pheromone levels"""
71 | for i, solution in enumerate(self.solutions):
72 | self.pheromones[i] = (1 - self.evaporation_rate) * self.pheromones[
73 | i
74 | ] + self.fitness(solution)
75 |
76 | def choose_next_path(self):
77 | """Choose the next path based on the pheromone levels"""
78 | probabilities = (self.pheromones**self.alpha) * (
79 | (1.0 / (1 + self.pheromones)) ** self.beta
80 | )
81 |
82 | probabilities /= probabilities.sum()
83 |
84 | return torch.multinomial(probabilities, num_samples=1).item()
85 |
86 | def optimize(self):
87 | """Optimize the goal string"""
88 | for iteration in range(self.num_iterations):
89 | self.solutions = []
90 | for _ in range(self.num_ants):
91 | # This is a placeholder. Actual implementation will define how
92 | # ants traverse the search space.
93 | solution = torch.randint(
94 | 32, 127, (len(self.goal),), dtype=torch.float32
95 | ) # Random characters.
96 | self.solutions.append(solution)
97 | self.update_pheromones()
98 |
99 | best_solution_index = self.pheromones.argmax().item()
100 | best_solution = self.solutions[best_solution_index]
101 | return "".join([chr(int(c)) for c in best_solution])
102 |
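103 | # Example run (mirrors the docstring usage above):
104 | # aco = AntColonyOptimization(goal="Hello ACO", num_iterations=1000)
105 | # print("Best Matched String:", aco.optimize())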
--------------------------------------------------------------------------------
/swarms_torch/structs/cellular_transformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class TransformerCell(nn.Module):
6 | def __init__(
7 | self,
8 | input_dim,
9 | nhead,
10 | num_layers=1,
11 | neighborhood_size=3,
12 | ):
13 | super(TransformerCell, self).__init__()
14 | self.transformer = nn.Transformer(
15 | input_dim, nhead=nhead, num_encoder_layers=num_layers
16 | )
17 | self.neighborhood_size = neighborhood_size
18 |
19 |     def forward(self, x, neighbors):
20 |         x = self.transformer(x, x)
21 |
22 |         out = torch.cat([x] + neighbors, dim=0)
23 |
24 | return out
25 |
26 |
27 | class CellularSwarm(nn.Module):
28 | """
29 | CellularSwarm
30 |
31 | Architecture:
32 | - Input -> TransformerCell -> TransformerCell -> ... -> Output
33 |
34 | Overview:
35 | CellularSwarm is a cellular neural network that uses a transformer cell
36 | to process the input.
37 |
38 | Args:
39 | cell_count (int): Number of transformer cells
40 | input_dim (int): Input dimension
41 | nhead (int): Number of heads in the transformer cell
42 | time_steps (int): Number of time steps to run the network
43 |
44 | Returns:
45 | torch.Tensor: Output tensor
46 |
47 | Usage:
48 | >>> x = torch.randn(10, 32, 512)
49 | >>> model = CellularSwarm(cell_count=5, input_dim=512, nhead=8)
50 | >>> output = model(x)
51 | >>> print(output)
52 |
53 |
54 | """
55 |
56 | def __init__(self, cell_count, input_dim, nhead, time_steps=4):
57 | super(CellularSwarm, self).__init__()
58 | self.cells = nn.ModuleList(
59 | [TransformerCell(input_dim, nhead) for _ in range(cell_count)]
60 | )
61 | self.time_steps = time_steps
62 |
63 | def forward(self, x):
64 | for _ in range(self.time_steps):
65 | for i, cell in enumerate(self.cells):
66 | # get neighboring cells states
67 | start_idx = max(0, i - cell.neighborhood_size)
68 |
69 | end_idx = min(len(self.cells), i + cell.neighborhood_size + 1)
70 |
71 | neighbors = [
72 | self.cells[j].transformer(x, x)
73 | for j in range(start_idx, end_idx)
74 | if j != i
75 | ]
76 |
77 | x = cell(x, neighbors)
78 | return x
79 |
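80 | # Example run (mirrors the docstring usage above):
81 | # x = torch.randn(10, 32, 512)
82 | # model = CellularSwarm(cell_count=5, input_dim=512, nhead=8)
83 | # output = model(x)
84 | # print(output)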
--------------------------------------------------------------------------------
/swarms_torch/structs/graph_cellular_automa.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class GraphCellularAutomata(nn.Module):
6 | def __init__(self, input_dim, hidden_dim, output_dim):
7 | super(GraphCellularAutomata, self).__init__()
8 |
9 | self.mlp = nn.Sequential(
10 | nn.Linear(input_dim, hidden_dim),
11 | nn.ReLU(),
12 | nn.Linear(hidden_dim, output_dim),
13 | )
14 |
15 | def forward(self, x):
16 | return self.mlp(x)
17 |
18 |
19 | class ReplicationModel(nn.Module):
20 | def __init__(self, input_dim, hidden_dim):
21 | super(ReplicationModel, self).__init__()
22 |
23 | self.mlp = nn.Sequential(
24 | nn.Linear(input_dim, hidden_dim),
25 | nn.ReLU(),
26 | nn.Linear(hidden_dim, 1),
27 | nn.Sigmoid(), # for binary classification
28 | )
29 |
30 | def forward(self, x):
31 | return self.mlp(x)
32 |
33 |
34 | class WeightUpdateModel(nn.Module):
35 | def __init__(self, input_dim, hidden_dim):
36 | super(WeightUpdateModel, self).__init__()
37 |
38 | self.mlp = nn.Sequential(
39 | nn.Linear(input_dim, hidden_dim),
40 | nn.ReLU(),
41 | nn.Linear(hidden_dim, 1),
42 | )
43 |
44 | def forward(self, x):
45 | return self.mlp(x)
46 |
47 |
48 | class NDP(nn.Module):
49 | def __init__(self, embedding_dim, hidden_dim):
50 | super(NDP, self).__init__()
51 |
52 | self.gc_automata = GraphCellularAutomata(
53 | embedding_dim, hidden_dim, embedding_dim
54 | )
55 | self.replication_model = ReplicationModel(embedding_dim, hidden_dim)
56 | self.weight_update_model = WeightUpdateModel(
57 | 2 * embedding_dim, hidden_dim
58 | )
59 |
60 | def forward(self, node_embeddings, adjacency_matrix):
61 | # Update node embeddings using Graph Cellular Automata
62 | updated_embeddings = self.gc_automata(node_embeddings)
63 |
64 | # Check which nodes need to replicate
65 | replication_decisions = self.replication_model(updated_embeddings)
66 |
67 | # Weight update (assuming weighted network)
68 | num_nodes = node_embeddings.shape[0]
69 | edge_weights = torch.zeros((num_nodes, num_nodes))
70 |
71 | for i in range(num_nodes):
72 | for j in range(num_nodes):
73 | combined_embedding = torch.cat(
74 | (updated_embeddings[i], updated_embeddings[j])
75 | )
76 |
77 | edge_weights[i, j] = self.weight_update_model(
78 | combined_embedding
79 | )
80 |
81 | return updated_embeddings, replication_decisions, edge_weights
82 |
83 |
84 | # # Usage examples
85 | # embedding_dim = 16
86 | # hidden_dim = 32
87 | # node_embeddings = torch.rand((10, embedding_dim)) # For 10 nodes
88 | # adjacency_matrix = torch.rand((10, 10)) # Dummy adjacency matrix for 10
89 | # nodes
90 |
91 | # model = NDP(embedding_dim, hidden_dim)
92 | # updated_embeddings, replication_decisions, edge_weights = model(
93 | # node_embeddings, adjacency_matrix
94 | # )
95 |
96 | # print(updated_embeddings.shape)
97 | # print(replication_decisions.shape)
98 | # print(edge_weights.shape)
99 |
--------------------------------------------------------------------------------
/swarms_torch/structs/hivemind_swarm_transformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from zeta.structs.transformer import (
4 | Decoder,
5 | Transformer,
6 | )
7 |
8 |
9 | class HivemindTransformer(nn.Module):
10 | def __init__(
11 | self,
12 | dim: int = None,
13 | max_seq_len: int = None,
14 | depth: int = None,
15 | heads: int = None,
16 | dim_head: int = None,
17 | num_tokens: int = None,
18 | ):
19 | super(HivemindTransformer, self).__init__()
20 | self.dim = dim
21 | self.max_seq_len = max_seq_len
22 | self.depth = depth
23 | self.heads = heads
24 | self.dim_head = dim_head
25 | self.num_tokens = num_tokens
26 |
27 | self.model = Transformer(
28 | num_tokens=num_tokens,
29 | max_seq_len=max_seq_len,
30 | attn_layers=Decoder(
31 | dim=dim,
32 | depth=depth,
33 | dim_head=dim_head,
34 | heads=heads,
35 | ),
36 | )
37 |
38 | def forward(self, x):
39 | return self.model(x)
40 |
41 |
42 | class HivemindSwarm(nn.Module):
43 | """
44 | HiveMind Swarm Transformer
45 |
46 | This is a transformer that is composed of a swarm of transformers where each transformer shares the same weights.
47 |
48 | Args:
49 | dim: dimension of the model
50 | max_seq_len: maximum sequence length
51 | depth: depth of the model
52 | heads: number of heads
53 | dim_head: dimension of each head
54 | num_models: number of models in the swarm
55 | base_transformer: the base transformer to be used in the swarm
56 |
57 |
58 | Example::
59 | model = HivemindSwarm(
60 | dim=512,
61 | max_seq_len=1024,
62 | depth=6,
63 | heads=8,
64 | dim_head=64,
65 | num_models=4,
66 | )
67 |
68 | x = torch.randn(1, 1024, 512)
69 | y = model(x)
70 | print(y.shape)
71 |
72 |
73 | """
74 |
75 | def __init__(
76 | self,
77 | dim: int = None,
78 | max_seq_len: int = None,
79 | num_tokens: int = None,
80 | depth: int = None,
81 | heads: int = None,
82 | dim_head: int = None,
83 | num_models: int = 1,
84 | **kwargs,
85 | ):
86 | super(HivemindSwarm, self).__init__()
87 |
88 | self.dim = dim
89 | self.max_seq_len = max_seq_len
90 | self.depth = depth
91 | self.heads = heads
92 | self.num_tokens = num_tokens
93 | self.dim_head = dim_head
94 | self.num_models = num_models
95 | self.base_transformer = HivemindTransformer(
96 | dim=dim,
97 | num_tokens=num_tokens,
98 | max_seq_len=max_seq_len,
99 | depth=depth,
100 | heads=heads,
101 | dim_head=dim_head,
102 | )
103 | # Create a list of transformers sharing the same weights
104 | self.experts = nn.ModuleList(
105 | [self.base_transformer for _ in range(num_models)]
106 | )
107 |
108 |         # The gating mechanism lets the model dynamically weight the contribution of each transformer
109 |         # in the swarm. This is done by learning a weight for each transformer and then using a softmax
110 |         # to normalize the weights.
111 | self.gate = nn.Linear(num_models, num_models)
112 | self.gate_activation = nn.Softmax(dim=-1)
113 | self.gate_bias = nn.Parameter(torch.zeros(num_models))
114 |
115 | def forward(self, x):
116 | logits = []
117 | for expert in self.experts:
118 | output = expert(x)
119 | logits.append(output)
120 | # Run each transformer on the input
121 | # outputs = [expert(x) for expert in self.experts]
122 |
123 | # stack outputs
124 | outputs = torch.stack(logits, dim=1)
125 |
126 |         # Compute one gate score per expert by pooling its output:
127 |         # (batch, num_models, seq, dim) -> (batch, num_models)
128 |         scores = outputs.mean(dim=(2, 3))
129 |         gate = self.gate_activation(self.gate_bias + self.gate(scores))
130 |         # Weight the expert outputs and sum over the expert dimension
131 |         return torch.sum(outputs * gate.unsqueeze(-1).unsqueeze(-1), dim=1)
132 |
133 |
134 | # model = HivemindSwarm(
135 | # dim=512,
136 | # max_seq_len=1024,
137 | # num_tokens=20000,
138 | # depth=6,
139 | # heads=8,
140 | # dim_head=64,
141 | # num_models=4,
142 | # )
143 |
144 | # x = torch.randn(1, 1024, 512)
145 | # y = model(x)
146 | # print(y.shape)
147 |
--------------------------------------------------------------------------------
/swarms_torch/structs/ma_agent.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import gym
5 |
6 |
7 | class MAgent:
8 | class Agent(nn.Module):
9 | def __init__(self, input_dim, output_dim):
10 | super().__init__()
11 | self.policy = nn.Sequential(
12 | nn.Linear(input_dim, 128),
13 | nn.ReLU(),
14 | nn.Linear(128, output_dim),
15 | nn.Softmax(dim=-1),
16 | )
17 |
18 | def forward(self, state):
19 | return self.policy(state)
20 |
21 | class MultiGymEnvironment:
22 | def __init__(self, env_name, num_agents):
23 | self.envs = [gym.make(env_name) for _ in range(num_agents)]
24 | self.agents = [
25 | MAgent.Agent(
26 | self.envs[0].observation_space.shape[0],
27 | self.envs[0].action_space.n,
28 | )
29 | for _ in range(num_agents)
30 | ]
31 | self.optimizers = [
32 | optim.Adam(agent.parameters()) for agent in self.agents
33 | ]
34 |
35 | def step(self, agent_actions):
36 | rewards = []
37 | for env, action in zip(self.envs, agent_actions):
38 | _, reward, _, _ = env.step(action)
39 | rewards.append(reward)
40 | return rewards
41 |
42 | def get_states(self):
43 | states = [env.reset() for env in self.envs]
44 | return states
45 |
46 | def train(self, epochs=1000):
47 | for epoch in range(epochs):
48 | states = self.get_states()
49 | actions = [
50 | torch.argmax(agent(torch.FloatTensor(state))).item()
51 | for agent, state in zip(self.agents, states)
52 | ]
53 | rewards = self.step(actions)
54 |
55 |                 for agent, optimizer, state, action, reward in zip(
56 |                     self.agents, self.optimizers, states, actions, rewards
57 |                 ):
58 |                     # Example REINFORCE-style loss on the chosen action
59 |                     probs = agent(torch.FloatTensor(state))
60 |                     loss = -torch.log(probs[action]) * reward
61 | optimizer.zero_grad()
62 | loss.backward()
63 | optimizer.step()
64 |
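65 | # Example run (a sketch; assumes a Gym environment with a discrete
66 | # action space, e.g. "CartPole-v1"):
67 | # env = MAgent.MultiGymEnvironment("CartPole-v1", num_agents=4)
68 | # env.train(epochs=10)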
--------------------------------------------------------------------------------
/swarms_torch/structs/mas_model.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict, Any
2 | import torch
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | from loguru import logger
6 |
7 | # Set up logger
8 | logger.add("masi_log.log", rotation="500 MB")
9 |
10 | # Define device
11 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12 | logger.info(f"Using device: {device}")
13 |
14 |
15 | # Agent Base Class
16 | class Agent(nn.Module):
17 | def __init__(self):
18 | super(Agent, self).__init__()
19 |
20 | def forward(self, x: torch.Tensor) -> torch.Tensor:
21 | raise NotImplementedError
22 |
23 | # def backward(self, loss: torch.Tensor) -> None:
24 | # loss.backward()
25 |
26 | def update_parameters(
27 | self, shared_gradients: Dict[str, torch.Tensor]
28 | ) -> None:
29 | with torch.no_grad():
30 | for name, param in self.named_parameters():
31 | if param.grad is not None:
32 | param.grad = shared_gradients[name]
33 | self.optimizer.step()
34 | self.optimizer.zero_grad()
35 |
36 |
37 | # MLP Agent
38 | class MLPAgent(Agent):
39 | def __init__(self, input_size: int, hidden_size: int, output_size: int):
40 | super(MLPAgent, self).__init__()
41 | self.model = nn.Sequential(
42 | nn.Flatten(), # Add this line to flatten the input
43 | nn.Linear(input_size, hidden_size),
44 | nn.ReLU(),
45 | nn.Linear(hidden_size, output_size),
46 | )
47 | self.to(device)
48 | self.optimizer = optim.Adam(self.parameters(), lr=0.001)
49 |
50 | def forward(self, x: torch.Tensor) -> torch.Tensor:
51 | logger.debug(f"MLPAgent input shape: {x.shape}")
52 | output = self.model(x)
53 | logger.debug(f"MLPAgent output shape: {output.shape}")
54 | return output
55 |
56 |
57 | # CNN Agent
58 | class CNNAgent(Agent):
59 | def __init__(self, input_channels: int, num_classes: int):
60 | super(CNNAgent, self).__init__()
61 | self.model = nn.Sequential(
62 | nn.Conv2d(input_channels, 16, kernel_size=3, padding=1),
63 | nn.ReLU(),
64 | nn.Flatten(),
65 | nn.Linear(16 * 28 * 28, num_classes),
66 | )
67 | self.to(device)
68 | self.optimizer = optim.Adam(self.parameters(), lr=0.001)
69 |
70 | def forward(self, x: torch.Tensor) -> torch.Tensor:
71 | logger.debug(f"CNNAgent input shape: {x.shape}")
72 | output = self.model(x)
73 | logger.debug(f"CNNAgent output shape: {output.shape}")
74 | return output
75 |
76 |
77 | # LSTM Agent
78 | class LSTMAgent(Agent):
79 | def __init__(self, input_size: int, hidden_size: int, output_size: int):
80 | super(LSTMAgent, self).__init__()
81 | self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
82 | self.fc = nn.Linear(hidden_size, output_size)
83 | self.to(device)
84 | self.optimizer = optim.Adam(self.parameters(), lr=0.001)
85 |
86 | def forward(self, x: torch.Tensor) -> torch.Tensor:
87 | logger.debug(f"LSTMAgent input shape: {x.shape}")
88 | # Reshape input: (batch, channels, height, width) -> (batch, height, width * channels)
89 | x = x.view(x.size(0), x.size(2), -1)
90 | lstm_out, _ = self.lstm(x)
91 | output = self.fc(lstm_out[:, -1, :])
92 | logger.debug(f"LSTMAgent output shape: {output.shape}")
93 | return output
94 |
95 |
96 | # Transformer Agent
97 | class TransformerAgent(Agent):
98 | def __init__(
99 | self, input_size: int, num_heads: int, num_layers: int, output_size: int
100 | ):
101 | super(TransformerAgent, self).__init__()
102 | self.embedding = nn.Linear(input_size, 128)
103 | encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=num_heads)
104 | self.transformer_encoder = nn.TransformerEncoder(
105 | encoder_layer, num_layers=num_layers
106 | )
107 | self.fc = nn.Linear(128, output_size)
108 | self.to(device)
109 | self.optimizer = optim.Adam(self.parameters(), lr=0.001)
110 |
111 | def forward(self, x: torch.Tensor) -> torch.Tensor:
112 | logger.debug(f"TransformerAgent input shape: {x.shape}")
113 | # Reshape input: (batch, channels, height, width) -> (batch, height, width * channels)
114 | x = x.view(x.size(0), x.size(2), -1)
115 | x = self.embedding(x)
116 | x = x.permute(1, 0, 2) # (sequence_length, batch_size, embedding_dim)
117 | transformer_out = self.transformer_encoder(x)
118 | transformer_out = transformer_out.permute(
119 | 1, 0, 2
120 | ) # Back to (batch_size, sequence_length, embedding_dim)
121 | output = self.fc(transformer_out[:, -1, :])
122 | logger.debug(f"TransformerAgent output shape: {output.shape}")
123 | return output
124 |
125 |
126 | # Multi-Architecture Swarm Intelligence (MASI) class
127 | class MultiArchitectureSwarm(nn.Module):
128 | def __init__(
129 | self,
130 | num_mlp_agents: int,
131 | num_cnn_agents: int,
132 | num_lstm_agents: int,
133 | num_transformer_agents: int,
134 | input_sizes: Dict[str, Any],
135 | output_size: int,
136 | ):
137 | super(MultiArchitectureSwarm, self).__init__()
138 |
139 | self.agents: List[Agent] = []
140 |
141 | # Initialize MLP Agents
142 | for _ in range(num_mlp_agents):
143 | agent = MLPAgent(
144 | input_size=input_sizes["mlp"]["input_size"],
145 | hidden_size=input_sizes["mlp"]["hidden_size"],
146 | output_size=output_size,
147 | )
148 | self.agents.append(agent)
149 |
150 | # Initialize CNN Agents
151 | for _ in range(num_cnn_agents):
152 | agent = CNNAgent(
153 | input_channels=input_sizes["cnn"]["input_channels"],
154 | num_classes=output_size,
155 | )
156 | self.agents.append(agent)
157 |
158 | # Initialize LSTM Agents
159 | for _ in range(num_lstm_agents):
160 | agent = LSTMAgent(
161 | input_size=input_sizes["lstm"]["input_size"],
162 | hidden_size=input_sizes["lstm"]["hidden_size"],
163 | output_size=output_size,
164 | )
165 | self.agents.append(agent)
166 |
167 | # Initialize Transformer Agents
168 | for _ in range(num_transformer_agents):
169 | agent = TransformerAgent(
170 | input_size=input_sizes["transformer"]["input_size"],
171 | num_heads=input_sizes["transformer"]["num_heads"],
172 | num_layers=input_sizes["transformer"]["num_layers"],
173 | output_size=output_size,
174 | )
175 | self.agents.append(agent)
176 |
177 | logger.info(f"Initialized {len(self.agents)} agents.")
178 |
179 | def forward(self, x: torch.Tensor) -> torch.Tensor:
180 | agent_outputs = []
181 |
182 | for agent in self.agents:
183 | agent_output = agent(x)
184 | agent_outputs.append(agent_output)
185 |
186 | # Aggregate outputs (Simple averaging for now)
187 | global_output = self.aggregate_agent_outputs(agent_outputs)
188 |
189 | return global_output
190 |
191 | def aggregate_agent_outputs(
192 | self, agent_outputs: List[torch.Tensor]
193 | ) -> torch.Tensor:
194 | # Stack outputs and calculate mean
195 | logger.debug(f"Aggregating outputs from {len(agent_outputs)} agents.")
196 | stacked_outputs = torch.stack(agent_outputs)
197 | logger.debug(f"Stacked outputs shape: {stacked_outputs.shape}")
198 | global_output = torch.mean(stacked_outputs, dim=0)
199 | logger.debug(f"Global output shape: {global_output.shape}")
200 | return global_output
201 |
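202 | # Example run (a sketch; the `input_sizes` keys mirror the constructor
203 | # above, and the MNIST-shaped input is only illustrative):
204 | # input_sizes = {
205 | #     "mlp": {"input_size": 784, "hidden_size": 128},
206 | #     "cnn": {"input_channels": 1},
207 | #     "lstm": {"input_size": 28, "hidden_size": 64},
208 | #     "transformer": {"input_size": 28, "num_heads": 4, "num_layers": 2},
209 | # }
210 | # swarm = MultiArchitectureSwarm(2, 2, 2, 2, input_sizes, output_size=10)
211 | # x = torch.rand(8, 1, 28, 28).to(device)
212 | # print(swarm(x).shape)  # expected: torch.Size([8, 10])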
--------------------------------------------------------------------------------
/swarms_torch/structs/mixtral_expert.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class SwiGLU(nn.Module):
7 | def __init__(self, input_dim, hidden_dim, output_dim):
8 | super(SwiGLU, self).__init__()
9 | self.fc1 = nn.Linear(input_dim, hidden_dim)
10 | self.fc2 = nn.Linear(hidden_dim, output_dim)
11 |
12 | def forward(self, x):
13 | return self.fc2(F.silu(self.fc1(x)))
14 |
15 |
16 | class TopKGate(nn.Module):
17 | def __init__(self, model_dim, num_experts, top_k):
18 | super(TopKGate, self).__init__()
19 | self.w_gate = nn.Linear(model_dim, num_experts)
20 | self.top_k = top_k
21 |
22 | def forward(self, x):
23 | gate_logits = self.w_gate(x)
24 | top_logits, top_indices = torch.topk(gate_logits, self.top_k, dim=-1)
25 | top_k_logits = torch.full_like(gate_logits, float("-inf"))
26 |         top_k_logits.scatter_(-1, top_indices, top_logits)  # scatter along the expert dim
27 | return F.softmax(top_k_logits, dim=-1)
28 |
29 |
30 | class MoE(nn.Module):
31 | def __init__(self, model_dim, hidden_dim, num_experts, top_k):
32 | super(MoE, self).__init__()
33 | self.experts = nn.ModuleList(
34 | [
35 | SwiGLU(model_dim, hidden_dim, model_dim)
36 | for _ in range(num_experts)
37 | ]
38 | )
39 | self.gate = TopKGate(model_dim, num_experts, top_k)
40 |
41 | def forward(self, x):
42 | gate_scores = self.gate(x)
43 | expert_outputs = torch.stack(
44 | [expert(x) for expert in self.experts], dim=2
45 | )
46 | weighted_expert_outputs = gate_scores.unsqueeze(-1) * expert_outputs
47 | return weighted_expert_outputs.sum(dim=2)
48 |
49 |
50 | # Model architecture parameters
51 | model_dim = 4096
52 | n_layers = 32
53 | head_dim = 128
54 | hidden_dim = 14336
55 | n_heads = 32
56 | context_len = 32768
57 | vocab_size = 32000
58 | num_experts = 8
59 | top_k_experts = 2
60 |
61 | # Create a single MoE layer as a demonstration. The demo is guarded so
62 | # importing this module does not allocate a 32k-token activation tensor.
63 | if __name__ == "__main__":
64 |     moe_layer = MoE(model_dim, hidden_dim, num_experts, top_k_experts)
65 |
66 |     # Example input tensor: (batch_size, seq_len, model_dim)
67 |     x = torch.rand(1, context_len, model_dim)
68 |
69 |     # Forward pass through the MoE layer
70 |     output = moe_layer(x)
71 |     print(output)
--------------------------------------------------------------------------------
/swarms_torch/structs/mixture_of_mamba.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn, Tensor
3 | from zeta.nn import MambaBlock
4 |
5 |
6 | def router(
7 | x: Tensor,
8 | k: int,
9 | largest: bool = True,
10 | experts: nn.ModuleList = None,
11 | limit_of_experts: int = None,
12 | dropout_on: bool = False,
13 | dropout_p: float = 0.2,
14 | dim: int = -1,
15 | *args,
16 | **kwargs,
17 | ):
18 | # If experts is None, then we use the default topk function
19 |     topk = torch.topk(x, k, largest=largest, *args, **kwargs).values  # keep only the values tensor
20 |
21 | # Adaptive log softmax with loss
22 | # softmax = nn.LogSoftmax(dim)
23 | # topk = softmax(x)
24 |
25 | # Dropout
26 | if dropout_on:
27 | dropout = nn.Dropout(dropout_p)
28 | topk = dropout(topk)
29 |
30 | # If limit_of_experts is not None, then we only send the topk to the
31 | # experts. This is useful when we want to limit the number of experts
32 | # that we send the topk to.
33 | if limit_of_experts is not None:
34 | experts = experts[:limit_of_experts]
35 |
36 | # Send the topk to the experts
37 | if experts is not None:
38 | topk = [expert(topk) for expert in experts]
39 | return topk
40 |
41 |
42 | class MixtureOfMambas(nn.Module):
43 | """
44 | Mixtures of Mamba is a swarm of Mamba models. The swarm can be aggregated
45 | using a weighted average or a simple average. We plan to add more aggregation
46 | methods in the future like a gating mechanism or a neural network or a
47 | transformer.
48 |
49 |     Args:
50 |         num_mambas (int): number of Mamba blocks in the swarm
51 |         dim (int): model (embedding) dimension
52 |         d_state (int): state dimension of each Mamba block
53 |         depth (int): number of layers in each Mamba block
54 |         d_conv (int): convolution width of each Mamba block
55 |         expand (int): expansion factor inside each Mamba block
56 |         fusion_method (str, optional): "average", "weighted", "absmax", "softmax", or "custom". Defaults to "average".
57 | Example::
58 | >>> model = MixtureOfMambas(
59 | ... num_mambas=2,
60 | ... dim=512,
61 | ... d_state=1024,
62 | ... depth=4,
63 | ... d_conv=1024,
64 | ... expand=4,
65 | ... fusion_method="average",
66 | ... )
67 | >>> x = torch.rand(1, 512, 512)
68 | >>> model(x).shape
69 | torch.Size([1, 512, 512])
70 | """
71 |
72 | def __init__(
73 | self,
74 | num_mambas: int,
75 | dim: int,
76 | d_state: int,
77 | depth: int,
78 | d_conv,
79 | expand: int,
80 | fusion_method: str = "average",
81 | custom_fusion_func: callable = None,
82 | *args,
83 | **kwargs,
84 | ):
85 | super(MixtureOfMambas, self).__init__()
86 | self.num_mambas = num_mambas
87 | self.dim = dim
88 | self.d_state = d_state
89 | self.depth = depth
90 | self.d_conv = d_conv
91 | self.expand = expand
92 | self.fusion_method = fusion_method
93 | self.custom_fusion_func = custom_fusion_func
94 |
95 | self.models = nn.ModuleList()
96 | for _ in range(num_mambas):
97 | mamba_model = MambaBlock(
98 | dim, depth, d_state, expand, d_conv, *args, **kwargs
99 | )
100 | self.models.append(mamba_model)
101 |
102 | def forward(self, x: torch.Tensor, weights=None):
103 | """Forward pass of the swarm
104 |
105 | Args:
106 |             x (torch.Tensor): input tensor, e.g. (batch, seq_len, dim)
107 |             weights (list, optional): per-model fusion weights. Defaults to None.
108 |
109 |         Raises:
110 |             ValueError: if the fusion method is unknown or misconfigured
111 |
112 |         Returns:
113 |             torch.Tensor: fused output of the swarm
114 | """
115 | outputs = [model(x) for model in self.models]
116 |
117 | if self.fusion_method == "average":
118 | return self.average_aggregate(outputs)
119 | elif self.fusion_method == "weighted":
120 | return self.weighted_aggregate(outputs, weights)
121 | elif self.fusion_method == "absmax":
122 | return self.absmax_aggregate(outputs, weights)
123 | elif self.fusion_method == "softmax":
124 | return self.softmax_aggregate(outputs, weights)
125 | elif self.fusion_method == "custom":
126 | if self.custom_fusion_func is None:
127 | raise ValueError(
128 | "custom_fusion_func must be provided if fusion_method is"
129 | " custom"
130 | )
131 | return self.custom_fusion_func(outputs, weights)
132 | else:
133 | raise ValueError(
134 | f"Unknown aggregation method: {self.fusion_method}"
135 | )
136 |
137 |     def average_aggregate(self, outputs):
138 |         """Average the outputs of the models in the swarm
139 | 
140 |         Args:
141 |             outputs (list[torch.Tensor]): One output per model
142 | 
143 |         Returns:
144 |             torch.Tensor: The element-wise mean of the outputs
145 |         """
146 | return torch.mean(torch.stack(outputs), dim=0)
147 |
148 |     def weighted_aggregate(self, outputs, weights):
149 |         """Weighted average of the outputs of the models in the swarm
150 | 
151 |         Args:
152 |             outputs (list[torch.Tensor]): One output per model
153 |             weights (list[float]): One weight per model
154 | 
155 |         Raises:
156 |             ValueError: If weights is None or mismatched in length
157 | 
158 |         Returns:
159 |             torch.Tensor: The weighted sum of the outputs
160 |         """
161 | if weights is None or len(weights) != len(outputs):
162 | raise ValueError("Weights must be the same length as outputs")
163 | weighted_outputs = [
164 | weight * output for weight, output in zip(weights, outputs)
165 | ]
166 | return sum(weighted_outputs)
167 |
168 |     def softmax_aggregate(self, outputs, weights):
169 |         """Softmax of the (optionally weighted) sum of the outputs
170 | 
171 |         Args:
172 |             outputs (list[torch.Tensor]): One output per model
173 |             weights (list[float], optional): Per-model weights
174 | 
175 |         Raises:
176 |             ValueError: If weights is mismatched in length
177 | 
178 |         Returns:
179 |             torch.Tensor: Softmax-normalized aggregate of the outputs
180 |         """
181 |         if weights is not None and len(weights) != len(outputs):
182 |             raise ValueError("Weights must be the same length as outputs")
183 |         if weights:
184 |             weighted_outputs = [
185 |                 weight * output for weight, output in zip(weights, outputs)
186 |             ]
187 |             out = sum(weighted_outputs)
188 |         else:
189 |             # Stack the list so softmax receives a single tensor
190 |             out = torch.stack(outputs).sum(dim=0)
191 | 
192 |         return torch.softmax(out, dim=1)
193 |
194 |     def absmax(self, outputs):
195 |         """Absolute maximum of the outputs of the models in the swarm
196 | 
197 |         Args:
198 |             outputs (list[torch.Tensor]): One output per model
199 | 
200 |         Returns:
201 |             torch.Tensor: Element-wise absolute maximum over the models
202 |         """
203 |         # Take the element-wise max of |output| across the stacked models
204 |         return torch.max(torch.abs(torch.stack(outputs)), dim=0)[0]
205 |
206 |     def absmax_aggregate(self, outputs, weights=None):
207 |         """
208 |         Absolute-maximum aggregation of the (optionally weighted) outputs
209 | 
210 |         Args:
211 |             outputs (list[torch.Tensor]): One output per model
212 |             weights (list[float], optional): Per-model weights
213 | 
214 |         Raises:
215 |             ValueError: If weights is mismatched in length
216 | 
217 |         Returns:
218 |             torch.Tensor: Element-wise absolute maximum of the outputs
219 |         """
220 |         if weights is not None and len(weights) != len(outputs):
221 |             raise ValueError("Weights must be the same length as outputs")
222 | 
223 |         if weights:
224 |             weighted_outputs = [
225 |                 weight * output for weight, output in zip(weights, outputs)
226 |             ]
227 |             return self.absmax(weighted_outputs)
228 |         else:
229 |             return self.absmax(outputs)
230 |
--------------------------------------------------------------------------------
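Editor's note: a minimal usage sketch for MixtureOfMambas. The hyperparameters below are illustrative only, and it is assumed that zeta's MambaBlock accepts inputs of shape (batch, seq_len, dim):

    import torch
    from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas

    # Small, illustrative hyperparameters
    swarm = MixtureOfMambas(
        num_mambas=3,
        dim=64,
        d_state=16,
        depth=2,
        d_conv=4,
        expand=2,
        fusion_method="average",
    )

    x = torch.randn(1, 32, 64)  # (batch, seq_len, dim)
    out = swarm(x)  # element-wise mean of the three Mamba outputs
    print(out.shape)  # expected: torch.Size([1, 32, 64])

    # Weighted fusion instead: one scalar weight per model in the swarm
    swarm.fusion_method = "weighted"
    out = swarm(x, weights=[0.5, 0.3, 0.2])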
/swarms_torch/structs/neuronal_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | Cellular neural network
3 |
4 | Architecture:
5 | - Input -> Linear -> ReLU -> Linear -> ReLU -> Output
6 | - Neuron states are updated after each synapse
7 | - Softmax is applied after each synapse
8 | - Layer normalization is applied after each synapse
9 |
10 | """
11 |
12 | import torch
13 | from torch import nn
14 |
15 |
16 | class TransformerLayer(nn.Module):
17 | """
18 | Transformer Layer
19 |
20 | Architecture:
21 | - Input -> Linear -> ReLU -> Linear -> ReLU -> Output
22 |
23 | Args:
24 | input_dim (int): Input dimension
25 | output_dim (int): Output dimension
26 |
27 | Returns:
28 | torch.Tensor: Output tensor
29 |
30 |     Usage:
31 |         layer = TransformerLayer(input_dim=512, output_dim=256, nhead=8)
32 | """
33 |
34 | def __init__(
35 | self,
36 | input_dim,
37 | output_dim,
38 | nhead: int,
39 | ):
40 | super(TransformerLayer, self).__init__()
41 | self.transformer = nn.TransformerEncoderLayer(
42 | d_model=input_dim,
43 | nhead=nhead,
44 | )
45 | self.fc = nn.Linear(input_dim, output_dim)
46 |
47 | def forward(self, x):
48 | return self.fc(self.transformer(x))
49 |
50 |
51 | class Neuron(nn.Module):
52 | def __init__(self, num_states):
53 | super(Neuron, self).__init__()
54 | self.states = nn.Parameter(torch.randn(num_states))
55 |
56 |
57 | class SynapseTransformer(nn.Module):
58 | def __init__(self, input_dim, output_dim, nhead: int):
59 | super(SynapseTransformer, self).__init__()
60 | self.transformer = TransformerLayer(input_dim, output_dim, nhead)
61 |
62 | def forward(self, x):
63 | return self.transformer(x)
64 |
65 |
66 | class NNTransformer(nn.Module):
67 | """
68 | Neural Network NNTransformer
69 |
70 | Args:
71 | neuron_count (int): Number of neurons
72 | num_states (int): Number of states
73 | input_dim (int): Input dimension
74 | output_dim (int): Output dimension
75 | nhead (int): Number of heads in transformer layer
76 |
77 | Returns:
78 | torch.Tensor: Output tensor
79 |
80 | Architecture:
81 | - Input -> Linear -> ReLU -> Linear -> ReLU -> Output
82 | - Neuron states are updated after each synapse
83 | - Softmax is applied after each synapse
84 | - Layer normalization is applied after each synapse
85 |
86 | Usage:
87 |         network = NNTransformer(5, 10, 10, 10, 2)
88 | output = network(torch.randn(1, 10))
89 | print(output)
90 |
91 |
92 | Training:
93 | network = NNTransformer(5, 10, 10, 10, 2)
94 | output = network(torch.randn(1, 10))
95 | print(output)
96 |
97 |
98 | # Test the network
99 | import torch
100 | import torch.optim as optim
101 | import torch.nn.functional as F
102 |
103 | # Random dataset
104 | batch_size = 64
105 | input_size = 10
106 | output_size = 10
107 |
108 | x = torch.randn(batch_size, input_size) # Random inputs
109 | y = torch.randn(batch_size, output_size) # Random targets
110 |
111 | # Hyperparameters
112 | neuron_count = 5
113 | num_states = 10
114 | input_dim = input_size
115 | output_dim = output_size
116 | n_head = 2
117 |
118 | # Initialize the network
119 |     network = NNTransformer(neuron_count, num_states, input_dim, output_dim, n_head)
120 |
121 | # Define the loss function and optimizer
122 | criterion = nn.MSELoss()
123 | optimizer = optim.Adam(network.parameters(), lr=0.001)
124 |
125 | # Training loop
126 | epochs = 1000
127 | for epoch in range(epochs):
128 | # Forward pass
129 | outputs = network(x)
130 |
131 | # Compute loss
132 | loss = criterion(outputs, y)
133 |
134 | # Backward pass and optimization
135 | optimizer.zero_grad()
136 | loss.backward()
137 | optimizer.step()
138 |
139 | # Print loss every 100 epochs
140 | if (epoch+1) % 100 == 0:
141 | print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")
142 |
143 | # Test the network with a new random input
144 | test_input = torch.randn(1, input_size)
145 | test_output = network(test_input)
146 | print(test_output)
147 |
148 |
149 | """
150 |
151 | def __init__(self, neuron_count, num_states, input_dim, output_dim, nhead):
152 | super(NNTransformer, self).__init__()
153 |
154 | # Initialize neurons and synapses
155 | self.neurons = nn.ModuleList(
156 | [Neuron(num_states) for _ in range(neuron_count)]
157 | )
158 | self.synapses = nn.ModuleList(
159 | [
160 | SynapseTransformer(input_dim, output_dim, nhead)
161 | for _ in range(neuron_count)
162 | ]
163 | )
164 |
165 | self.norm = nn.LayerNorm(output_dim)
166 | self.softmax = nn.Softmax(dim=1)
167 |
168 | def forward(self, x):
169 | for neuron, synapse in zip(self.neurons[:-1], self.synapses):
170 | # norm before synapse
171 | x = self.norm(x)
172 |
173 | # synapse
174 | x = synapse(x)
175 |
176 | # softmax after synapse
177 | x = self.softmax(x)
178 |
179 | neuron.states.data = x
180 | return self.neurons[-1].states
181 |
--------------------------------------------------------------------------------
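Editor's note: a minimal forward-pass sketch for NNTransformer. Because the internal LayerNorm is sized to output_dim but applied before the first synapse, input_dim and output_dim should match; the module returns the state vector of the last neuron (shape [num_states]) rather than a per-sample batch:

    import torch
    from swarms_torch.structs.neuronal_transformer import NNTransformer

    net = NNTransformer(
        neuron_count=5, num_states=10, input_dim=64, output_dim=64, nhead=8
    )
    x = torch.randn(4, 8, 64)  # (seq_len, batch, dim)
    out = net(x)
    print(out.shape)  # torch.Size([10]): the last neuron's state vector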
/swarms_torch/structs/parallel_wrapper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from typing import List
4 |
5 |
6 | class ParallelSwarm(nn.Module):
7 | def __init__(
8 | self,
9 | models: List[nn.Module],
10 | ):
11 | """
12 | Initializes a parallel swarm of models.
13 |
14 | Args:
15 | models (List[nn.Module]): A list of PyTorch models.
16 |
17 | """
18 |         super().__init__()
19 | 
20 |         # Register the models in a ModuleList so that their parameters
21 |         # are tracked by PyTorch
22 |         self.models = nn.ModuleList(models)
23 |
24 | def forward(self, x: torch.Tensor, *args, **kwargs):
25 | """Forward pass of the swarm
26 |
27 | Args:
28 | x (torch.Tensor): _description_
29 | weights (_type_, optional): _description_. Defaults to None.
30 |
31 | Returns:
32 | torch.Tensor: _description_
33 | """
34 | outputs = []
35 | for model in self.models:
36 | outputs.append(model(x, *args, **kwargs))
37 | return outputs
38 |
--------------------------------------------------------------------------------
/swarms_torch/structs/queen_bee.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class QueenBeeGa(nn.Module):
6 | """
7 | Queen Bee evolution for genetic algos
8 |
9 | Inspired by the evolution of bees, the fittest solution is designated
10 | and the rest of the population contends to mate with it.
11 |
12 |     The strong exploitation is balanced by a higher-than-normal mutation rate.
13 |
14 | Reference:
15 | ---------
16 | https://www.researchgate.net/publication/228961729_A_Queen_Bee_GA_for_optimization
17 |
18 | Usage
19 | -----
20 | optimizer = QueenBeeGa(
21 | goal="Attention is all you need",
22 | pop_size=100,
23 | mutation_prob=0.04,
24 | strong_mutation_rate=0.1,
25 | strong_mutation_prob=0.25,
26 | num_tournament_participants=25
27 | )
28 | optimizer.run(max_generations=100)
29 | """
30 |
31 | def __init__(
32 | self,
33 | goal: str = "Attention is all you need",
34 | pop_size: int = 100,
35 | mutation_prob: float = 0.04,
36 | strong_mutation_rate: float = 0.1,
37 | strong_mutation_prob: float = 0.25,
38 | num_tournament_participants: int = 25,
39 | ):
40 | """
41 | QueenBeeGa with params and initial configs
42 |
43 | Parameters
44 | ----------
45 | goal: str
46 | The goal string to be optimized
47 | pop_size: int
48 |             Population size (number of bees)
49 | mutation_prob: float
50 | Mutation rate
51 | strong_mutation_rate: float
52 | Strong mutation rate
53 | strong_mutation_prob: float
54 | Strong mutation probability
55 | num_tournament_participants: int
56 | Number of tournament participants
57 | """
58 | self.goal = goal
59 | self.pop_size = pop_size
60 | self.mutation_prob = mutation_prob
61 | self.strong_mutation_rate = strong_mutation_rate
62 | self.strong_mutation_prob = strong_mutation_prob
63 | self.num_tournament_participants = num_tournament_participants
64 |
65 | self.gene_length = len(goal)
66 | self.gene_midpoint = self.gene_length // 2
67 | self.target_gene = self.encode(goal)
68 |
69 | self.strong_mutate_pool_size = strong_mutation_rate * pop_size
70 | self.num_code_mutate = mutation_prob * self.gene_length
71 | self.strong_num_code_mutate = strong_mutation_prob * self.gene_length
72 |
73 | self.pool = torch.randint(0, 255, (pop_size, self.gene_length))
74 | self.queen = None
75 | self.queen_fitness = None
76 | self.generation = 0
77 |
78 | @staticmethod
79 | def encode(s):
80 | """Convert string to it's values"""
81 | return torch.tensor([ord(c) for c in s])
82 |
83 | @staticmethod
84 | def decode(t):
85 | """Convert ASCII values tensor back to string"""
86 | return "".join([chr(i) for i in t.tolist()])
87 |
88 | def run(self, max_generations: int = 1000):
89 | """
90 |         Run the queen bee genetic algorithm evolution
91 |
92 | Parameters:
93 | -----------
94 | max_generations: int
95 | Maximum number of generations
96 | """
97 | for _ in range(max_generations):
98 | self.generation += 1
99 | print(f"Generation: {self.generation}")
100 | self._evolve()
101 | if self._check_convergence():
102 |                 break
103 |
104 | def _evolve(self):
105 | """
106 | Execute one step of the evolution process.
107 | """
108 |
109 | # Sort population by fitness
110 | fitnesses = 1.0 / torch.square(self.pool - self.target_gene).sum(dim=-1)
111 | indices = fitnesses.sort(descending=True).indices
112 | self.pool, fitnesses = self.pool[indices], fitnesses[indices]
113 |
114 | # Display every generation
115 | if self.queen is not None:
116 | print("queen:")
117 | print(
118 | f"{self.decode(self.queen)} ({self.queen_fitness.item():.3f})\n"
119 | )
120 | for gene, fitness in zip(self.pool, fitnesses):
121 | print(f"{self.decode(gene)} ({fitness.item():.3f})")
122 |
123 | # If one of the children has a better fitness than queen, that child becomes the new queen
124 | # and the queen replaces the worst bee in the population
125 | if self.queen is not None and self.queen_fitness < fitnesses[0]:
126 | self.pool = torch.cat((self.pool, self.queen[None, :]), dim=0)
127 | fitnesses = torch.cat((fitnesses, self.queen_fitness[None]), dim=0)
128 | self.queen = self.queen_fitness = None
129 |
130 | # Separate the queen bee from the rest of the population
131 | if self.queen is None:
132 | self.queen, self.pool = self.pool[0], self.pool[1:]
133 | self.queen_fitness, fitnesses = fitnesses[0], fitnesses[1:]
134 |
135 | # Deterministic tournament selection
136 | contender_ids = torch.randn(
137 | (self.pop_size - 1, self.pop_size - 1)
138 | ).argsort(dim=-1)[..., : self.num_tournament_participants]
139 | participants, tournaments = (
140 | self.pool[contender_ids],
141 | fitnesses[contender_ids],
142 | )
143 | top_winner = tournaments.topk(
144 | 1, dim=-1, largest=True, sorted=False
145 | ).indices
146 | top_winner = top_winner.unsqueeze(-1).expand(-1, -1, self.gene_length)
147 | parents = participants.gather(1, top_winner).squeeze(1)
148 |
149 | # Cross over all chosen drones with the queen
150 | queen_parents = self.queen.unsqueeze(0).expand(
151 | self.pop_size - 1, self.gene_length
152 | )
153 | self.pool = torch.cat(
154 | (
155 | queen_parents[:, : self.gene_midpoint],
156 | parents[:, self.gene_midpoint :],
157 | ),
158 | dim=-1,
159 | )
160 |
161 | # Mutate genes in population
162 | mutate_mask = (
163 | torch.randn(self.pool.shape).argsort(dim=-1) < self.num_code_mutate
164 | )
165 | noise = torch.randint(0, 2, self.pool.shape) * 2 - 1
166 | mutated_pool = torch.where(mutate_mask, self.pool + noise, self.pool)
167 |
168 | strong_mutate_mask = (
169 | torch.randn(self.pool.shape).argsort(dim=-1)
170 | < self.strong_num_code_mutate
171 | )
172 | noise = torch.randint(0, 2, self.pool.shape) * 2 - 1
173 | strong_mutated_pool = torch.where(
174 | strong_mutate_mask, self.pool + noise, self.pool
175 | )
176 |
177 | strong_mutate_pool_mask = (
178 | torch.randn(self.pop_size - 1).argsort(dim=-1)
179 | < self.strong_mutate_pool_size
180 | )
181 | self.pool = torch.where(
182 | strong_mutate_pool_mask[:, None], strong_mutated_pool, mutated_pool
183 | )
184 | self.pool.clamp_(0, 255)
185 |
186 | def _check_convergence(self):
187 | """
188 | Check if any of the solutions has achieved the goal
189 | """
190 | fitnesses = 1.0 / torch.square(self.pool - self.target_gene).sum(dim=-1)
191 | return (fitnesses == float("inf")).any().item()
192 |
193 |
194 | # # Usage:
195 | # optimizer = QueenBeeGa()
196 | # optimizer.run(max_generations=100)
197 |
--------------------------------------------------------------------------------
/swarms_torch/structs/simple_moe.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn, Tensor
3 | import torch.nn.functional as F
4 |
5 |
6 | class FeedForward(nn.Module):
7 | """
8 | Simple FeedForward module.
9 |
10 | Args:
11 | dim (int): Input dimension
12 | hidden_dim (int): Hidden dimension
13 | mult (int): Multiplier for hidden dimension
14 | dropout (float): Dropout rate
15 | """
16 |
17 | def __init__(
18 | self,
19 | dim: int,
20 | hidden_dim: int = None,
21 | mult: int = 4,
22 | dropout: float = 0.0,
23 | *args,
24 | **kwargs,
25 | ):
26 | super().__init__()
27 | hidden_dim = hidden_dim or dim * mult
28 |
29 | self.net = nn.Sequential(
30 | nn.Linear(dim, hidden_dim),
31 | nn.GELU(),
32 | nn.Dropout(dropout),
33 | nn.Linear(hidden_dim, dim),
34 | nn.Dropout(dropout),
35 | )
36 |
37 | def forward(self, x: Tensor) -> Tensor:
38 | return self.net(x)
39 |
40 |
41 | class GatingMechanism(nn.Module):
42 | def __init__(
43 | self,
44 | dim: int,
45 | num_experts: int,
46 | ):
47 | """
48 | GatingMechanism is a class that represents the gating mechanism in a mixture of experts model.
49 |
50 | Args:
51 | dim (int): The input dimension.
52 | num_experts (int): The number of experts in the mixture.
53 |
54 | """
55 | super().__init__()
56 | self.gate = nn.Linear(dim, num_experts)
57 |
58 | def forward(self, x: Tensor):
59 | """
60 | Forward pass of the gating mechanism.
61 |
62 | Args:
63 | x (Tensor): The input tensor.
64 |
65 | Returns:
66 | Tensor: The output tensor after applying the gating mechanism.
67 |
68 | """
69 | return F.softmax(self.gate(x), dim=-1)
70 |
71 |
72 | class SimpleMoE(nn.Module):
73 | """
74 | Simple Mixture of Experts (MoE) model.
75 |
76 | Args:
77 | dim (int): Input dimension.
78 | hidden_dim (int): Hidden dimension of the feedforward network.
79 | output_dim (int): Output dimension.
80 | num_experts (int): Number of experts in the MoE.
81 | mult (int, optional): Multiplier for the hidden dimension. Defaults to 4.
82 | """
83 |
84 | def __init__(
85 | self,
86 | dim,
87 | hidden_dim: int,
88 | output_dim: int,
89 | num_experts: int,
90 | mult: int = 4,
91 | ):
92 | super().__init__()
93 | self.dim = dim
94 | self.hidden_dim = hidden_dim
95 | self.output_dim = output_dim
96 | self.num_experts = num_experts
97 | self.mult = mult
98 |
99 | self.experts = nn.ModuleList(
100 |             [FeedForward(dim, hidden_dim, mult) for _ in range(num_experts)]
101 | )
102 | self.gate = GatingMechanism(dim, num_experts)
103 |
104 | def forward(self, x: Tensor):
105 | """
106 | Forward pass of the SimpleMoE model.
107 |
108 | Args:
109 | x (Tensor): Input tensor of shape (batch_size, sequence_length, input_dim).
110 |
111 | Returns:
112 | Tensor: Output tensor of shape (batch_size, sequence_length, output_dim).
113 | """
114 | gating_scores = self.gate(x)
115 |
116 | expert_outputs = torch.stack(
117 | [expert(x) for expert in self.experts], dim=-1
118 | )
119 |
120 | output = torch.sum(gating_scores.unsqueeze(2) * expert_outputs, dim=-1)
121 |
122 | return output
123 |
--------------------------------------------------------------------------------
/swarms_torch/structs/switch_moe.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn, Tensor
3 | import torch.nn.functional as F
4 |
5 |
6 | class FeedForward(nn.Module):
7 | """
8 | Simple FeedForward module.
9 |
10 | Args:
11 | dim (int): Input dimension
12 | hidden_dim (int): Hidden dimension
13 | mult (int): Multiplier for hidden dimension
14 | dropout (float): Dropout rate
15 | """
16 |
17 | def __init__(
18 | self,
19 | dim: int,
20 | hidden_dim: int = None,
21 | mult: int = 4,
22 | dropout: float = 0.0,
23 | *args,
24 | **kwargs,
25 | ):
26 | super().__init__()
27 | hidden_dim = hidden_dim or dim * mult
28 |
29 | self.net = nn.Sequential(
30 | nn.Linear(dim, hidden_dim),
31 | nn.GELU(),
32 | nn.Dropout(dropout),
33 | nn.Linear(hidden_dim, dim),
34 | nn.Dropout(dropout),
35 | )
36 |
37 | def forward(self, x: Tensor) -> Tensor:
38 | return self.net(x)
39 |
40 |
41 | class SwitchGate(nn.Module):
42 | """
43 | SwitchGate module for MoE (Mixture of Experts) model.
44 |
45 | Args:
46 | dim (int): Input dimension.
47 | num_experts (int): Number of experts.
48 | capacity_factor (float, optional): Capacity factor for sparsity. Defaults to 1.0.
49 | *args: Variable length argument list.
50 | **kwargs: Arbitrary keyword arguments.
51 | """
52 |
53 | def __init__(
54 | self,
55 | dim,
56 | num_experts: int,
57 | capacity_factor: float = 1.0,
58 | epsilon: float = 1e-6,
59 | *args,
60 | **kwargs,
61 | ):
62 | super().__init__()
63 | self.dim = dim
64 | self.num_experts = num_experts
65 | self.capacity_factor = capacity_factor
66 | self.epsilon = epsilon
67 | self.w_gate = nn.Linear(dim, num_experts)
68 |
69 | def forward(self, x: Tensor, use_aux_loss=False):
70 | """
71 | Forward pass of the SwitchGate module.
72 |
73 | Args:
74 | x (Tensor): Input tensor.
75 |
76 | Returns:
77 | Tensor: Gate scores.
78 | """
79 | # Compute gate scores
80 | gate_scores = F.softmax(self.w_gate(x), dim=-1)
81 |
82 |         # Per-expert token capacity, scaled by the capacity factor
83 |         capacity = int(self.capacity_factor * x.size(0))
84 | 
85 |         # Determine the top-1 expert for each token
86 |         top_k_scores, top_k_indices = gate_scores.topk(1, dim=-1)
87 |         # Mask to enforce sparsity along the expert dimension
88 |         mask = torch.zeros_like(gate_scores).scatter_(-1, top_k_indices, 1)
89 |
90 | # Combine gating scores with the mask
91 | masked_gate_scores = gate_scores * mask
92 |
93 | # Denominators
94 | denominators = masked_gate_scores.sum(0, keepdim=True) + self.epsilon
95 |
96 | # Norm gate scores to sum to the capacity
97 | gate_scores = (masked_gate_scores / denominators) * capacity
98 |
99 | if use_aux_loss:
100 | # Calculate load balancing loss
101 | # Both metrics should be per-expert (sum over batch dimension)
102 | load = gate_scores.sum(
103 | 0
104 | ) # Sum over all examples - shape: (num_experts,)
105 | importance = gate_scores.mean(
106 | 0
107 | ) # Mean over all examples - shape: (num_experts,)
108 |
109 | # Aux loss encourages load balancing between experts
110 | # Using coefficient from Switch Transformer paper
111 | loss = self.num_experts * ((load * importance).sum())
112 |
113 | return gate_scores, loss
114 |
115 | return gate_scores, None
116 |
117 |
118 | class SwitchMoE(nn.Module):
119 | """
120 | A module that implements the Switched Mixture of Experts (MoE) architecture.
121 |
122 | Args:
123 | dim (int): The input dimension.
124 | hidden_dim (int): The hidden dimension of the feedforward network.
125 | output_dim (int): The output dimension.
126 | num_experts (int): The number of experts in the MoE.
127 | capacity_factor (float, optional): The capacity factor that controls the capacity of the MoE. Defaults to 1.0.
128 | mult (int, optional): The multiplier for the hidden dimension of the feedforward network. Defaults to 4.
129 | *args: Variable length argument list.
130 | **kwargs: Arbitrary keyword arguments.
131 |
132 | Attributes:
133 | dim (int): The input dimension.
134 | hidden_dim (int): The hidden dimension of the feedforward network.
135 | output_dim (int): The output dimension.
136 | num_experts (int): The number of experts in the MoE.
137 | capacity_factor (float): The capacity factor that controls the capacity of the MoE.
138 | mult (int): The multiplier for the hidden dimension of the feedforward network.
139 | experts (nn.ModuleList): The list of feedforward networks representing the experts.
140 | gate (SwitchGate): The switch gate module.
141 |
142 | """
143 |
144 | def __init__(
145 | self,
146 | dim: int,
147 | hidden_dim: int,
148 | output_dim: int,
149 | num_experts: int,
150 | capacity_factor: float = 1.0,
151 | mult: int = 4,
152 | use_aux_loss: bool = False,
153 | *args,
154 | **kwargs,
155 | ):
156 | super().__init__()
157 | self.dim = dim
158 | self.hidden_dim = hidden_dim
159 | self.output_dim = output_dim
160 | self.num_experts = num_experts
161 | self.capacity_factor = capacity_factor
162 | self.mult = mult
163 | self.use_aux_loss = use_aux_loss
164 |
165 | self.experts = nn.ModuleList(
166 | [
167 |                 FeedForward(dim, hidden_dim, mult, *args, **kwargs)
168 | for _ in range(num_experts)
169 | ]
170 | )
171 |
172 | self.gate = SwitchGate(
173 | dim,
174 | num_experts,
175 | capacity_factor,
176 | )
177 |
178 | def forward(self, x: Tensor):
179 | """
180 | Forward pass of the SwitchMoE module.
181 |
182 | Args:
183 | x (Tensor): The input tensor.
184 |
185 | Returns:
186 |             tuple: The MoE output tensor and the auxiliary loss (or None).
187 |
188 | """
189 | # (batch_size, seq_len, num_experts)
190 | gate_scores, loss = self.gate(x, use_aux_loss=self.use_aux_loss)
191 |
192 | # Dispatch to experts
193 | expert_outputs = [expert(x) for expert in self.experts]
194 |
195 | # Check if any gate scores are nan and handle
196 | if torch.isnan(gate_scores).any():
197 | print("NaN in gate scores")
198 | gate_scores[torch.isnan(gate_scores)] = 0
199 |
200 | # Stack and weight outputs
201 | stacked_expert_outputs = torch.stack(
202 | expert_outputs, dim=-1
203 | ) # (batch_size, seq_len, output_dim, num_experts)
204 | if torch.isnan(stacked_expert_outputs).any():
205 | stacked_expert_outputs[torch.isnan(stacked_expert_outputs)] = 0
206 |
207 | # Combine expert outputs and gating scores
208 | moe_output = torch.sum(
209 | gate_scores.unsqueeze(-2) * stacked_expert_outputs, dim=-1
210 | )
211 |
212 | return moe_output, loss
213 |
--------------------------------------------------------------------------------
/swarms_torch/swarmalators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/swarms-pytorch/0fa9adeb7da3e33ec53dba65b985c606a7ff558a/swarms_torch/swarmalators/__init__.py
--------------------------------------------------------------------------------
/swarms_torch/swarmalators/swarmalator_base.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def pairwise_distances(x):
5 | # Compute pairwise distance matrix
6 | diff = x.unsqueeze(1) - x.unsqueeze(0)
7 | return torch.sqrt((diff**2).sum(2))
8 |
9 |
10 | def function_for_x(
11 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
12 | ):
13 | dists = pairwise_distances(xi)
14 | mask = (dists < R).float() - torch.eye(N)
15 |
16 | interaction_term = mask.unsqueeze(2) * (
17 | sigma_i.unsqueeze(0) - sigma_i.unsqueeze(1)
18 | )
19 | interaction_sum = interaction_term.sum(1)
20 |
21 | # Define dynamics for x based on our assumptions
22 | dx = J * interaction_sum + alpha * xi - beta * (xi**3)
23 | return dx
24 |
25 |
26 | def function_for_sigma(
27 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
28 | ):
29 | dists = pairwise_distances(xi)
30 | mask = (dists < R).float() - torch.eye(N)
31 |
32 | interaction_term = mask.unsqueeze(2) * (xi.unsqueeze(0) - xi.unsqueeze(1))
33 | interaction_sum = interaction_term.sum(1)
34 |
35 | # Define dynamics for sigma based on our assumptions
36 | d_sigma = (
37 | gamma * interaction_sum + epsilon_a * sigma_i - epsilon_r * (sigma_i**3)
38 | )
39 | return d_sigma
40 |
41 |
42 | def simulate_swarmalators(
43 |     N, J, alpha, beta, gamma, epsilon_a, epsilon_r,
44 |     R, D, T=100, dt=0.1, xi=None, sigma_i=None,
45 | ):
46 |     # Use provided initial conditions when given, otherwise sample them
47 |     xi = 2 * torch.rand(N, 3) - 1 if xi is None else xi
48 |     if sigma_i is None:
49 |         sigma_i = torch.nn.functional.normalize(torch.randn(N, D), dim=1)
50 |     results_xi, results_sigma_i = [], []
51 |     for t in range(T):
52 |         # One vectorized RK4 update per time step (all N agents at once)
53 |         dx = function_for_x(
54 |             xi,
55 |             sigma_i,
56 |             N,
57 |             J,
58 |             alpha,
59 |             beta,
60 |             gamma,
61 |             epsilon_a,
62 |             epsilon_r,
63 |             R,
64 |             D,
65 |         )
66 |         d_sigma = function_for_sigma(
67 |             xi,
68 |             sigma_i,
69 |             N,
70 |             J,
71 |             alpha,
72 |             beta,
73 |             gamma,
74 |             epsilon_a,
75 |             epsilon_r,
76 |             R,
77 |             D,
78 |         )
79 | 
80 |         # RK4 for xi
81 |         k1_x = dt * dx
82 |         k2_x = dt * function_for_x(
83 |             xi + 0.5 * k1_x,
84 |             sigma_i,
85 |             N,
86 |             J,
87 |             alpha,
88 |             beta,
89 |             gamma,
90 |             epsilon_a,
91 |             epsilon_r,
92 |             R,
93 |             D,
94 |         )
95 |         k3_x = dt * function_for_x(
96 |             xi + 0.5 * k2_x,
97 |             sigma_i,
98 |             N,
99 |             J,
100 |             alpha,
101 |             beta,
102 |             gamma,
103 |             epsilon_a,
104 |             epsilon_r,
105 |             R,
106 |             D,
107 |         )
108 |         k4_x = dt * function_for_x(
109 |             xi + k3_x,
110 |             sigma_i,
111 |             N,
112 |             J,
113 |             alpha,
114 |             beta,
115 |             gamma,
116 |             epsilon_a,
117 |             epsilon_r,
118 |             R,
119 |             D,
120 |         )
121 |         xi = xi + (1 / 6) * (k1_x + 2 * k2_x + 2 * k3_x + k4_x)
122 | 
123 |         # RK4 for sigma_i
124 |         k1_sigma = dt * d_sigma
125 |         k2_sigma = dt * function_for_sigma(
126 |             xi,
127 |             sigma_i + 0.5 * k1_sigma,
128 |             N,
129 |             J,
130 |             alpha,
131 |             beta,
132 |             gamma,
133 |             epsilon_a,
134 |             epsilon_r,
135 |             R,
136 |             D,
137 |         )
138 |         k3_sigma = dt * function_for_sigma(
139 |             xi,
140 |             sigma_i + 0.5 * k2_sigma,
141 |             N,
142 |             J,
143 |             alpha,
144 |             beta,
145 |             gamma,
146 |             epsilon_a,
147 |             epsilon_r,
148 |             R,
149 |             D,
150 |         )
151 |         k4_sigma = dt * function_for_sigma(
152 |             xi,
153 |             sigma_i + k3_sigma,
154 |             N,
155 |             J,
156 |             alpha,
157 |             beta,
158 |             gamma,
159 |             epsilon_a,
160 |             epsilon_r,
161 |             R,
162 |             D,
163 |         )
164 |         sigma_i = sigma_i + (1 / 6) * (
165 |             k1_sigma + 2 * k2_sigma + 2 * k3_sigma + k4_sigma
166 |         )
167 |         sigma_i = torch.nn.functional.normalize(sigma_i, dim=1)
168 | 
169 | results_xi.append(xi.clone())
170 | results_sigma_i.append(sigma_i.clone())
171 |
172 | return results_xi, results_sigma_i
173 |
--------------------------------------------------------------------------------
/swarms_torch/swarmalators/swarmalator_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | Swarmalators with transformer models, SUPER EXPERIMENTAL, NEEDS WORK
3 | """
4 |
5 | import torch
6 | from torch import nn
7 |
8 |
9 | class SwarmalatorModel(nn.Module):
10 | """
11 |     # Example
12 |     N = 100  # number of swarmalators
13 |     D = 4  # dimensions; d_model must be divisible by nhead
14 | 
15 |     model = SwarmalatorModel(N, D, nhead=2)
16 |     positions, orientations = model()
17 |     print(positions, orientations)
18 | """
19 |
20 | def __init__(
21 | self, N, D, nhead=8, num_encoder_layers=6, num_decoder_layers=6
22 | ):
23 | super(SwarmalatorModel, self).__init__()
24 | self.N = N
25 | self.D = D
26 |
27 | self.positions = nn.Parameter(torch.randn(N, D))
28 | self.orientations = nn.Parameter(torch.randn(N, D))
29 |
30 | # Transformer encoder to process positions and orientations
31 | encoder_layer = nn.TransformerEncoderLayer(d_model=D, nhead=nhead)
32 | self.transformer_encoder = nn.TransformerEncoder(
33 | encoder_layer, num_layers=num_encoder_layers
34 | )
35 |
36 | # Transformer decoder to produce updated positions and orientations
37 | decoder_layer = nn.TransformerDecoderLayer(d_model=D, nhead=nhead)
38 | self.transformer_decoder = nn.TransformerDecoder(
39 | decoder_layer, num_layers=num_decoder_layers
40 | )
41 |
42 | def forward(self, src_mask=None, tgt_mask=None, memory_mask=None):
43 | # Using transformer encoder to get memory
44 | position_memory = self.transformer_encoder(
45 | self.positions.unsqueeze(1), mask=src_mask
46 | )
47 | orientation_memory = self.transformer_encoder(
48 | self.orientations.unsqueeze(1), mask=src_mask
49 | )
50 | # Using transformer decoder to get updated positions and orientations
51 | updated_positions = self.transformer_decoder(
52 | self.positions.unsqueeze(1),
53 | position_memory,
54 | tgt_mask=tgt_mask,
55 | memory_mask=memory_mask,
56 | )
57 | updated_orientations = self.transformer_decoder(
58 | self.orientations.unsqueeze(1),
59 | orientation_memory,
60 | tgt_mask=tgt_mask,
61 | memory_mask=memory_mask,
62 | )
63 |
64 | return updated_positions.squeeze(1), updated_orientations.squeeze(1)
65 |
--------------------------------------------------------------------------------
/swarms_torch/swarmalators/swarmalator_visualize.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.animation import FuncAnimation
3 | from swarms_torch.swarmalators.swarmalator_base import simulate_swarmalators
4 |
5 | # Example usage (guarded so that importing this module has no side effects)
6 | if __name__ == "__main__":
7 |     N = 100
8 |     J, alpha, beta, gamma, epsilon_a, epsilon_r, R = [0.1] * 7
9 |     D = 3  # number of orientation dimensions (must be 3 here)
10 |     xi, sigma_i = simulate_swarmalators(
11 |         N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
12 |     )
13 |     print(xi[-1], sigma_i[-1])
14 |
15 | def visualize_swarmalators(results_xi):
16 | fig = plt.figure(figsize=(8, 8))
17 | ax = fig.add_subplot(111, projection="3d")
18 |
19 | ax.set_xlim(-2, 2)
20 | ax.set_ylim(-2, 2)
21 | ax.set_zlim(-2, 2)
22 |
23 | # Initialize the scatter plot
24 | scatter = ax.scatter([], [], [])
25 |
26 | def init():
27 | scatter._offsets3d = ([], [], [])
28 | return (scatter,)
29 |
30 | def update(num):
31 | ax.view_init(30, 0.3 * num)
32 | x_data, y_data, z_data = results_xi[num].t()
33 | scatter._offsets3d = (x_data, y_data, z_data)
34 | return (scatter,)
35 |
36 |     # Keep a reference to the animation so it is not garbage-collected
37 |     anim = FuncAnimation(
38 |         fig, update, frames=len(results_xi), init_func=init, blit=False
39 |     )
40 | plt.show()
41 |
42 |
43 | # # Call the visualization function
44 | # visualize_swarmalators(xi)
45 |
--------------------------------------------------------------------------------
/swarms_torch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyegomez/swarms-pytorch/0fa9adeb7da3e33ec53dba65b985c606a7ff558a/swarms_torch/utils/__init__.py
--------------------------------------------------------------------------------
/swarms_torch/utils/particle_swarm.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class ParticleSwarmOptimization:
5 | """
6 | Particle Swarm Optimization
7 | Overview: https://en.wikipedia.org/wiki/Particle_swarm_optimization
8 |
9 | How does it work?
10 | 1. Initialize particles with random positions and velocities
11 | 2. For each particle, compute the fitness value
12 | 3. Update the personal best and global best
13 | 4. Update the velocity and position of each particle
14 | 5. Repeat step 2 to 4 until the maximum number of iterations is reached
15 |
16 |
17 |
18 | Parameters
19 | ----------
20 | goal: str
21 | The goal string to be optimized
22 | n_particles: int
23 | Number of particles
24 | inertia: float
25 | Inertia weight
26 | personal_best_weight: float
27 | Personal best weight
28 | global_best_weight: float
29 | Global best weight
30 |
31 | Usage
32 | -----
33 | pso = ParticleSwarmOptimization(goal="Attention is all you need", n_particles=100)
34 | pso.optimize(iterations=1000)
35 |
36 |
37 | Future Improvements
38 | -------------------
39 | 1. Add a stopping criterion
40 | 2. Add a callback function to track the progress
41 | 3. Add a function to plot the fitness value
42 | 4. Add a function to plot the particles
43 | 5. Add a function to plot the velocity
44 | 6. Add a function to plot the position
45 | 7. Add a function to plot the personal best
46 | 8. Add a function to plot the global best
47 | 9. Add a function to plot the personal best weight
48 | 10. Add a function to plot the global best weight
49 |
50 |
51 |
52 | """
53 |
54 | def __init__(
55 | self,
56 |         goal: str = "Attention is all you need",
57 | n_particles: int = 100,
58 | inertia: float = 0.5,
59 | personal_best_weight: float = 1.5,
60 | global_best_weight: float = 1.5,
61 | dim: int = 1,
62 | ):
63 | self.goal = torch.tensor([ord(c) for c in goal])
64 | self.n_particles = n_particles
65 | self.inertia = inertia
66 | self.personal_best_weight = personal_best_weight
67 | self.global_best_weight = global_best_weight
68 |
69 | self.particles = torch.randint(0, 255, (n_particles, len(goal)))
70 | self.velocities = torch.zeros((n_particles, len(goal)))
71 |
72 | self.personal_best = self.particles.clone()
73 | self.global_best = self.particles[0].clone()
74 |
75 | def compute_fitness(
76 | self,
77 | particle,
78 | ):
79 | return 1.0 / (1.0 + torch.norm((particle - self.goal).float()))
80 |
81 | def update(
82 | self,
83 | ):
84 | """Update the particles"""
85 | for i in range(self.n_particles):
86 | fitness = self.compute_fitness(
87 | self.particles[i],
88 | )
89 |
90 | personal_best_fitness = self.compute_fitness(
91 | self.personal_best[i],
92 | )
93 |
94 | if fitness > personal_best_fitness:
95 | self.personal_best[i] = self.particles[i]
96 |
97 | global_best_fitness = self.compute_fitness(self.global_best)
98 |
99 | if fitness > global_best_fitness:
100 | self.global_best = self.particles[i]
101 |
102 | # update velocity
103 | personal_attraction = (
104 | self.personal_best_weight
105 | * torch.rand(self.goal.size())
106 | * (self.personal_best[i] - self.particles[i])
107 | )
108 |
109 | global_attraction = (
110 | self.global_best_weight
111 | * torch.rand(self.goal.size())
112 | * (self.global_best - self.particles[i])
113 | )
114 |
115 | self.velocities[i] = (
116 | self.inertia * self.velocities[i]
117 | + personal_attraction
118 | + global_attraction
119 | )
120 |
121 | # Update position
122 | self.particles[i] += self.velocities[i].int()
123 | self.particles[i].clamp_(0, 255)
124 |
125 | def optimize(
126 | self,
127 | iterations: int = 1000,
128 | ):
129 | """Optimize the goal string"""
130 | for _ in range(iterations):
131 | self.update()
132 | best_particle = self.global_best
133 | print(
134 | "Best Particle: ", "".join([chr(int(i)) for i in best_particle])
135 | )
136 |
--------------------------------------------------------------------------------
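Editor's note: update() implements the standard PSO velocity rule v <- w*v + c1*r1*(pbest - x) + c2*r2*(gbest - x), applied here to integer-coded ASCII strings (w is the inertia, c1/c2 the personal/global best weights, r1/r2 uniform random factors). A minimal run, assuming the defaults above:

    from swarms_torch.utils.particle_swarm import ParticleSwarmOptimization

    pso = ParticleSwarmOptimization(goal="Hello PSO", n_particles=200)
    pso.optimize(iterations=100)  # prints the decoded global best per iteration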
/swarms_torch/utils/sakana.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | def fish(dim: int, mult: int = 4):
5 | return nn.Sequential(
6 | nn.Linear(dim, dim * mult),
7 | nn.Softplus(),
8 | nn.Dropout(0.1),
9 | nn.LayerNorm(dim * mult),
10 |         nn.Softmax(dim=-1),
11 | nn.Linear(dim * mult, dim),
12 | )
13 |
--------------------------------------------------------------------------------
/swarms_torch/utils/spiral_optimization.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class SPO(nn.Module):
6 | """
7 | Spiral Optimization (SPO) Algorithm in PyTorch.
8 |
9 | Implements the SPO algorithm for optimization towards a target string.
10 |
11 | How does it work?
12 | ----------
13 | 1. Initialize the search points randomly
14 | 2. Initialize the center randomly
15 | 3. Update the search points based on the spiral model
16 | 4. Find the best search point and set as the new center
17 | 5. Repeat step 3 to 4 until the maximum number of iterations is reached
18 |
19 | Usage
20 | -----
21 | from swarms_torch import SPO
22 |
23 |     goal = "Hello SPO"
24 |     spo = SPO(goal, m=100, k_max=1000)
25 | spo.optimize()
26 |
27 | print("Best Matched String:", spo.best_string())
28 |
29 | Future Features to implement
30 | --------
31 | 1. Add a stopping criterion
32 | 2. Add a callback function to track the progress
33 | 3. Add a function to plot the search points
34 | 4. Add a function to plot the best solution
35 |
36 | """
37 |
38 | def __init__(self, goal: str = None, m: int = 10, k_max: int = 1000):
39 | """
40 | Initialize the SPO class.
41 |
42 | Args:
43 | - goal: The target string.
44 | - m: Number of search points (strings).
45 | - k_max: Maximum number of iterations.
46 | """
47 | self.goal = torch.tensor(
48 | [ord(c) for c in goal], dtype=torch.float32
49 | ) # ASCII representation
50 |
51 | self.m = m
52 | self.k_max = k_max
53 | self.n_dim = len(goal)
54 |
55 | # Initializing the search points and center randomly
56 | # Note: 32-126 is the ASCII range for all printable characters
57 | self.points = torch.randint(
58 | 32, 127, (self.m, self.n_dim), dtype=torch.float32
59 | )
60 | self.center = torch.randint(32, 127, (self.n_dim,), dtype=torch.float32)
61 |
62 | def _step_rate(self, k):
63 | """
64 | Define the step rate function.
65 |
66 | Args:
67 | - k: Current iteration.
68 |
69 | Returns: Step rate for the current iteration.
70 | """
71 | return 1 / (1 + k)
72 |
73 | def _update_points(self, k):
74 | """Update the search points based on the spiral model."""
75 | r = self._step_rate(k)
76 | R = torch.eye(self.n_dim) # Identity for simplicity in n-dimensions
77 | for i in range(self.m):
78 | self.points[i] = self.center + r * torch.mv(
79 | R, (self.points[i] - self.center)
80 | )
81 |
82 | def _update_center(self):
83 | """Find the best search point and set as the new center."""
84 | fitnesses = torch.norm(self.points - self.goal, dim=1)
85 | best_idx = torch.argmin(fitnesses)
86 | self.center = self.points[best_idx]
87 |
88 | def optimize(self):
89 | """Run the optimization loop."""
90 | for k in range(self.k_max):
91 | self._update_points(k)
92 | self._update_center()
93 | if (
94 | torch.norm(self.center - self.goal) < 1e-5
95 | ): # Example convergence condition
96 | break
97 |
98 | def best_string(self):
99 | """Convert the best found point to its string representation"""
100 | return "".join([chr(int(c)) for c in self.center.round()])
101 |
102 |
103 | # # Example Usage
104 | # goal = "Attention is all you need"
105 | # optimizer = SPO(goal)
106 | # optimizer.optimize()
107 | # print(f"Optimized String: {optimizer.best_string()}")
108 |
--------------------------------------------------------------------------------
/test_switch_moe_fix.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.structs.switch_moe import SwitchMoE
3 |
4 |
5 | def test_switch_moe_aux_loss():
6 | """Test that SwitchMoE works with auxiliary loss enabled."""
7 |
8 | # Set up test parameters
9 | batch_size = 32
10 |     # Tokens are routed independently, so a 2-D (tokens, dim) input is used
11 | dim = 512
12 | num_experts = 8
13 |
14 | # Create model with auxiliary loss enabled
15 | model = SwitchMoE(
16 | dim=dim,
17 | hidden_dim=dim,
18 | output_dim=dim,
19 | num_experts=num_experts,
20 | use_aux_loss=True,
21 | )
22 |
23 | # Create test input
24 | x = torch.randn(batch_size, dim)
25 |
26 | try:
27 | # Forward pass
28 | output, loss = model(x)
29 |
30 | print("✅ Success! No runtime error occurred.")
31 | print(f"Input shape: {x.shape}")
32 | print(f"Output shape: {output.shape}")
33 | print(f"Auxiliary loss: {loss.item() if loss is not None else 'None'}")
34 |
35 | # Verify shapes
36 | assert (
37 | output.shape == x.shape
38 | ), f"Output shape {output.shape} doesn't match input shape {x.shape}"
39 | assert (
40 | loss is not None
41 | ), "Loss should not be None when use_aux_loss=True"
42 | assert torch.isfinite(loss), "Loss should be finite"
43 |
44 | print("✅ All assertions passed!")
45 |
46 | except Exception as e:
47 | print(f"❌ Error occurred: {e}")
48 | raise e
49 |
50 |
51 | if __name__ == "__main__":
52 | test_switch_moe_aux_loss()
53 |
--------------------------------------------------------------------------------
/tests/ant_colony.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import torch
3 |
4 | from swarms_torch import AntColonyOptimization # Import your class
5 |
6 |
7 | class TestAntColonyOptimization(unittest.TestCase):
8 | def setUp(self):
9 | self.aco = AntColonyOptimization(
10 | goal="Hello ACO", num_ants=1000, num_iterations=10
11 | )
12 |
13 | def test_initialization(self):
14 | self.assertEqual(self.aco.goal.tolist(), [ord(c) for c in "Hello ACO"])
15 | self.assertEqual(self.aco.pheromones.size(), torch.Size([1000]))
16 | self.assertEqual(self.aco.pheromones.tolist(), [1.0] * 1000)
17 |
18 | def test_fitness(self):
19 | solution = torch.tensor(
20 | [ord(c) for c in "Hello ACO"], dtype=torch.float32
21 | )
22 | self.assertEqual(
23 | self.aco.fitness(solution).item(), 0
24 | ) # Should be maximum fitness
25 |
26 | def test_update_pheromones(self):
27 | initial_pheromones = self.aco.pheromones.clone()
28 | self.aco.solutions = [
29 | torch.tensor([ord(c) for c in "Hello ACO"], dtype=torch.float32)
30 | for _ in range(1000)
31 | ]
32 | self.aco.update_pheromones()
33 | # After updating, pheromones should not remain the same
34 | self.assertFalse(torch.equal(initial_pheromones, self.aco.pheromones))
35 |
36 | def test_choose_next_path(self):
37 | path = self.aco.choose_next_path()
38 | # Path should be an integer index within the number of ants
39 | self.assertIsInstance(path, int)
40 | self.assertGreaterEqual(path, 0)
41 | self.assertLess(path, 1000)
42 |
43 | def test_optimize(self):
44 | solution = self.aco.optimize()
45 | self.assertIsInstance(solution, str)
46 | # Given enough iterations and ants, the solution should approach the goal. For short runs, this might not hold.
47 | # self.assertEqual(solution, "Hello ACO")
48 |
49 | def test_invalid_parameters(self):
50 | with self.assertRaises(ValueError):
51 | _ = AntColonyOptimization(num_ants=-5)
52 | with self.assertRaises(ValueError):
53 | _ = AntColonyOptimization(evaporation_rate=1.5)
54 |
55 |
56 | if __name__ == "__main__":
57 | unittest.main()
58 |
--------------------------------------------------------------------------------
/tests/cellular_swarm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch import TransformerCell, CellularSwarm
3 |
4 |
5 | def test_transformercell_initialization():
6 | transformercell = TransformerCell(input_dim=512, nhead=8)
7 | assert isinstance(transformercell, TransformerCell)
8 | assert transformercell.neighborhood_size == 3
9 |
10 |
11 | def test_transformercell_forward():
12 | transformercell = TransformerCell(input_dim=512, nhead=8)
13 | x = torch.randn(10, 32, 512)
14 | neighbors = [torch.randn(10, 32, 512)]
15 | output = transformercell(x, neighbors)
16 | assert output.shape == torch.Size([20, 32, 512])
17 |
18 |
19 | def test_cellularswarm_initialization():
20 | cellularswarm = CellularSwarm(cell_count=5, input_dim=512, nhead=8)
21 | assert isinstance(cellularswarm, CellularSwarm)
22 | assert len(cellularswarm.cells) == 5
23 | assert cellularswarm.time_steps == 4
24 |
25 |
26 | def test_cellularswarm_forward():
27 | cellularswarm = CellularSwarm(cell_count=5, input_dim=512, nhead=8)
28 | x = torch.randn(10, 32, 512)
29 | output = cellularswarm(x)
30 | assert output.shape == torch.Size([10, 32, 512])
31 |
--------------------------------------------------------------------------------
/tests/fish_school.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.structs.fish_school import Fish, FishSchool
3 |
4 |
5 | def test_fish_initialization():
6 | fish = Fish(dim=512, heads=8, depth=6)
7 | assert isinstance(fish, Fish)
8 |
9 |
10 | def test_fish_train():
11 | fish = Fish(dim=512, heads=8, depth=6)
12 | src = torch.randn(10, 32, 512)
13 | tgt = torch.randn(10, 32, 512)
14 | labels = torch.randint(0, 512, (10, 32))
15 | fish.train(src, tgt, labels)
16 | assert isinstance(fish.food, float)
17 |
18 |
19 | def test_fishschool_initialization():
20 | fishschool = FishSchool(
21 | num_fish=10, dim=512, heads=8, depth=6, num_iter=100
22 | )
23 | assert isinstance(fishschool, FishSchool)
24 | assert len(fishschool.fish) == 10
25 |
26 |
27 | def test_fishschool_forward():
28 | fishschool = FishSchool(
29 | num_fish=10, dim=512, heads=8, depth=6, num_iter=100
30 | )
31 | src = torch.randn(10, 32, 512)
32 | tgt = torch.randn(10, 32, 512)
33 | labels = torch.randint(0, 512, (10, 32))
34 | fishschool.forward(src, tgt, labels)
35 | assert isinstance(fishschool.fish[0].food, float)
36 |
--------------------------------------------------------------------------------
/tests/neuronal_transformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.structs.neuronal_transformer import (
3 | TransformerLayer,
4 | Neuron,
5 | SynapseTransformer,
6 | NNTransformer,
7 | )
8 |
9 |
10 | def test_transformerlayer_initialization():
11 | transformerlayer = TransformerLayer(input_dim=512, output_dim=256, nhead=8)
12 | assert isinstance(transformerlayer, TransformerLayer)
13 |
14 |
15 | def test_transformerlayer_forward():
16 | transformerlayer = TransformerLayer(input_dim=512, output_dim=256, nhead=8)
17 | x = torch.randn(10, 32, 512)
18 | output = transformerlayer(x)
19 | assert output.shape == torch.Size([10, 32, 256])
20 |
21 |
22 | def test_neuron_initialization():
23 | neuron = Neuron(num_states=10)
24 | assert isinstance(neuron, Neuron)
25 | assert neuron.states.shape == torch.Size([10])
26 |
27 |
28 | def test_synapsetransformer_initialization():
29 | synapsetransformer = SynapseTransformer(
30 | input_dim=512, output_dim=256, nhead=8
31 | )
32 | assert isinstance(synapsetransformer, SynapseTransformer)
33 |
34 |
35 | def test_synapsetransformer_forward():
36 | synapsetransformer = SynapseTransformer(
37 | input_dim=512, output_dim=256, nhead=8
38 | )
39 | x = torch.randn(10, 32, 512)
40 | output = synapsetransformer(x)
41 | assert output.shape == torch.Size([10, 32, 256])
42 |
43 |
44 | def test_nntransformer_initialization():
45 | nntransformer = NNTransformer(
46 | neuron_count=5, num_states=10, input_dim=512, output_dim=256, nhead=8
47 | )
48 | assert isinstance(nntransformer, NNTransformer)
49 | assert len(nntransformer.neurons) == 5
50 | assert len(nntransformer.synapses) == 5
51 |
52 |
53 | def test_nntransformer_forward():
54 |     nntransformer = NNTransformer(
55 |         neuron_count=5, num_states=10, input_dim=512, output_dim=512, nhead=8
56 |     )
57 |     x = torch.randn(1, 32, 512)
58 |     output = nntransformer(x)
59 |     assert output.shape == torch.Size([10])
60 |
--------------------------------------------------------------------------------
/tests/particle_swarm.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import torch
3 |
4 | from swarms_torch import ParticleSwarmOptimization # Import your class here
5 |
6 |
7 | class TestParticleSwarmOptimization(unittest.TestCase):
8 | def setUp(self):
9 | self.pso = ParticleSwarmOptimization(goal="Hello", n_particles=10)
10 |
11 | def test_initialization(self):
12 | self.assertEqual(self.pso.goal.tolist(), [ord(c) for c in "Hello"])
13 | self.assertEqual(self.pso.particles.size(), (10, 5))
14 | self.assertEqual(self.pso.velocities.size(), (10, 5))
15 |
16 | def test_compute_fitness(self):
17 | particle = torch.tensor([ord(c) for c in "Hello"])
18 | fitness = self.pso.compute_fitness(particle)
19 | self.assertEqual(fitness.item(), 1.0)
20 |
21 | def test_update(self):
22 | initial_particle = self.pso.particles.clone()
23 | self.pso.update()
24 | # After updating, particles should not remain the same (in most cases)
25 | self.assertFalse(torch.equal(initial_particle, self.pso.particles))
26 |
27 | def test_optimize(self):
28 | initial_best_particle = self.pso.global_best.clone()
29 | self.pso.optimize(iterations=10)
30 | # After optimization, global best should be closer to the goal
31 | initial_distance = torch.norm(
32 | (initial_best_particle - self.pso.goal).float()
33 | ).item()
34 | final_distance = torch.norm(
35 | (self.pso.global_best - self.pso.goal).float()
36 | ).item()
37 | self.assertLess(final_distance, initial_distance)
38 |
39 |
40 | if __name__ == "__main__":
41 | unittest.main()
42 |
--------------------------------------------------------------------------------
/tests/queen_bee.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import torch
3 | from swarms_torch.structs.queen_bee import QueenBeeGa # Import the class
4 |
5 |
6 | class TestQueenBeeGa(unittest.TestCase):
7 | def setUp(self):
8 | self.optimizer = QueenBeeGa(goal="Hello QBGA", pop_size=50)
9 |
10 | def test_initialization(self):
11 | self.assertEqual(self.optimizer.goal, "Hello QBGA")
12 | self.assertEqual(self.optimizer.gene_length, len("Hello QBGA"))
13 | self.assertIsNone(self.optimizer.queen)
14 | self.assertIsNone(self.optimizer.queen_fitness)
15 |
16 | def test_encode_decode(self):
17 | encoded = QueenBeeGa.encode("Hello")
18 | decoded = QueenBeeGa.decode(encoded)
19 | self.assertEqual(decoded, "Hello")
20 |
21 | def test_evolution(self):
22 | initial_population = self.optimizer.pool.clone()
23 | self.optimizer._evolve()
24 | self.assertFalse(torch.equal(initial_population, self.optimizer.pool))
25 |
26 | def test_run(self):
27 | initial_population = self.optimizer.pool.clone()
28 | self.optimizer.run(max_generations=10)
29 | self.assertNotEqual(
30 | QueenBeeGa.decode(self.optimizer.queen),
31 | QueenBeeGa.decode(initial_population[0]),
32 | )
33 |
34 | def test_check_convergence(self):
35 | self.optimizer.pool = torch.stack([self.optimizer.target_gene] * 50)
36 | self.assertTrue(self.optimizer._check_convergence())
37 |
38 | def test_invalid_parameters(self):
39 | with self.assertRaises(ValueError):
40 | _ = QueenBeeGa(mutation_prob=1.5)
41 | with self.assertRaises(ValueError):
42 | _ = QueenBeeGa(strong_mutation_rate=-0.5)
43 |
44 |
45 | if __name__ == "__main__":
46 | unittest.main()
47 |
--------------------------------------------------------------------------------
/tests/spiral_optimization.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from swarms_torch.utils.spiral_optimization import SPO
3 |
4 |
5 | def test_spo_initialization():
6 | spo = SPO(goal="Hello SPO", m=100, k_max=1000)
7 | assert isinstance(spo, SPO)
8 | assert spo.goal.shape == torch.Size([9])
9 | assert spo.points.shape == torch.Size([100, 9])
10 | assert spo.center.shape == torch.Size([9])
11 |
12 |
13 | def test_spo_step_rate():
14 | spo = SPO(goal="Hello SPO", m=100, k_max=1000)
15 | step_rate = spo._step_rate(1)
16 | assert step_rate == 0.5
17 |
18 |
19 | def test_spo_update_points():
20 | spo = SPO(goal="Hello SPO", m=100, k_max=1000)
21 | spo._update_points(1)
22 | assert spo.points.shape == torch.Size([100, 9])
23 |
24 |
25 | def test_spo_update_center():
26 | spo = SPO(goal="Hello SPO", m=100, k_max=1000)
27 | spo._update_center()
28 | assert spo.center.shape == torch.Size([9])
29 |
30 |
31 | def test_spo_optimize():
32 | spo = SPO(goal="Hello SPO", m=100, k_max=1000)
33 | spo.optimize()
34 | assert spo.center.shape == torch.Size([9])
35 |
36 |
37 | def test_spo_best_string():
38 | spo = SPO(goal="Hello SPO", m=100, k_max=1000)
39 | spo.optimize()
40 | best_string = spo.best_string()
41 | assert isinstance(best_string, str)
42 | assert len(best_string) == 9
43 |
--------------------------------------------------------------------------------
/tests/swarmalator_base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import pytest
3 | from swarms_torch.swarmalators.swarmalator_base import (
4 | pairwise_distances,
5 | function_for_x,
6 | function_for_sigma,
7 | simulate_swarmalators,
8 | )
9 |
10 | # Define global constants for testing
11 | N = 10
12 | J = 1.0
13 | alpha = 0.1
14 | beta = 0.2
15 | gamma = 0.3
16 | epsilon_a = 0.01
17 | epsilon_r = 0.02
18 | R = 0.5
19 | D = 3
20 | T = 100
21 | dt = 0.1
22 |
23 | # === Test pairwise_distances ===
24 |
25 |
26 | def test_pairwise_distances_shape():
27 | x = torch.randn(N, D)
28 | distances = pairwise_distances(x)
29 | assert distances.shape == (N, N)
30 |
31 |
32 | def test_pairwise_distances_identity():
33 | x = torch.randn(N, D)
34 | distances = pairwise_distances(x)
35 | for i in range(N):
36 | assert distances[i, i] == pytest.approx(0.0, abs=1e-6)
37 |
38 |
39 | def test_pairwise_distances_symmetry():
40 | x = torch.randn(N, D)
41 | distances = pairwise_distances(x)
42 | for i in range(N):
43 | for j in range(i + 1, N):
44 | assert distances[i, j] == pytest.approx(distances[j, i], abs=1e-6)
45 |
46 |
47 | # === Test function_for_x ===
48 |
49 |
50 | def test_function_for_x_shape():
51 | xi = torch.randn(N, D)
52 | sigma_i = torch.randn(N, D)
53 | dx = function_for_x(
54 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
55 | )
56 | assert dx.shape == (N, D)
57 |
58 |
59 | def test_function_for_x_output_range():
60 | xi = torch.randn(N, D)
61 | sigma_i = torch.randn(N, D)
62 | dx = function_for_x(
63 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
64 | )
65 | assert (dx >= -1.0).all() and (dx <= 1.0).all()
66 |
67 |
68 | def test_function_for_x_zero_at_equilibrium():
69 | xi = torch.zeros(N, D)
70 | sigma_i = torch.zeros(N, D)
71 | dx = function_for_x(
72 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
73 | )
74 | assert (dx == 0.0).all()
75 |
76 |
77 | # === Test function_for_sigma ===
78 |
79 |
80 | def test_function_for_sigma_shape():
81 | xi = torch.randn(N, D)
82 | sigma_i = torch.randn(N, D)
83 | d_sigma = function_for_sigma(
84 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
85 | )
86 | assert d_sigma.shape == (N, D)
87 |
88 |
89 | def test_function_for_sigma_output_range():
90 | xi = torch.randn(N, D)
91 | sigma_i = torch.randn(N, D)
92 | d_sigma = function_for_sigma(
93 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
94 | )
95 | assert (d_sigma >= -1.0).all() and (d_sigma <= 1.0).all()
96 |
97 |
98 | def test_function_for_sigma_zero_at_equilibrium():
99 | xi = torch.zeros(N, D)
100 | sigma_i = torch.zeros(N, D)
101 | d_sigma = function_for_sigma(
102 | xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D
103 | )
104 | assert (d_sigma == 0.0).all()
105 |
106 |
107 | # === Test simulate_swarmalators ===
108 |
109 |
110 | def test_simulate_swarmalators_output_shape():
111 | results_xi, results_sigma_i = simulate_swarmalators(
112 | N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt
113 | )
114 | assert len(results_xi) == T
115 | assert len(results_sigma_i) == T
116 | assert results_xi[0].shape == (N, D)
117 | assert results_sigma_i[0].shape == (N, D)
118 |
119 |
120 | def test_simulate_swarmalators_convergence():
121 |     results_xi, results_sigma_i = simulate_swarmalators(
122 |         N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt
123 |     )
124 |     # Early steps still evolve, so asserting the whole trajectory is
125 |     # static would fail; only the tail should be (near-)stationary.
126 |     assert torch.allclose(results_xi[-1], results_xi[-2], atol=1e-6)
127 |     assert torch.allclose(
128 |         results_sigma_i[-1], results_sigma_i[-2], atol=1e-6
129 |     )
130 |
131 | def test_simulate_swarmalators_non_zero_initial_condition():
132 | xi = torch.randn(N, D)
133 | sigma_i = torch.randn(N, D)
134 | results_xi, results_sigma_i = simulate_swarmalators(
135 | N,
136 | J,
137 | alpha,
138 | beta,
139 | gamma,
140 | epsilon_a,
141 | epsilon_r,
142 | R,
143 | D,
144 | T=T,
145 | dt=dt,
146 | xi=xi,
147 | sigma_i=sigma_i,
148 | )
149 | assert not torch.allclose(results_xi[0], xi, atol=1e-6)
150 | assert not torch.allclose(results_sigma_i[0], sigma_i, atol=1e-6)
151 |
152 |
153 | # Add more tests as needed...
154 |
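155 | 
156 | # Usage sketch (not a test): one manual run of the simulator with the
157 | # constants defined above; it assumes only the keyword arguments that
158 | # the tests already pass.
159 | if __name__ == "__main__":
160 |     results_xi, results_sigma_i = simulate_swarmalators(
161 |         N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt
162 |     )
163 |     print(len(results_xi), results_xi[-1].shape)  # T states of shape (N, D)
164 | 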
--------------------------------------------------------------------------------
/tests/test_mixture_of_mamba.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 | from swarms_torch.structs.mixture_of_mamba import MixtureOfMambas
4 |
5 |
6 | @pytest.fixture
7 | def mixture():
8 | num_mambas = 5
9 | dim = 10
10 | d_state = 20
11 | d_conv = 30
12 | expand = 40
13 | return MixtureOfMambas(num_mambas, dim, d_state, d_conv, expand)
14 |
15 |
16 | def test_init(mixture):
17 | assert mixture.num_mambas == 5
18 | assert mixture.dim == 10
19 | assert mixture.d_state == 20
20 | assert mixture.d_conv == 30
21 | assert mixture.expand == 40
22 | assert len(mixture.models) == 5
23 |
24 |
25 | def test_forward_average(mixture):
26 | x = torch.rand((1, 10))
27 | output = mixture.forward(x)
28 | assert output.shape == (1, 10)
29 |
30 |
31 | def test_forward_weighted(mixture):
32 | x = torch.rand((1, 10))
33 | weights = torch.ones(5)
34 | mixture.fusion_method = "weighted"
35 | output = mixture.forward(x, weights)
36 | assert output.shape == (1, 10)
37 |
38 |
39 | def test_forward_invalid_aggregation(mixture):
40 | x = torch.rand((1, 10))
41 | mixture.fusion_method = "invalid"
42 | with pytest.raises(ValueError):
43 | mixture.forward(x)
44 |
45 |
46 | def test_average_aggregate(mixture):
47 | outputs = [torch.rand((1, 10)) for _ in range(5)]
48 | output = mixture.average_aggregate(outputs)
49 | assert output.shape == (1, 10)
50 |
51 |
52 | def test_weighted_aggregate(mixture):
53 | outputs = [torch.rand((1, 10)) for _ in range(5)]
54 | weights = torch.ones(5)
55 | output = mixture.weighted_aggregate(outputs, weights)
56 | assert output.shape == (1, 10)
57 |
58 |
59 | def test_weighted_aggregate_invalid_weights(mixture):
60 | outputs = [torch.rand((1, 10)) for _ in range(5)]
61 | weights = torch.ones(4)
62 | with pytest.raises(ValueError):
63 | mixture.weighted_aggregate(outputs, weights)
64 |
65 |
66 | def test_forward_different_dimensions(mixture):
67 | x = torch.rand((2, 10))
68 | with pytest.raises(ValueError):
69 | mixture.forward(x)
70 |
71 |
72 | def test_forward_no_weights(mixture):
73 | x = torch.rand((1, 10))
74 | mixture.fusion_method = "weighted"
75 | with pytest.raises(ValueError):
76 | mixture.forward(x)
77 |
78 |
79 | def test_forward_extra_weights(mixture):
80 | x = torch.rand((1, 10))
81 | weights = torch.ones(6)
82 | mixture.fusion_method = "weighted"
83 | with pytest.raises(ValueError):
84 | mixture.forward(x, weights)
85 |
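86 | 
87 | # Usage sketch (not a test): weighted fusion across the five expert
88 | # Mambas, mirroring test_forward_weighted above. The positional
89 | # constructor arguments are the fixture's toy values, not settings
90 | # recommended for real models.
91 | if __name__ == "__main__":
92 |     mixture = MixtureOfMambas(5, 10, 20, 30, 40)
93 |     mixture.fusion_method = "weighted"
94 |     out = mixture.forward(torch.rand((1, 10)), torch.ones(5))
95 |     print(out.shape)  # expected: torch.Size([1, 10])
96 | 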
--------------------------------------------------------------------------------
/tests/transformer_hive.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 | from swarms_torch.structs.hivemind_swarm_transformer import HivemindSwarm
4 |
5 |
6 | # Create a fixture for the HivemindSwarm model
7 | @pytest.fixture
8 | def swarm_model():
9 | return HivemindSwarm(
10 | dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=3
11 | )
12 |
13 |
14 | # Test the basic functionality of HivemindSwarm
15 | def test_hivemind_swarm_forward(swarm_model):
16 | x = torch.randint(0, 20000, (1, 32))
17 | y = swarm_model(x)
18 | assert y.shape == (1, 32, 512)
19 |
20 |
21 | # Test if the swarm consists of the correct number of transformers
22 | def test_num_transformers_in_swarm(swarm_model):
23 | assert len(list(swarm_model.experts)) == 3
24 |
25 |
26 | # Test if the gate mechanism works as expected
27 | def test_gate_mechanism(swarm_model):
28 | x = torch.randint(0, 20000, (1, 32))
29 | outputs = torch.stack([expert(x) for expert in swarm_model.experts], dim=1)
30 | gate = swarm_model.gate_activation(
31 | swarm_model.gate_bias + swarm_model.gate(outputs)
32 | )
33 |
34 | # Check if the gate values sum to 1 along the transformer dimension
35 | assert torch.allclose(gate.sum(dim=-1), torch.ones(1, 3))
36 |
37 |
38 | # Test if the model can handle different input shapes
39 | def test_different_input_shapes(swarm_model):
40 | x1 = torch.randint(0, 20000, (1, 32))
41 | x2 = torch.randint(0, 20000, (1, 16))
42 | y1 = swarm_model(x1)
43 | y2 = swarm_model(x2)
44 | assert y1.shape == (1, 32, 512)
45 | assert y2.shape == (1, 16, 512)
46 |
47 |
48 | # Test if the model can handle different numbers of models in the swarm
49 | def test_different_num_models():
50 | swarm_model_1 = HivemindSwarm(
51 | dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=1
52 | )
53 | swarm_model_2 = HivemindSwarm(
54 | dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=5
55 | )
56 |
57 | x = torch.randint(0, 20000, (1, 32))
58 | y1 = swarm_model_1(x)
59 | y2 = swarm_model_2(x)
60 |
61 | assert y1.shape == (1, 32, 512)
62 | assert y2.shape == (1, 32, 512)
63 |
64 |
65 | # Test if the model works with different configurations
66 | def test_different_configurations():
67 | model_1 = HivemindSwarm(
68 | dim=256, max_seq_len=16, depth=4, heads=4, dim_head=64, num_models=2
69 | )
70 | model_2 = HivemindSwarm(
71 | dim=1024, max_seq_len=64, depth=8, heads=16, dim_head=128, num_models=4
72 | )
73 |
74 | x = torch.randint(0, 20000, (1, 16))
75 | y1 = model_1(x)
76 | y2 = model_2(x)
77 |
78 | assert y1.shape == (1, 16, 256)
79 | assert y2.shape == (1, 16, 1024)
80 |
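81 | 
82 | # Usage sketch (not a test): a single forward pass through the swarm
83 | # with the fixture's hyperparameters; the vocabulary size of 20000 is
84 | # the same assumption the tests make when sampling token ids.
85 | if __name__ == "__main__":
86 |     model = HivemindSwarm(
87 |         dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=3
88 |     )
89 |     tokens = torch.randint(0, 20000, (1, 32))
90 |     print(model(tokens).shape)  # expected: (1, 32, 512)
91 | 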
--------------------------------------------------------------------------------
/tests/transformer_pso.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 | from torch.utils.data import DataLoader
4 | from swarms_torch.pso.transformer_pso import (
5 |     SimpleTransformer,
6 |     TransformerParticleSwarmOptimization,
7 | )
8 | 
9 | 
10 | def test_simpletransformer_initialization():
11 |     simpletransformer = SimpleTransformer(
12 |         input_dim=10, d_model=512, nhead=8, num_layers=1, output_dim=2
13 |     )
14 |     assert isinstance(simpletransformer, SimpleTransformer)
15 | 
16 | 
17 | def test_simpletransformer_forward():
18 |     simpletransformer = SimpleTransformer(
19 |         input_dim=10, d_model=512, nhead=8, num_layers=1, output_dim=2
20 |     )
21 |     x = torch.randint(0, 10, (10, 32))
22 |     output = simpletransformer(x)
23 |     assert output.shape == torch.Size([32, 2])
24 | 
25 | 
26 | @pytest.fixture
27 | def pso():
28 |     # Every PSO test below previously repeated this construction verbatim;
29 |     # a fixture keeps each test focused on its own assertions.
30 |     criterion = torch.nn.CrossEntropyLoss()
31 |     data_loader = DataLoader(
32 |         [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
33 |         batch_size=32,
34 |     )
35 |     return TransformerParticleSwarmOptimization(
36 |         SimpleTransformer, (10, 512, 8, 1, 2), "cpu", criterion, data_loader
37 |     )
38 | 
39 | 
40 | def test_TransformerParticleSwarmOptimization_initialization(pso):
41 |     assert isinstance(pso, TransformerParticleSwarmOptimization)
42 |     assert len(pso.particles) == 10
43 |     assert len(pso.velocities) == 10
44 |     assert len(pso.personal_best) == 10
45 | 
46 | 
47 | def test_TransformerParticleSwarmOptimization_compute_fitness(pso):
48 |     fitness = pso.compute_fitness(pso.particles[0].state_dict())
49 |     assert isinstance(fitness, float)
50 | 
51 | 
52 | def test_TransformerParticleSwarmOptimization_update(pso):
53 |     pso.update()
54 |     assert len(pso.particles) == 10
55 |     assert len(pso.velocities) == 10
56 |     assert len(pso.personal_best) == 10
57 | 
58 | 
59 | def test_TransformerParticleSwarmOptimization_optimize(pso):
60 |     pso.optimize(iterations=10)
61 |     assert len(pso.particles) == 10
62 |     assert len(pso.velocities) == 10
63 |     assert len(pso.personal_best) == 10
64 | 
65 | 
66 | def test_TransformerParticleSwarmOptimization_get_best_model(pso):
67 |     pso.optimize(iterations=10)
68 |     best_model = pso.get_best_model()
69 |     assert isinstance(best_model, SimpleTransformer)
70 | 
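71 | 
72 | # Usage sketch (not a test): the end-to-end loop the tests above imply,
73 | # on the same toy data; it assumes only the constructor signature and
74 | # the optimize()/get_best_model() API already exercised here.
75 | if __name__ == "__main__":
76 |     criterion = torch.nn.CrossEntropyLoss()
77 |     loader = DataLoader(
78 |         [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)],
79 |         batch_size=32,
80 |     )
81 |     pso = TransformerParticleSwarmOptimization(
82 |         SimpleTransformer, (10, 512, 8, 1, 2), "cpu", criterion, loader
83 |     )
84 |     pso.optimize(iterations=10)
85 |     best = pso.get_best_model()  # SimpleTransformer with the best weights
86 | 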
--------------------------------------------------------------------------------