├── .github
│   └── workflows
│       ├── rust_cd.yml
│       └── rust_ci.yml
├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── examples
│   ├── gradient_descent.rs
│   └── scalar_back_propagation.rs
└── src
    ├── layer.rs
    ├── lib.rs
    ├── mlp.rs
    ├── neuron.rs
    └── value.rs

/.github/workflows/rust_cd.yml:
--------------------------------------------------------------------------------
name: Rust CD

on:
  release:
    types: [published]

env:
  CARGO_TERM_COLOR: always

jobs:
  publish:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Publish
        run: cargo publish --token ${{ secrets.CRATES_IO }}

--------------------------------------------------------------------------------
/.github/workflows/rust_ci.yml:
--------------------------------------------------------------------------------
name: Rust CI

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

env:
  CARGO_TERM_COLOR: always

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Build
        run: cargo build --verbose
      - name: Run tests
        run: cargo test --verbose

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Generated by Cargo
# will have compiled files and executables
/target/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

# Added by cargo

/target
/Cargo.lock

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "micrograd-rs"
description = "Rust implementation of Andrej Karpathy's micrograd scalar-valued engine with basic neural network."
authors = ["Daniel Way"]
version = "0.0.3"
edition = "2021"
license = "MIT"
repository = "https://github.com/danielway/micrograd-rs"
exclude = [".github"]

[dependencies]
rand = "0.8.5"

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Daniel Way

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# micrograd-rs

[![Crate](https://img.shields.io/crates/v/micrograd-rs.svg)](https://crates.io/crates/micrograd-rs)
[![Rust CI](https://github.com/danielway/micrograd-rs/actions/workflows/rust_ci.yml/badge.svg?branch=master)](https://github.com/danielway/micrograd-rs/actions/workflows/rust_ci.yml)
[![Rust CD](https://github.com/danielway/micrograd-rs/actions/workflows/rust_cd.yml/badge.svg)](https://github.com/danielway/micrograd-rs/actions/workflows/rust_cd.yml)

A Rust implementation of [Andrej Karpathy's micrograd engine](https://github.com/karpathy/micrograd). Implemented as part of a personal exploration of neural networks, with applications like [`danielway/nn-sim`](https://github.com/danielway/nn-sim).

> A tiny scalar-valued autograd engine and a neural net library on top of it with PyTorch-like API
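
## Usage

A minimal sketch of the `Value` API (see `examples/` below for complete programs):

```rust
use micrograd_rs::Value;

fn main() {
    // o = tanh(a * b); backward() fills in do/da and do/db
    let a = Value::from(1.0).with_label("a");
    let b = Value::from(0.5).with_label("b");
    let o = (&a * &b).tanh().with_label("o");

    o.backward();

    println!("o = {}, do/da = {}", o.data(), a.gradient());
}
```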
--------------------------------------------------------------------------------
/examples/gradient_descent.rs:
--------------------------------------------------------------------------------
//! This is a basic example of performing gradient descent with a neural network using micrograd-rs.

use micrograd_rs::{Value, MLP};

fn main() {
    // Three inputs, two hidden layers of four neurons, one output
    let mlp = MLP::new(3, vec![4, 4, 1]);

    let xs = vec![
        vec![2.0, 3.0, -1.0],
        vec![3.0, -1.0, 0.5],
        vec![0.5, 1.0, 1.0],
        vec![1.0, 1.0, -1.0],
    ];

    let ys = vec![1.0, -1.0, -1.0, 1.0];

    for _ in 0..100 {
        // Forward pass
        let ypred: Vec<Value> = xs
            .iter()
            .map(|x| mlp.forward(x.iter().map(|x| Value::from(*x)).collect())[0].clone())
            .collect();
        let ypred_floats: Vec<f64> = ypred.iter().map(|v| v.data()).collect();

        // Loss function: sum of squared errors against the ground truth
        let ygt = ys.iter().map(|y| Value::from(*y));
        let loss: Value = ypred
            .into_iter()
            .zip(ygt)
            .map(|(yp, yg)| (yp - yg).pow(&Value::from(2.0)))
            .sum();

        println!("Loss: {} Predictions: {:?}", loss.data(), ypred_floats);

        // Backward pass: zero the gradients, then back-propagate from the loss
        mlp.parameters().iter().for_each(|p| p.clear_gradient());
        loss.backward();

        // Adjustment: step each parameter against its gradient
        mlp.parameters().iter().for_each(|p| p.adjust(-0.05));
    }
}

--------------------------------------------------------------------------------
/examples/scalar_back_propagation.rs:
--------------------------------------------------------------------------------
//! This is a basic example of performing back-propagation on a two-input/two-weight graph.

use micrograd_rs::Value;

fn main() {
    let x1 = Value::from(2.0).with_label("x1");
    // Keep a second handle to x1: the multiplication below consumes the
    // original binding, but the clone shares the same underlying node.
    let x1_clone = x1.clone();
    let x2 = Value::from(0.0).with_label("x2");

    let w1 = Value::from(-3.0).with_label("w1");
    let w2 = Value::from(1.0).with_label("w2");

    let b = Value::from(6.8813735870195432).with_label("b");

    let x1w1 = (x1 * w1).with_label("x1w1");
    let x2w2 = (x2 * w2).with_label("x2w2");

    let x1w1x2w2 = (x1w1 + x2w2).with_label("x1w1x2w2");

    let n = (x1w1x2w2 + b).with_label("n");
    let o = n.tanh().with_label("o");

    o.backward();

    assert_eq!(0.7071, round_to(o.data(), 4.0));
    assert_eq!(-1.5, round_to(x1_clone.gradient(), 3.0));
    println!("{:?}", o);
}

/// Rounds `value` to the given number of decimal digits.
fn round_to(value: f64, digits: f64) -> f64 {
    let ten: f64 = 10.0;
    (ten.powf(digits) * value).round() / ten.powf(digits)
}

--------------------------------------------------------------------------------
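A quick hand-check of the values asserted in this example: the graph computes $o = \tanh(n)$ with $n = x_1 w_1 + x_2 w_2 + b = (2.0)(-3.0) + (0.0)(1.0) + 6.8814 \approx 0.8814$, so $o \approx 0.7071$. Back-propagating through the tanh node gives

$$\frac{\partial o}{\partial x_1} = \left(1 - \tanh^2(n)\right) w_1 \approx (1 - 0.7071^2)(-3.0) \approx 0.5 \times (-3.0) = -1.5,$$

which matches the asserted gradient for x1.
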
/src/layer.rs:
--------------------------------------------------------------------------------
use crate::{neuron::Neuron, value::Value};

#[derive(Clone)]
pub struct Layer {
    neurons: Vec<Neuron>,
}

impl Layer {
    pub fn new(input_count: usize, output_count: usize) -> Layer {
        Layer {
            neurons: (0..output_count)
                .map(|_| Neuron::new(input_count))
                .collect(),
        }
    }

    pub fn forward(&self, xs: &Vec<Value>) -> Vec<Value> {
        self.neurons.iter().map(|n| n.forward(xs)).collect()
    }

    pub fn parameters(&self) -> Vec<Value> {
        self.neurons.iter().flat_map(|n| n.parameters()).collect()
    }
}

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
mod value;
pub use crate::value::Value;

mod neuron;
pub use crate::neuron::Neuron;

mod layer;
pub use crate::layer::Layer;

mod mlp;
pub use crate::mlp::MLP;

--------------------------------------------------------------------------------
/src/mlp.rs:
--------------------------------------------------------------------------------
use crate::{layer::Layer, value::Value};

#[derive(Clone)]
pub struct MLP {
    layers: Vec<Layer>,
}

impl MLP {
    pub fn new(input_count: usize, output_counts: Vec<usize>) -> MLP {
        let output_counts_len = output_counts.len();
        let layer_sizes: Vec<usize> = [input_count].into_iter().chain(output_counts).collect();

        MLP {
            layers: (0..output_counts_len)
                .map(|i| Layer::new(layer_sizes[i], layer_sizes[i + 1]))
                .collect(),
        }
    }

    pub fn forward(&self, mut xs: Vec<Value>) -> Vec<Value> {
        for layer in &self.layers {
            xs = layer.forward(&xs);
        }
        xs
    }

    pub fn parameters(&self) -> Vec<Value> {
        self.layers.iter().flat_map(|l| l.parameters()).collect()
    }
}

--------------------------------------------------------------------------------
/src/neuron.rs:
--------------------------------------------------------------------------------
use crate::value::Value;

use rand::{thread_rng, Rng};

#[derive(Clone)]
pub struct Neuron {
    weights: Vec<Value>,
    bias: Value,
}

impl Neuron {
    pub fn new(input_count: usize) -> Neuron {
        let mut rng = thread_rng();
        let mut rand_value_fn = || {
            let data = rng.gen_range(-1.0..1.0);
            Value::from(data)
        };

        let mut weights = Vec::new();
        for _ in 0..input_count {
            weights.push(rand_value_fn());
        }

        Neuron {
            weights,
            bias: rand_value_fn().with_label("b"),
        }
    }

    /// Computes tanh(w · x + b) over the zipped weights and inputs.
    /// Panics if the neuron has no inputs.
    pub fn forward(&self, xs: &Vec<Value>) -> Value {
        let products = std::iter::zip(&self.weights, xs)
            .map(|(a, b)| a * b)
            .collect::<Vec<Value>>();

        let sum = self.bias.clone() + products.into_iter().reduce(|acc, prd| acc + prd).unwrap();
        sum.tanh()
    }

    pub fn parameters(&self) -> Vec<Value> {
        [self.bias.clone()]
            .into_iter()
            .chain(self.weights.clone())
            .collect()
    }
}

--------------------------------------------------------------------------------
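A sizing note for the modules above: a `Neuron` holds one weight per input plus a bias and computes $\tanh(w \cdot x + b)$, a `Layer` holds `output_count` such neurons, and `MLP::new(input_count, output_counts)` chains the sizes together. The `MLP::new(3, vec![4, 4, 1])` used in `examples/gradient_descent.rs` therefore builds layers of shape 3→4, 4→4, and 4→1, and `parameters()` returns

$$(3+1) \cdot 4 + (4+1) \cdot 4 + (4+1) \cdot 1 = 16 + 20 + 5 = 41$$

trainable values.
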
/src/value.rs:
--------------------------------------------------------------------------------
use std::{
    cell::{Ref, RefCell},
    collections::HashSet,
    fmt::Debug,
    hash::Hash,
    iter::Sum,
    ops::{Add, Deref, Mul, Neg, Sub},
    rc::Rc,
};

#[derive(Clone, Eq, PartialEq, Debug)]
pub struct Value(Rc<RefCell<ValueInternal>>);

impl Value {
    pub fn from<T>(t: T) -> Value
    where
        T: Into<Value>,
    {
        t.into()
    }

    fn new(value: ValueInternal) -> Value {
        Value(Rc::new(RefCell::new(value)))
    }

    pub fn with_label(self, label: &str) -> Value {
        self.borrow_mut().label = Some(label.to_string());
        self
    }

    pub fn data(&self) -> f64 {
        self.borrow().data
    }

    pub fn gradient(&self) -> f64 {
        self.borrow().gradient
    }

    pub fn clear_gradient(&self) {
        self.borrow_mut().gradient = 0.0;
    }

    pub fn adjust(&self, factor: f64) {
        let mut value = self.borrow_mut();
        value.data += factor * value.gradient;
    }

    pub fn pow(&self, other: &Value) -> Value {
        let result = self.borrow().data.powf(other.borrow().data);

        let prop_fn: PropagateFn = |value| {
            let mut base = value.previous[0].borrow_mut();
            let power = value.previous[1].borrow();
            base.gradient += power.data * (base.data.powf(power.data - 1.0)) * value.gradient;
        };

        Value::new(ValueInternal::new(
            result,
            None,
            Some("^".to_string()),
            vec![self.clone(), other.clone()],
            Some(prop_fn),
        ))
    }

    pub fn tanh(&self) -> Value {
        let result = self.borrow().data.tanh();

        let prop_fn: PropagateFn = |value| {
            let mut previous = value.previous[0].borrow_mut();
            previous.gradient += (1.0 - value.data.powf(2.0)) * value.gradient;
        };

        Value::new(ValueInternal::new(
            result,
            None,
            Some("tanh".to_string()),
            vec![self.clone()],
            Some(prop_fn),
        ))
    }

    pub fn backward(&self) {
        // Build a reverse-topological ordering so that each node's gradient is
        // fully accumulated from all of its consumers before it propagates to
        // its own children; propagating in plain DFS order would drop gradient
        // contributions for nodes shared by multiple paths.
        let mut visited: HashSet<Value> = HashSet::new();
        let mut ordered: Vec<Value> = Vec::new();
        self.topological_order(&mut visited, &mut ordered);

        self.borrow_mut().gradient = 1.0;
        for value in ordered.iter().rev() {
            let borrowed_value = value.borrow();
            if let Some(prop_fn) = borrowed_value.propagate {
                prop_fn(&borrowed_value);
            }
        }
    }

    fn topological_order(&self, visited: &mut HashSet<Value>, ordered: &mut Vec<Value>) {
        if !visited.contains(self) {
            visited.insert(self.clone());

            for child in &self.borrow().previous {
                child.topological_order(visited, ordered);
            }

            ordered.push(self.clone());
        }
    }
}

impl Hash for Value {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.0.borrow().hash(state);
    }
}

impl Deref for Value {
    type Target = Rc<RefCell<ValueInternal>>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T: Into<f64>> From<T> for Value {
    fn from(t: T) -> Value {
        Value::new(ValueInternal::new(t.into(), None, None, Vec::new(), None))
    }
}

impl Add for Value {
    type Output = Value;

    fn add(self, other: Value) -> Self::Output {
        add(&self, &other)
    }
}

impl<'a, 'b> Add<&'b Value> for &'a Value {
    type Output = Value;

    fn add(self, other: &'b Value) -> Self::Output {
        add(self, other)
    }
}

fn add(a: &Value, b: &Value) -> Value {
    let result = a.borrow().data + b.borrow().data;

    // Assumes the two operands are distinct nodes; adding a value to itself
    // would attempt two simultaneous mutable borrows and panic.
    let prop_fn: PropagateFn = |value| {
        let mut first = value.previous[0].borrow_mut();
        let mut second = value.previous[1].borrow_mut();

        first.gradient += value.gradient;
        second.gradient += value.gradient;
    };

    Value::new(ValueInternal::new(
        result,
        None,
        Some("+".to_string()),
        vec![a.clone(), b.clone()],
        Some(prop_fn),
    ))
}

impl Sub for Value {
    type Output = Value;

    fn sub(self, other: Value) -> Self::Output {
        add(&self, &(-other))
    }
}

impl<'a, 'b> Sub<&'b Value> for &'a Value {
    type Output = Value;

    fn sub(self, other: &'b Value) -> Self::Output {
        add(self, &(-other))
    }
}

impl Mul for Value {
    type Output = Value;

    fn mul(self, other: Value) -> Self::Output {
        mul(&self, &other)
    }
}

impl<'a, 'b> Mul<&'b Value> for &'a Value {
    type Output = Value;

    fn mul(self, other: &'b Value) -> Self::Output {
        mul(self, other)
    }
}

fn mul(a: &Value, b: &Value) -> Value {
    let result = a.borrow().data * b.borrow().data;

    // As with add(), the operands are assumed to be distinct nodes.
    let prop_fn: PropagateFn = |value| {
        let mut first = value.previous[0].borrow_mut();
        let mut second = value.previous[1].borrow_mut();

        first.gradient += second.data * value.gradient;
        second.gradient += first.data * value.gradient;
    };

    Value::new(ValueInternal::new(
        result,
        None,
        Some("*".to_string()),
        vec![a.clone(), b.clone()],
        Some(prop_fn),
    ))
}

impl Neg for Value {
    type Output = Value;

    fn neg(self) -> Self::Output {
        mul(&self, &Value::from(-1))
    }
}

impl<'a> Neg for &'a Value {
    type Output = Value;

    fn neg(self) -> Self::Output {
        mul(self, &Value::from(-1))
    }
}

impl Sum for Value {
    fn sum<I: Iterator<Item = Value>>(iter: I) -> Self {
        iter.fold(Value::from(0.0), |acc, val| acc + val)
    }
}

type PropagateFn = fn(value: &Ref<ValueInternal>);

pub struct ValueInternal {
    data: f64,
    gradient: f64,
    label: Option<String>,
    operation: Option<String>,
    previous: Vec<Value>,
    propagate: Option<PropagateFn>,
}

impl ValueInternal {
    fn new(
        data: f64,
        label: Option<String>,
        op: Option<String>,
        prev: Vec<Value>,
        propagate: Option<PropagateFn>,
    ) -> ValueInternal {
        ValueInternal {
            data,
            gradient: 0.0,
            label,
            operation: op,
            previous: prev,
            propagate,
        }
    }
}

impl PartialEq for ValueInternal {
    fn eq(&self, other: &Self) -> bool {
        self.data == other.data
            && self.gradient == other.gradient
            && self.label == other.label
            && self.operation == other.operation
            && self.previous == other.previous
    }
}

impl Eq for ValueInternal {}

impl Hash for ValueInternal {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.data.to_bits().hash(state);
        self.gradient.to_bits().hash(state);
        self.label.hash(state);
        self.operation.hash(state);
        self.previous.hash(state);
    }
}

impl Debug for ValueInternal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ValueInternal")
            .field("data", &self.data)
            .field("gradient", &self.gradient)
            .field("label", &self.label)
            .field("operation", &self.operation)
            .field("previous", &self.previous)
            .finish()
    }
}

--------------------------------------------------------------------------------
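
As a closing illustration, here is a short end-to-end sketch (not a file in the repository, and the values are hypothetical) showing the operator overloads and back-propagation from src/value.rs working together:

use micrograd_rs::Value;

fn main() {
    // d = a * b + c, so dd/da = b, dd/db = a, dd/dc = 1
    let a = Value::from(2.0).with_label("a");
    let b = Value::from(-3.0).with_label("b");
    let c = Value::from(10.0).with_label("c");

    let d = ((&a * &b) + c.clone()).with_label("d");
    d.backward();

    assert_eq!(4.0, d.data()); // 2.0 * -3.0 + 10.0
    assert_eq!(-3.0, a.gradient()); // dd/da = b
    assert_eq!(2.0, b.gradient()); // dd/db = a
    assert_eq!(1.0, c.gradient()); // dd/dc = 1
}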