├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.md └── src ├── dist.rs ├── lib.rs └── prob.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "assoc" 7 | version = "0.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "bfdc70193dadb9d7287fa4b633f15f90c876915b31f6af17da307fc59c9859a8" 10 | 11 | [[package]] 12 | name = "either" 13 | version = "1.8.1" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" 16 | 17 | [[package]] 18 | name = "itertools" 19 | version = "0.10.5" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 22 | dependencies = [ 23 | "either", 24 | ] 25 | 26 | [[package]] 27 | name = "porco" 28 | version = "0.1.4" 29 | dependencies = [ 30 | "assoc", 31 | "itertools", 32 | ] 33 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "porco" 3 | version = "0.1.4" 4 | authors = [""] 5 | edition = "2018" 6 | description = "Composable probability distributions" 7 | documentation = "https://docs.rs/porco" 8 | readme = "README.md" 9 | homepage = "https://github.com/mingyli/porco" 10 | repository = "https://github.com/mingyli/porco" 11 | license = "MIT" 12 | 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | assoc = "0.1.3" 17 | itertools = "0.10" 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # porco 2 | 3 | [docs.rs](https://docs.rs/porco) 4 | 5 | Composable probability distributions. 6 | 7 | ## Examples 8 | 9 | Create simple probability distributions. 10 | 11 | ```rust 12 | enum Coin { 13 | Heads, 14 | Tails, 15 | } 16 | 17 | impl Coin { 18 | fn flip() -> Distribution { 19 | Distribution::uniform([Coin::Heads, Coin::Tails]) 20 | } 21 | } 22 | 23 | let coin = Coin::flip(); 24 | assert_eq!(coin.pmf(&Coin::Heads), Probability(0.5)); 25 | ``` 26 | 27 | Compose operations over distributions using combinators. 28 | 29 | ```rust 30 | fn reflip_if_tails(coin: Coin) -> Distribution { 31 | match coin { 32 | Coin::Heads => Distribution::always(Coin::Heads), 33 | Coin::Tails => Coin::flip(), 34 | } 35 | } 36 | 37 | let coin = Coin::flip().and_then(reflip_if_tails); 38 | assert_eq!(coin.pmf(&Coin::Heads), Probability(0.75)); 39 | ``` 40 | 41 | Compute summary statistics of random variables. 42 | 43 | ```rust 44 | let die = Distribution::uniform([1, 2, 3, 4, 5, 6]); 45 | let ev = die.given(|&v| v <= 4).expectation(); 46 | assert_eq!(ev, 2.5); 47 | ``` 48 | -------------------------------------------------------------------------------- /src/dist.rs: -------------------------------------------------------------------------------- 1 | use std::iter::FromIterator; 2 | 3 | use assoc::AssocExt; 4 | 5 | use crate::Probability; 6 | 7 | /// [`Distribution`] is a discrete probability distribution over 8 | /// the set of outcomes `T`. 9 | /// 10 | /// See the [module level documentation for an overview](crate). 11 | /// 12 | /// The underlying implementation of a `Distribution` is an associative 13 | /// array `Vec<(T, Probability)>` through the [`assoc`] crate. 14 | #[derive(Debug, Clone, PartialEq)] 15 | pub struct Distribution(Vec<(T, Probability)>); 16 | 17 | impl Distribution 18 | where 19 | T: PartialEq, 20 | { 21 | /// Create a distribution using given outcome probabilities. 22 | /// 23 | /// ```rust 24 | /// # use porco::{Distribution, Probability}; 25 | /// # #[derive(Debug, PartialEq)] 26 | /// # enum Coin { 27 | /// # Heads, 28 | /// # Tails, 29 | /// # } 30 | /// let biased_coin = Distribution::new(vec![ 31 | /// (Coin::Heads, Probability(0.75)), 32 | /// (Coin::Tails, Probability(0.25)), 33 | /// ]); 34 | /// ``` 35 | /// 36 | /// It can be easier to collect an iterator through [`Distribution::from_iter`]: 37 | /// 38 | /// ```rust 39 | /// # use porco::{Distribution, Probability}; 40 | /// # #[derive(Debug, PartialEq)] 41 | /// # enum Coin { 42 | /// # Heads, 43 | /// # Tails, 44 | /// # } 45 | /// let biased_coin: Distribution = vec![ 46 | /// (Coin::Heads, Probability(0.75)), 47 | /// (Coin::Tails, Probability(0.25)), 48 | /// ] 49 | /// .into_iter() 50 | /// .collect(); 51 | /// ``` 52 | pub fn new>(iter: I) -> Distribution { 53 | Distribution(iter.into_iter().collect()).regroup() 54 | } 55 | 56 | /// Create a distribution where the given outcome always occurs. 57 | /// 58 | /// ```rust 59 | /// # use porco::{Distribution, Probability}; 60 | /// # #[derive(Debug, PartialEq)] 61 | /// # enum Coin { 62 | /// # Heads, 63 | /// # Tails, 64 | /// # } 65 | /// let rigged_coin = Distribution::always(Coin::Heads); 66 | /// assert_eq!(rigged_coin.pmf(&Coin::Heads), Probability(1.0)); 67 | /// ``` 68 | pub fn always(t: T) -> Distribution { 69 | Distribution(vec![(t, Probability::ONE)]) 70 | } 71 | 72 | /// Create a uniform distribution over a collection of outcomes. 73 | /// 74 | /// ```rust 75 | /// # use porco::{Distribution, Probability}; 76 | /// # #[derive(Debug, PartialEq)] 77 | /// # enum Coin { 78 | /// # Heads, 79 | /// # Tails, 80 | /// # } 81 | /// let fair_coin = Distribution::uniform(vec![Coin::Heads, Coin::Tails]); 82 | /// assert_eq!(fair_coin.pmf(&Coin::Heads), Probability(0.5)); 83 | /// ``` 84 | pub fn uniform>(iter: I) -> Distribution { 85 | let outcomes: Vec<_> = iter.into_iter().collect(); 86 | let p = Probability(1.0 / outcomes.len() as f64); 87 | Distribution::from_iter(outcomes.into_iter().map(|t| (t, p))) 88 | } 89 | 90 | /// Convert a `Distribution` into a `Distribution` by mapping 91 | /// outcomes in `T` to outcomes in `U`. 92 | /// 93 | /// ``` 94 | /// # use porco::{Distribution, Probability}; 95 | /// # #[derive(Debug, PartialEq)] 96 | /// # enum Coin { 97 | /// # Heads, 98 | /// # Tails, 99 | /// # } 100 | /// let dist = Distribution::uniform(vec![0, 1, 2, 3]).map(|v| { 101 | /// if v == 3 { 102 | /// Coin::Heads 103 | /// } else { 104 | /// Coin::Tails 105 | /// } 106 | /// }); 107 | /// assert_eq!(dist.pmf(&Coin::Heads), Probability(0.25)); 108 | /// assert_eq!(dist.pmf(&Coin::Tails), Probability(0.75)); 109 | /// ``` 110 | pub fn map(self, f: F) -> Distribution 111 | where 112 | U: PartialEq, 113 | F: Fn(T) -> U, 114 | { 115 | Distribution::from_iter(self.0.into_iter().map(|(t, p)| (f(t), p))) 116 | } 117 | 118 | /// Convert a `Distribution` into a `Distribution` by mapping 119 | /// outcomes in `T` to distributions over `U`. 120 | /// 121 | /// ``` 122 | /// # use porco::{Distribution, Probability}; 123 | /// # #[derive(Debug, PartialEq)] 124 | /// # enum Coin { 125 | /// # Heads, 126 | /// # Tails, 127 | /// # } 128 | /// # impl Coin { 129 | /// # fn flip() -> Distribution { 130 | /// # Distribution::uniform(vec![Coin::Heads, Coin::Tails]) 131 | /// # } 132 | /// # } 133 | /// fn roll_a_die_if_heads(coin: Coin) -> Distribution> { 134 | /// match coin { 135 | /// Coin::Heads => Distribution::uniform(vec![Some(1), Some(2), Some(3), Some(4)]), 136 | /// Coin::Tails => Distribution::always(None), 137 | /// } 138 | /// } 139 | /// 140 | /// let dist = Coin::flip().and_then(roll_a_die_if_heads); 141 | /// assert_eq!(dist.pmf(&None), Probability(0.5)); 142 | /// assert_eq!(dist.pmf(&Some(2)), Probability(0.125)); 143 | /// ``` 144 | /// 145 | /// [`Distribution::and_then`] can also be used to construct joint distributions. 146 | /// 147 | /// ``` 148 | /// # use porco::{Distribution, Probability}; 149 | /// # #[derive(Copy, Clone, Debug, PartialEq)] 150 | /// # enum Coin { 151 | /// # Heads, 152 | /// # Tails, 153 | /// # } 154 | /// # impl Coin { 155 | /// # fn flip() -> Distribution { 156 | /// # Distribution::uniform(vec![Coin::Heads, Coin::Tails]) 157 | /// # } 158 | /// # } 159 | /// fn flip_another(coin: Coin) -> Distribution<(Coin, Coin)> { 160 | /// Distribution::uniform(vec![(coin, Coin::Heads), (coin, Coin::Tails)]) 161 | /// } 162 | /// 163 | /// let two_coins = Coin::flip().and_then(flip_another); 164 | /// assert_eq!(two_coins.pmf(&(Coin::Heads, Coin::Heads)), Probability(0.25)); 165 | /// ``` 166 | pub fn and_then(self, f: F) -> Distribution 167 | where 168 | U: PartialEq, 169 | F: Fn(T) -> Distribution, 170 | { 171 | Distribution::from_iter( 172 | self.0 173 | .into_iter() 174 | .map(|(t, p)| (f(t), p)) 175 | .flat_map(|(dist, p)| dist.0.into_iter().map(move |(t, p2)| (t, p * p2))), 176 | ) 177 | } 178 | 179 | fn regroup(self) -> Distribution { 180 | Distribution(self.0.into_iter().fold(Vec::new(), |mut vec, (t, p)| { 181 | vec.entry(t).and_modify(|e| *e = *e + p).or_insert(p); 182 | vec 183 | })) 184 | } 185 | 186 | fn normalize(self) -> Distribution { 187 | let factor: f64 = self.0.iter().map(|(_, p)| p.0).sum(); 188 | self.0.into_iter().map(|(t, p)| (t, p / factor)).collect() 189 | } 190 | 191 | /// Create a distribution from a distribution conditioned on an event occurring. 192 | /// 193 | /// ``` 194 | /// # use porco::{Distribution, Probability}; 195 | /// # #[derive(Debug, PartialEq)] 196 | /// # enum Coin { 197 | /// # Heads, 198 | /// # Tails, 199 | /// # } 200 | /// let die = Distribution::uniform(vec![1, 2, 3, 4, 5, 6]); 201 | /// let die_given_less_than_three = die.given(|&v| v < 3); 202 | /// assert_eq!(die_given_less_than_three.pmf(&1), Probability(0.5)); 203 | /// ``` 204 | pub fn given(self, condition: F) -> Distribution 205 | where 206 | F: Fn(&T) -> bool, 207 | { 208 | Distribution::from_iter(self.0.into_iter().filter(|(t, _)| condition(t))).normalize() 209 | } 210 | 211 | /// Get the probability of an outcome occurring from the probability mass function. 212 | pub fn pmf(&self, t: &T) -> Probability { 213 | *self.0.get(t).unwrap_or(&Probability::ZERO) 214 | } 215 | } 216 | 217 | impl Distribution 218 | where 219 | T: Into + Clone, 220 | { 221 | /// Compute the expectation of a random variable. 222 | /// 223 | /// A random variable is a mapping from outcomes to real values. 224 | /// 225 | /// ``` 226 | /// # use porco::{Distribution, Probability}; 227 | /// # #[derive(Debug, PartialEq)] 228 | /// # enum Coin { 229 | /// # Heads, 230 | /// # Tails, 231 | /// # } 232 | /// let biased_coin = Distribution::from([ 233 | /// (Coin::Heads, Probability(0.25)), 234 | /// (Coin::Tails, Probability(0.75)), 235 | /// ]); 236 | /// let ev = biased_coin 237 | /// .map(|coin| match coin { 238 | /// Coin::Heads => 1, 239 | /// Coin::Tails => 0, 240 | /// }) 241 | /// .expectation(); 242 | /// assert_eq!(ev, 0.25); 243 | /// ``` 244 | pub fn expectation(&self) -> f64 { 245 | self.0.iter().map(|(t, p)| t.clone().into() * p.0).sum() 246 | } 247 | } 248 | 249 | impl Distribution 250 | where 251 | T: std::ops::Add + Clone + PartialEq, 252 | { 253 | /// Perform the convolution of two random variables. 254 | /// 255 | /// If `x` and `y` are two independent random variables, then `x.convolve(y)` is the 256 | /// distribution of the random variable `x + y`. 257 | /// 258 | /// ```rust 259 | /// # use porco::{Distribution, Probability}; 260 | /// fn two_sided_die() -> Distribution { 261 | /// Distribution::uniform(vec![1, 2]) 262 | /// } 263 | /// 264 | /// let x = two_sided_die(); 265 | /// let y = two_sided_die(); 266 | /// let sum = x.convolve(y); 267 | /// assert_eq!(sum.pmf(&2), Probability(0.25)); 268 | /// assert_eq!(sum.pmf(&3), Probability(0.5)); 269 | /// ``` 270 | pub fn convolve(self, other: Distribution) -> Distribution { 271 | use itertools::Itertools; 272 | 273 | self.0 274 | .into_iter() 275 | .cartesian_product(other.0) 276 | .map(|((t1, p1), (t2, p2))| (t1 + t2, p1 * p2)) 277 | .collect() 278 | } 279 | } 280 | 281 | impl Distribution> 282 | where 283 | T: PartialEq, 284 | { 285 | /// Convert a `Distribution>` into a `Distribution`. 286 | /// 287 | /// A `Distribution>` can be interpreted as a sequence of 288 | /// two experiments, where the outcome of the first informs what experiment 289 | /// is conducted second. 290 | /// 291 | /// ``` 292 | /// # use porco::{Distribution, Probability}; 293 | /// # #[derive(Debug, PartialEq)] 294 | /// # enum Coin { 295 | /// # Heads, 296 | /// # Tails, 297 | /// # } 298 | /// # impl Coin { 299 | /// # fn flip() -> Distribution { 300 | /// # Distribution::uniform(vec![Coin::Heads, Coin::Tails]) 301 | /// # } 302 | /// # } 303 | /// fn reflip_if_tails(coin: Coin) -> Distribution { 304 | /// match coin { 305 | /// Coin::Heads => Distribution::always(Coin::Heads), 306 | /// Coin::Tails => Coin::flip(), 307 | /// } 308 | /// } 309 | /// 310 | /// let dist: Distribution> = Coin::flip().map(reflip_if_tails); 311 | /// let coin = dist.flatten(); 312 | /// assert_eq!(coin.pmf(&Coin::Heads), Probability(0.75)); 313 | /// ``` 314 | pub fn flatten(self) -> Distribution { 315 | self.and_then(std::convert::identity) 316 | } 317 | } 318 | 319 | impl FromIterator<(T, f64)> for Distribution 320 | where 321 | T: PartialEq, 322 | { 323 | /// ```rust 324 | /// use porco::Distribution; 325 | /// 326 | /// let dist: Distribution<&str> = vec![("a", 0.4), ("b", 0.6)].into_iter().collect(); 327 | /// ``` 328 | fn from_iter>(iter: I) -> Self { 329 | Distribution::from_iter(iter.into_iter().map(|(t, p)| (t, Probability(p)))) 330 | } 331 | } 332 | 333 | impl FromIterator<(T, Probability)> for Distribution 334 | where 335 | T: PartialEq, 336 | { 337 | fn from_iter>(iter: I) -> Self { 338 | Distribution::new(iter) 339 | } 340 | } 341 | 342 | impl From> for Distribution 343 | where 344 | T: PartialEq, 345 | { 346 | fn from(v: Vec<(T, Probability)>) -> Self { 347 | Distribution::from_iter(v) 348 | } 349 | } 350 | 351 | impl From<[(T, Probability); N]> for Distribution 352 | where 353 | T: PartialEq, 354 | { 355 | fn from(s: [(T, Probability); N]) -> Self { 356 | Distribution::from_iter(s) 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Porco is a library for working with and composing probability 2 | //! distributions. 3 | //! 4 | //! The API is inspired by the contents of 5 | //! [Probabilistic Functional Programming in Haskell][paper] 6 | //! but with naming conventions that match those of [`Option`] and [`Result`] 7 | //! (such as [`Option::and_then`]). 8 | //! 9 | //! ```rust 10 | //! # use porco::{Probability, Distribution}; 11 | //! # #[derive(Debug, PartialEq)] 12 | //! enum Coin { 13 | //! Heads, 14 | //! Tails, 15 | //! } 16 | //! 17 | //! impl Coin { 18 | //! fn flip() -> Distribution { 19 | //! Distribution::uniform(vec![Coin::Heads, Coin::Tails]) 20 | //! } 21 | //! } 22 | //! 23 | //! let coin = Coin::flip(); 24 | //! assert_eq!(coin.pmf(&Coin::Heads), Probability(0.5)); 25 | //! ``` 26 | //! 27 | //! You can compose various operations over `Distribution`s using combinators 28 | //! like [`Distribution::map`], [`Distribution::and_then`], and 29 | //! [`Distribution::given`]. 30 | //! 31 | //! ```rust 32 | //! # use porco::{Probability, Distribution}; 33 | //! # #[derive(Debug, PartialEq)] 34 | //! # enum Coin { 35 | //! # Heads, 36 | //! # Tails, 37 | //! # } 38 | //! # impl Coin { 39 | //! # fn flip() -> Distribution { 40 | //! # Distribution::uniform(vec![Coin::Heads, Coin::Tails]) 41 | //! # } 42 | //! # } 43 | //! fn reflip_if_tails(coin: Coin) -> Distribution { 44 | //! match coin { 45 | //! Coin::Heads => Distribution::always(Coin::Heads), 46 | //! Coin::Tails => Coin::flip(), 47 | //! } 48 | //! } 49 | //! 50 | //! let coin = Coin::flip().and_then(reflip_if_tails); 51 | //! assert_eq!(coin.pmf(&Coin::Heads), Probability(0.75)); 52 | //! ``` 53 | //! 54 | //! You can also manipulate random variables and compute summary statistics. 55 | //! 56 | //! ```rust 57 | //! # use porco::{Probability, Distribution}; 58 | //! let die = Distribution::uniform(vec![1, 2, 3, 4, 5, 6]); 59 | //! let ev = die.given(|&v| v <= 4).expectation(); 60 | //! assert_eq!(ev, 2.5); 61 | //! 62 | //! fn two_sided_die() -> Distribution { 63 | //! Distribution::uniform(vec![1, 2]) 64 | //! } 65 | //! 66 | //! let x = two_sided_die(); 67 | //! let y = two_sided_die(); 68 | //! let sum = x.convolve(y); 69 | //! assert_eq!(sum.pmf(&2), Probability(0.25)); 70 | //! assert_eq!(sum.pmf(&3), Probability(0.5)); 71 | //! ``` 72 | //! 73 | //! [paper]: https://web.engr.oregonstate.edu/~erwig/papers/PFP_JFP06.pdf 74 | mod dist; 75 | mod prob; 76 | 77 | pub use dist::Distribution; 78 | pub use prob::Probability; 79 | -------------------------------------------------------------------------------- /src/prob.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryFrom, ops}; 2 | 3 | /// [`Probability`] is a light container for probabilities. 4 | #[derive(Debug, Copy, Clone, PartialEq)] 5 | pub struct Probability(pub f64); 6 | 7 | impl Probability { 8 | pub const ZERO: Probability = Probability(0.0); 9 | pub const ONE: Probability = Probability(1.0); 10 | } 11 | 12 | impl From for f64 { 13 | fn from(probability: Probability) -> Self { 14 | probability.0 15 | } 16 | } 17 | 18 | impl From for Probability { 19 | fn from(p: f64) -> Self { 20 | Probability::try_from(p).expect("The probability is between 0.0 and 1.0") 21 | } 22 | } 23 | 24 | // TODO: Consider using TryFrom instead of From. 25 | // impl TryFrom for Probability { 26 | // type Error = &'static str; 27 | // 28 | // fn try_from(p: f64) -> Result { 29 | // if (0.0..=1.0).contains(&p) { 30 | // Ok(Probability(p)) 31 | // } else { 32 | // Err("TODO: Use error type.") 33 | // } 34 | // } 35 | // } 36 | 37 | impl ops::Add for Probability { 38 | type Output = Self; 39 | 40 | fn add(self, other: Self) -> Self { 41 | Self(self.0 + other.0) 42 | } 43 | } 44 | 45 | impl ops::Sub for Probability { 46 | type Output = Self; 47 | 48 | fn sub(self, other: Self) -> Self { 49 | Self(self.0 - other.0) 50 | } 51 | } 52 | 53 | impl ops::Mul for Probability { 54 | type Output = Self; 55 | 56 | fn mul(self, other: Self) -> Self { 57 | Self(self.0 * other.0) 58 | } 59 | } 60 | 61 | impl ops::Div for Probability { 62 | type Output = Self; 63 | 64 | fn div(self, other: Self) -> Self { 65 | Self(self.0 / other.0) 66 | } 67 | } 68 | 69 | impl ops::Div for Probability { 70 | type Output = Self; 71 | 72 | fn div(self, other: f64) -> Self { 73 | Self(self.0 / other) 74 | } 75 | } 76 | --------------------------------------------------------------------------------