├── LICENSE.txt ├── README.md ├── docs ├── common.html ├── common.ipynb ├── common_cards_coins_dice.html ├── common_cards_coins_dice.ipynb ├── common_continuous.html ├── common_continuous.ipynb ├── common_discrete.html ├── common_discrete.ipynb ├── common_general_comments.html ├── common_general_comments.ipynb ├── common_joint.html ├── common_joint.ipynb ├── common_random.html ├── common_random.ipynb ├── conditioning.html ├── conditioning.ipynb ├── graphics.html ├── graphics.ipynb ├── index.html ├── index.ipynb ├── joint.html ├── joint.ipynb ├── mc.html ├── mc.ipynb ├── probspace.html ├── probspace.ipynb ├── process.html ├── process.ipynb ├── rv.html ├── rv.ipynb ├── sim.html └── sim.ipynb ├── labs ├── Lab 1 - Probability Spaces.ipynb ├── Lab 2 - Random Variables.ipynb ├── Lab 3 - Discrete Distributions.ipynb ├── Lab 4 - Poisson Processes.ipynb ├── Lab 5 - Central Limit Theorem.ipynb ├── Lab 6 - Joint and Conditional Distributions.ipynb ├── Lab 7 - Stochastic Processes.ipynb └── README.md ├── setup.py ├── symbulate ├── __init__.py ├── base.py ├── distributions.py ├── gaussian_process.py ├── independence.py ├── index_sets.py ├── markov_chains.py ├── math.py ├── plot.py ├── poisson_process.py ├── probability_space.py ├── random_processes.py ├── random_variables.py ├── result.py ├── results.py ├── table.py └── tests │ ├── __init__.py │ └── test_distributions.py └── tutorial ├── README.md ├── gs_joint.ipynb ├── gs_probspace.ipynb ├── gs_rv.ipynb └── index.ipynb /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Dennis Sun and Kevin Ross 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit 
persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Symbulate 3 | 4 | *Symbulate* is a Python package which provides a user friendly 5 | framework for specifying and conducting simulations from probability models. 6 | 7 | # Installation Instructions 8 | 9 | It is recommended that you first install the 10 | [Anaconda distribution](https://www.anaconda.com/download/), which 11 | is a Python environment with many scientific packages installed 12 | (including all of the packages that Symbulate is built on). 13 | 14 | The fastest way to get up and running with Symbulate is to run the 15 | command `pip install symbulate` at the command line. 16 | 17 | 18 | # Documentation 19 | 20 | Please see the [documentation](https://dlsun.github.io/symbulate/index.html) for examples of how to use Symbulate. 
21 | -------------------------------------------------------------------------------- /docs/common.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Documentation\n", 8 | "\n", 9 | "# Common Probability Models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Many commonly used probability models are built into Symbulate. These models can be used to either specify probability spaces or distributions of random variables or processes." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "1. [**Cards, coins, dice**](common_cards_coins_dice.html)\n", 24 | "1. [**Discrete distributions**](common_discrete.html)\n", 25 | "1. [**Continuous distributions**](common_continuous.html)\n", 26 | "1. [**Methods for common discrete and continuous distributions**](common_general_comments.html)\n", 27 | "1. [**Joint distributions**](common_joint.html)\n", 28 | "1. 
[**Common random processes**](common_random.html)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "< [Random variables](rv.html) | [Contents](index.html) | [Cards, coins, dice](common_cards_coins_dice.html) >" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [] 46 | } 47 | ], 48 | "metadata": { 49 | "anaconda-cloud": {}, 50 | "kernelspec": { 51 | "display_name": "Python 3", 52 | "language": "python", 53 | "name": "python3" 54 | }, 55 | "language_info": { 56 | "codemirror_mode": { 57 | "name": "ipython", 58 | "version": 3 59 | }, 60 | "file_extension": ".py", 61 | "mimetype": "text/x-python", 62 | "name": "python", 63 | "nbconvert_exporter": "python", 64 | "pygments_lexer": "ipython3", 65 | "version": "3.6.1" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 1 70 | } 71 | -------------------------------------------------------------------------------- /docs/common_cards_coins_dice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Documentation\n", 8 | "\n", 9 | "# Cards, coins, and dice" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Many probabilistic situations involving physical objects like cards, coins, and dice can be specified with [BoxModel](probspace.html#boxmodel)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "< [Common probability models](common.html) | [Contents](index.html) | [Common discrete distributions](common_discrete.html) >" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Be sure to import Symbulate using the following commands." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "from symbulate import *\n", 40 | "%matplotlib inline" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "*Example*. Rolling a fair n-sided die (with n=6)." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAD8CAYAAABdCyJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGFxJREFUeJzt3X+wX3Wd3/HniwCry48FJWIGEgmdLCy71UgzgNU66zJY\nYNVAnVrYXWQpNVIBpV2nG7Wzy053WnVQO24ZYtTsYhel+IOabrPQwDJad0WS0JQQMCVmoSQNJEol\nCC0QePeP7wn97uXm3pOcnHv55j4fM9/5nvM553Pu+wwzeXE+55zPN1WFJEn765DpLkCSNNoMEklS\nJwaJJKkTg0SS1IlBIknqxCCRJHVikEiSOjFIJEmdGCSSpE4One4CpsJxxx1XJ5100nSXIUkjZd26\ndT+uqtmT7TcjguSkk05i7dq1012GJI2UJI+02c+hLUlSJwaJJKkTg0SS1EmvQZLk3CSbkmxOsnSc\n7acm+X6SZ5N8dKj9lCTrhz67klzTbLs2ybahbef3eQ6SpIn1drM9ySzgeuAcYCuwJsnKqnpgaLcn\ngA8DFwz3rapNwMKh42wDbh3a5XNVdV1ftUuS2uvziuQMYHNVbamq54CbgcXDO1TVjqpaAzw/wXHO\nBn5UVa2eHpAkTa0+g+QE4NGh9a1N2766CPjamLark9yXZEWSY/e3QElSd6/om+1JDgfeA3x9qPkG\n4GQGQ1/bgc/spe+SJGuTrN25c2fvtUrSTNVnkGwD5g6tn9i07YvzgHur6vE9DVX1eFW9UFUvAl9k\nMIT2MlW1vKoWVdWi2bMnfTFTkrSf+gySNcCCJPObK4uLgJX7eIyLGTOslWTO0OqFwP2dqpSkg9Sf\n3PMof3LPo5Pv2FFvT21V1e4kVwG3A7OAFVW1MckVzfZlSV4PrAWOBl5sHvE9rap2JTmCwRNfHxxz\n6E8nWQgU8PA42yVJU6jXubaqahWwakzbsqHlxxgMeY3X92ngteO0X3KAy5QkdfCKvtkuSQfKVA3z\nzEQGiSSpE4NEktSJQSJJ6sQgkSR1YpBoxvMmrNSNQSLNQIanDiSDRH+D/8BI2lcGiSSpE4NEktSJ\nQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqRODJIJ+E6FJE3OIJEkdWKQSJI6MUgkSZ0YJJKkTgwSSVIn\nvQZJknOTbEqyOcnScbafmuT7SZ5N8tEx2x5OsiHJ+iRrh9pfk2R1koea72P7PAdJ0sR6C5Iks4Dr\ngfOA04CLk5w2ZrcngA8D1+3lMO+oqoVVtWiobSlwZ1UtAO5s1iVJ06TPK5IzgM1VtaWqngNuBhYP\n71BVO6pqDfD8Phx3MXBjs3wjcMGBKFaStH/6DJITgOG3+bY2bW0VcEeSdUmWDLUfX1Xbm+XHgOO7\nlSlJ6uLQ6S5gAm+rqm1JXgesTvLDqvru8A5VVUlqvM5N+CwBmDdvXv/VStIM1ecVyTZg7tD6iU1b\nK1W1rfneAdzKYKgM4PEkcwC
a7x176b+8qhZV1aLZs2fvR/mSpDb6DJI1wIIk85McDlwErGzTMckR\nSY7aswy8E7i/2bwSuLRZvhT49gGtWpK0T3ob2qqq3UmuAm4HZgErqmpjkiua7cuSvB5YCxwNvJjk\nGgZPeB0H3JpkT41frarbmkN/ErglyeXAI8D7+joHSdLker1HUlWrgFVj2pYNLT/GYMhrrF3Am/Zy\nzJ8AZx/AMiVJHfhmuySpE4NEktSJQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqRODBJJUicGiSSpE4NE\nktSJQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqRODBJJUicGiSSpE4NEktTJpEGS5LVTUYgkaTS1uSK5\nO8nXk5yfJL1XJEkaKW2C5BeB5cAlwENJ/nWSX+y3LEnSqJg0SGpgdVVdDHwAuBS4J8l3krxlor5J\nzk2yKcnmJEvH2X5qku8neTbJR4fa5ya5K8kDSTYm+cjQtmuTbEuyvvmcv09nLEk6oA6dbIfmHslv\nMbgieRy4GlgJLAS+DszfS79ZwPXAOcBWYE2SlVX1wNBuTwAfBi4Y03038DtVdW+So4B1SVYP9f1c\nVV3X8hwlST1qM7T1feBo4IKq+vWq+lZV7a6qtcCyCfqdAWyuqi1V9RxwM7B4eIeq2lFVa4Dnx7Rv\nr6p7m+WngAeBE1qflSRpykx6RQKcUlU13oaq+tQE/U4AHh1a3wqcuQ+1AZDkJODNwA+Gmq9O8n5g\nLYMrl/+9r8eVJB0Yba5I/kuSY/asJDk2ye091vSSJEcC3wSuqapdTfMNwMkMhta2A5/ZS98lSdYm\nWbtz586pKFeSZqQ2QTK7qn66Z6X5v//Xtei3DZg7tH5i09ZKksMYhMhNVfWtob//eFW9UFUvAl9k\nMIT2MlW1vKoWVdWi2bNnt/2zkqR91CZIXkgyb89KkjcA4w51jbEGWJBkfpLDgYsY3KSfVPO+ypeB\nB6vqs2O2zRlavRC4v80xJUn9aHOP5BPA95J8Bwjw94Alk3Wqqt1JrgJuB2YBK6pqY5Irmu3Lkrye\nwX2Oo4EXk1wDnAa8kcFTYhuSrG8O+fGqWgV8OslCBmH2MPDB1mcrSTrgJg2SqrotyenAWU3TNVX1\n4zYHb/7hXzWmbdnQ8mMMhrzG+h6D0BrvmJe0+duSpKnR5ooE4OcYvPNxKHBaEqrqu/2VJUkaFW1e\nSPwU8I+AjcCLTXMBBokkqdUVyQUM3iV5tu9iJEmjp81TW1uAw/ouRJI0mtpckTwDrE9yJ/DSVUlV\nfbi3qiRJI6NNkKyk5fsfkqSZp83jvzcmeTUwr6o2TUFNkqQR0uandt8NrAdua9YXJvEKRZIEtLvZ\nfi2D+ax+ClBV6xlMmihJUqsgeb6qnhzT9uK4e0qSZpw2N9s3JvkNYFaSBQx+0fCv+i1LkjQq2lyR\nXA38MoNHf78G7AKu6bMoSdLoaPPU1jMMZgD+RP/lSJJGTZu5tu5inN8fqapf66UiSdJIaXOP5KND\ny68C3gvs7qccSdKoaTO0tW5M018muaeneiRJI6bN0NZrhlYPAf4O8Au9VSRJGilthrbWMbhHEgZD\nWn8NXN5nUZKk0dFmaGv+VBQiSRpNbYa2/sFE26vqWweuHEnSqGkztHU58HeBv2jW38HgzfadDIa8\nDBJJmsHavNl+GHBaVb23qt7L4C33w6rqsqr6xxN1THJukk1JNidZOs72U5N8P8mzST7apm+S1yRZ\nneSh5vvYdqcqSepDmyCZW1Xbh9YfB+ZN1inJLOB64DzgNODiJKeN2e0JBnN3XbcPfZcCd1bVAuDO\nZl2SNE3aBMmdSW5P8ttJfhv4z8AdLfqdAWyuqi1V9RxwM7B4eIeq2lFVa4Dn96HvYuDGZvlG4IIW\ntUiSetLmqa2rklwIvL1pWl5Vt7Y49gnAo0PrW4EzW9Y1Ud/jh66QHgOOH+8ASZYASwDmzZv0A
kqS\ntJ/a3GwHuBd4qqruSPLzSY6qqqf6LKyNqqokL5sHrNm2HFgOsGjRonH3kSR11+andj8AfAP4QtN0\nAvAfWxx7GzB3aP3Epq2Nifo+nmROU9scYEfLY0qSetDmHsmVwFsZ/A4JVfUQ8LoW/dYAC5LMT3I4\ncBHQ9rfeJ+q7Eri0Wb4U+HbLY0qSetBmaOvZqnouCQBJDmWcaeXHqqrdSa4CbgdmASuqamOSK5rt\ny5K8HlgLHA28mOQaBo8a7xqvb3PoTwK3JLkceAR43z6cryTpAGsTJN9J8nHg1UnOAT4E/Kc2B6+q\nVcCqMW3LhpYfYzBs1apv0/4T4Ow2f1+S1L82Q1tLGbzFvgH4IIN/3P9ln0VJkkbHhFckzYuBX6mq\n3wS+ODUlSZJGyYRXJFX1AvCG5oa3JEkv0+YeyRYGv4q4Enh6T2NVfba3qiRJI6NNkPyo+RwCHNVv\nOZKkUbPXIElyaFXtrqo/mMqCJEmjZaJ7JPfsWUjyR1NQiyRpBE0UJBlafmvfhUiSRtNEQeJEh5Kk\nSU10s/3UJPcxuDL5W80yzXpV1Rt7r06S9Io3UZD80pRVIUkaWXsNkqp6ZCoLkSSNpjZzbUmStFcG\niSSpk1ZBkuTVSU7puxhJ0uhp81O77wbWA7c16wubebckSWp1RXItcAbwU4CqWg/M77EmSdIIaRMk\nz1fVk2PafFlRkgS0m/13Y5LfAGYlWQB8GPirfsuSJI2KNlckVwO/DDwLfBV4Erimz6IkSaOjzRXJ\nqVX1CeATfRcjSRo9ba5IPpPkwST/Ksmv7MvBk5ybZFOSzUmWjrM9ST7fbL8vyelN+ylJ1g99diW5\nptl2bZJtQ9vO35eaJEkH1qRXJFX1jiSvB94HfCHJ0cB/qKo/nKhfklnA9cA5wFZgTZKVVfXA0G7n\nAQuaz5nADcCZVbUJWDh0nG3ArUP9PldV17U8R0lSj1q9kFhVj1XV54ErGLxT8nstup0BbK6qLVX1\nHHAzsHjMPouBr9TA3cAxSeaM2eds4EfO/SVJr0xtXkj8pWY4aQPwRwye2DqxxbFPAB4dWt/atO3r\nPhcBXxvTdnUzFLYiybF7qXtJkrVJ1u7cubNFuZKk/dHmimQFg5cR/35V/WpV3VBVO3quC4AkhwPv\nAb4+1HwDcDKDoa/twGfG61tVy6tqUVUtmj17du+1StJM1eYeyVv289jbgLlD6yc2bfuyz3nAvVX1\n+FA9Ly0n+SLwZ/tZnyTpANjrFUmSW5rvDc0w0p7PhqFfS5zIGmBBkvnNlcVFwNg5ulYC72+e3joL\neLKqtg9tv5gxw1pj7qFcCNzfohZJUk8muiL5SPP9rv05cFXtTnIVcDswC1hRVRuTXNFsXwasAs4H\nNgPPAJft6Z/kCAZPfH1wzKE/nWQhg2laHh5nuyRpCk30C4l7rgw+VFW/O7wtyaeA3315r5cdYxWD\nsBhuWza0XMCVe+n7NPDacdovmezvSpKmTpub7eeM03begS5EkjSa9npFkuSfAh8CTh5zT+Qo4C/7\nLkySNBomukfyVeDPgX8DDE9v8lRVPdFrVZKkkTHRPZInGcz0ezFAktcBrwKOTHJkVf3PqSlRkvRK\n1uqndpM8BPw18B0GT0r9ec91SZJGRJub7X8InAX8j6qaz2Duq7t7rUqSNDLa/tTuT4BDkhxSVXcB\ni3quS5I0Itr8sNVPkxwJfBe4KckO4Ol+y5IkjYo2VySLgf8D/DPgNuBHwLv7LEqSNDraTNo4fPVx\nY4+1SJJG0EQvJD7FYD6rl5qa9TCY3eTonmuTJI2Aid4jOWoqC5EkjaZWP7Wb5G1JLmuWj0syv9+y\nJEmjos0Lib/PYKbfjzVNhwN/2mdRkqTR0eaK5EIGP3f7NEBV/S8GEzdKktQqSJ5rfjek4KUfnJIk\nCWgXJLck+QJwTJIPAHcAX+q3LEnSqGjzHsl1Sc4BdgGnA
L9XVat7r0ySNBLaTJFCExyrAZIckuQ3\nq+qmXiuTJI2EvQ5tJTk6yceS/Lsk78zAVcAW4H1TV6Ik6ZVsonsk/57BUNYG4J8AdwH/ELigqha3\nOXiSc5NsSrI5ydJxtifJ55vt9yU5fWjbw0k2JFmfZO1Q+2uSrE7yUPN9bMtzlST1YKKhrZOr6m8D\nJPkSsB2YV1X/t82Bk8wCrgfOAbYCa5KsrKoHhnY7D1jQfM4Ebmi+93hHVf14zKGXAndW1SebcFrK\n4D0XSdI0mOiK5Pk9C1X1ArC1bYg0zgA2V9WWqnoOuJnBTMLDFgNfqYG7GTwZNmeS4y7m/08eeSNw\nwT7UJEk6wCYKkjcl2dV8ngLeuGc5ya4Wxz4BeHRofWvT1nafAu5Isi7JkqF9jq+q7c3yY8DxLWqR\nJPVkokkbZ01lIeN4W1VtS/I6YHWSH1bVd4d3qKpKUuN1bsJnCcC8efP6r1aSZqhWkzbup23A3KH1\nE5u2VvtU1Z7vHcCtDIbKAB7fM/zVfO8Y749X1fKqWlRVi2bPnt3xVCRJe9NnkKwBFiSZn+Rw4CJg\n5Zh9VgLvb57eOgt4sqq2JzkiyVHw0pQs7wTuH+pzabN8KfDtHs9BkjSJVi8k7o+q2t28d3I7MAtY\nUVUbk1zRbF8GrALOBzYDzwCXNd2PB25NsqfGr1bVbc22TzKYtuVy4BF8p0WSplVvQQJQVasYhMVw\n27Kh5QKuHKffFuBNeznmT4CzD2ylkqT91efQliRpBjBIJEmdGCSSpE4MEklSJwaJJKkTg0SS1IlB\nIknqxCCRJHVikEiSOjFIJEmdGCSSpE4MEklSJwaJJKkTg0SS1IlBIknqxCCRJHVikEiSOjFIJEmd\nGCSSpE4MEklSJwaJJKmTXoMkyblJNiXZnGTpONuT5PPN9vuSnN60z01yV5IHkmxM8pGhPtcm2ZZk\nffM5v89zkCRN7NC+DpxkFnA9cA6wFViTZGVVPTC023nAguZzJnBD870b+J2qujfJUcC6JKuH+n6u\nqq7rq3ZJUnt9XpGcAWyuqi1V9RxwM7B4zD6Lga/UwN3AMUnmVNX2qroXoKqeAh4ETuixVknSfuoz\nSE4AHh1a38rLw2DSfZKcBLwZ+MFQ89XNUNiKJMceqIIlSfvuFX2zPcmRwDeBa6pqV9N8A3AysBDY\nDnxmL32XJFmbZO3OnTunpF5Jmon6DJJtwNyh9RObtlb7JDmMQYjcVFXf2rNDVT1eVS9U1YvAFxkM\nob1MVS2vqkVVtWj27NmdT0aSNL4+g2QNsCDJ/CSHAxcBK8fssxJ4f/P01lnAk1W1PUmALwMPVtVn\nhzskmTO0eiFwf3+nIEmaTG9PbVXV7iRXAbcDs4AVVbUxyRXN9mXAKuB8YDPwDHBZ0/2twCXAhiTr\nm7aPV9Uq4NNJFgIFPAx8sK9zkCRNrrcgAWj+4V81pm3Z0HIBV47T73tA9nLMSw5wmZKkDl7RN9sl\nSa98BokkqRODRJLUiUEiSerEIJEkdWKQSJI6MUgkSZ0YJJKkTgwSSVInBokkqRODRJLUiUEiSerE\nIJEkdWKQSJI6MUgkSZ0YJJKkTgwSSVInBokkqRODRJLUiUEiSerEIJEkddJrkCQ5N8mmJJuTLB1n\ne5J8vtl+X5LTJ+ub5DVJVid5qPk+ts9zkCRNrLcgSTILuB44DzgNuDjJaWN2Ow9Y0HyWADe06LsU\nuLOqFgB3NuuSpGnS5xXJGcDmqtpSVc8BNwOLx+yzGPhKDdwNHJNkziR9FwM3Nss3Ahf0eA6SpEn0\nGSQnAI8OrW9t2trsM1Hf46tqe7P8GHD8gSpYkrTvDp3uArqoqkpS421LsoTBcBnAz5Js2s8/c9xl\n8OP97DuqPOeZwXOeGbqc8xva7NRnkGwD5g6tn9i0tdnnsAn6Pp5kTlVtb4bBdoz3x6tqObB8/8sf\nSLK2qhZ1Pc4o8ZxnB
s95ZpiKc+5zaGsNsCDJ/CSHAxcBK8fssxJ4f/P01lnAk82w1UR9VwKXNsuX\nAt/u8RwkSZPo7YqkqnYnuQq4HZgFrKiqjUmuaLYvA1YB5wObgWeAyybq2xz6k8AtSS4HHgHe19c5\nSJIm1+s9kqpaxSAshtuWDS0XcGXbvk37T4CzD2ylE+o8PDaCPOeZwXOeGXo/5wz+LZckaf84RYok\nqRODZC+SrEiyI8n9013LVEkyN8ldSR5IsjHJR6a7pj4leVWSe5L89+Z8/2C6a5oqSWYl+W9J/my6\na5kKSR5OsiHJ+iRrp7ueqZDkmCTfSPLDJA8meUtvf8uhrfEleTvwMwZv3v/KdNczFZrHqedU1b1J\njgLWARdU1QPTXFovkgQ4oqp+luQw4HvAR5pZFg5qSf45sAg4uqreNd319C3Jw8Ciqpox75AkuRH4\nr1X1pebp15+vqp/28be8ItmLqvou8MR01zGVqmp7Vd3bLD8FPMjLZyM4aDRT8/ysWT2s+Rz0/2eV\n5ETg14EvTXct6keSXwDeDnwZoKqe6ytEwCDRXiQ5CXgz8IPpraRfzRDPegYvtq6uqoP6fBv/FvgX\nwIvTXcgUKuCOJOuaWS8OdvOBncAfN0OYX0pyRF9/zCDRyyQ5EvgmcE1V7ZruevpUVS9U1UIGsyec\nkeSgHsZM8i5gR1Wtm+5aptjbmv/O5wFXNkPXB7NDgdOBG6rqzcDT9DhTukGiv6G5V/BN4Kaq+tZ0\n1zNVmsv+u4Bzp7uWnr0VeE9zz+Bm4NeS/On0ltS/qtrWfO8AbmUww/jBbCuwdegK+xsMgqUXBole\n0tx8/jLwYFV9drrr6VuS2UmOaZZfDZwD/HB6q+pXVX2sqk6sqpMYTD30F1X1W9NcVq+SHNE8PEIz\nvPNO4KB+GrOqHgMeTXJK03Q20NtDMyM9+2+fknwN+FXguCRbgd+vqi9Pb1W9eytwCbChuW8A8PFm\nloGD0RzgxuaH1A4BbqmqGfE47AxzPHDr4P+TOBT4alXdNr0lTYmrgZuaJ7a20ExB1Qcf/5UkdeLQ\nliSpE4NEktSJQSJJ6sQgkSR1YpBIkjoxSCRJnRgkkqRODBJJUif/D4DexV94nPZfAAAAAElFTkSu\nQmCC\n", 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "metadata": {}, 63 | "output_type": "display_data" 64 | } 65 | ], 66 | "source": [ 67 | "n = 6\n", 68 | "die = list(range(1, n+1))\n", 69 | "P = BoxModel(die)\n", 70 | "RV(P).sim(10000).plot()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "*Example.* Flipping a fair coin twice and recording the results in sequence." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/html": [ 88 | "\n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | "
OutcomeValue
('H', 'H')0.2518
('H', 'T')0.2457
('T', 'H')0.2503
('T', 'T')0.2522
Total1.0
\n", 98 | " " 99 | ], 100 | "text/plain": [ 101 | "{('H', 'H'): 0.2518,\n", 102 | " ('H', 'T'): 0.2457,\n", 103 | " ('T', 'H'): 0.2503,\n", 104 | " ('T', 'T'): 0.2522}" 105 | ] 106 | }, 107 | "execution_count": 3, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "P = BoxModel(['H', 'T'], size=2, order_matters=True)\n", 114 | "P.sim(10000).tabulate(normalize=True)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "*Example.* Unequally likely outcomes on a colored \"spinner\"." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/html": [ 132 | "\n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | "
OutcomeValue
brown0.2458
orange0.5065
yellow0.2477
Total1.0
\n", 142 | " " 143 | ], 144 | "text/plain": [ 145 | "{'brown': 0.2458, 'orange': 0.5065, 'yellow': 0.2477}" 146 | ] 147 | }, 148 | "execution_count": 4, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "P = BoxModel(['orange', 'brown', 'yellow'], probs=[0.5, 0.25, 0.25])\n", 155 | "P.sim(10000).tabulate(normalize = True)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "`DeckOfCards()` is a special case of BoxModel for drawing from a standard deck of 52 cards. By default `replace=False`.\n", 163 | "\n", 164 | "*Example.* Simulated hands of 5 cards each." 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 5, 170 | "metadata": { 171 | "scrolled": true 172 | }, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/html": [ 177 | "\n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
IndexResult
0((2, 'Clubs'), (4, 'Clubs'), ('A', 'Diamonds'), ('Q', 'Spades'), (3, 'Hearts'))
1((7, 'Hearts'), ('J', 'Spades'), (5, 'Hearts'), ('A', 'Diamonds'), (5, 'Spades'))
2((2, 'Clubs'), ('Q', 'Spades'), ('A', 'Diamonds'), ('K', 'Spades'), (2, 'Hearts'))
\n", 199 | " " 200 | ], 201 | "text/plain": [ 202 | "[((2, 'Clubs'),\n", 203 | " (4, 'Clubs'),\n", 204 | " ('A', 'Diamonds'),\n", 205 | " ('Q', 'Spades'),\n", 206 | " (3, 'Hearts')),\n", 207 | " ((7, 'Hearts'),\n", 208 | " ('J', 'Spades'),\n", 209 | " (5, 'Hearts'),\n", 210 | " ('A', 'Diamonds'),\n", 211 | " (5, 'Spades')),\n", 212 | " ((2, 'Clubs'),\n", 213 | " ('Q', 'Spades'),\n", 214 | " ('A', 'Diamonds'),\n", 215 | " ('K', 'Spades'),\n", 216 | " (2, 'Hearts'))]" 217 | ] 218 | }, 219 | "execution_count": 5, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "DeckOfCards(size=5).sim(3)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "< [Common probability models](common.html) | [Contents](index.html) | [Common discrete distributions](common_discrete.html) >" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": { 239 | "collapsed": true 240 | }, 241 | "outputs": [], 242 | "source": [] 243 | } 244 | ], 245 | "metadata": { 246 | "kernelspec": { 247 | "display_name": "Python 3", 248 | "language": "python", 249 | "name": "python3" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 3 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython3", 261 | "version": "3.6.1" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 2 266 | } 267 | -------------------------------------------------------------------------------- /docs/common_random.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Documentation\n", 8 | "\n", 9 | "# Common Random Processes" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | 
"metadata": {}, 15 | "source": [ 16 | "Several common random processes are built in to Symbulate, including the following. See the documentation for these processes for more details.\n", 17 | "\n", 18 | "* [Discrete time Markov chains](mc.html#dtmc)\n", 19 | "* [Continuous time Markov chains](mc.html#ctmc)\n", 20 | "* [Poisson processes](mc.html#poisson)\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "< [Common joint distributions](common_joint.html) | [Contents](index.html) | [Multiple random variables and joint distributions](joint.html) >" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [] 38 | } 39 | ], 40 | "metadata": { 41 | "kernelspec": { 42 | "display_name": "Python 3", 43 | "language": "python", 44 | "name": "python3" 45 | }, 46 | "language_info": { 47 | "codemirror_mode": { 48 | "name": "ipython", 49 | "version": 3 50 | }, 51 | "file_extension": ".py", 52 | "mimetype": "text/x-python", 53 | "name": "python", 54 | "nbconvert_exporter": "python", 55 | "pygments_lexer": "ipython3", 56 | "version": "3.6.1" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 2 61 | } 62 | -------------------------------------------------------------------------------- /docs/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Documentation\n", 8 | "\n", 9 | "The Python package [Symbulate](https://github.com/dlsun/symbulate) provides a user friendly framework for conducting simulations involving probability models. The syntax of Symbulate mirrors the \"language of probability\" and makes it intuitive to specify, run, analyze, and visualize\n", 10 | "the results of a simulation." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "1. [**Introduction and installing Symbulate**](#intro)\n", 25 | "1. [**Probability spaces**](probspace.html)\n", 26 | "1. [**Simulation tools**](sim.html)\n", 27 | "1. [**Random variables**](rv.html)\n", 28 | "1. [**Common probability models**](common.html)\n", 29 | " 1. [**Cards, coins, dice**](common_cards_coins_dice.html)\n", 30 | " 1. [**Discrete distributions**](common_discrete.html)\n", 31 | " 1. [**Continuous distributions**](common_continuous.html)\n", 32 | " 1. [**Methods for common discrete and continuous distributions**](common_general_comments.html)\n", 33 | " 1. [**Joint distributions**](common_joint.html)\n", 34 | " 1. [**Common random processes**](common_random.html)\n", 35 | "1. [**Multiple random variables and joint distributions**](joint.html)\n", 36 | "1. [**Conditioning**](conditioning.html)\n", 37 | "1. [**Random processes**](process.html)\n", 38 | "1. [**Markov processes**](mc.html)\n", 39 | "1. [**Symbulate graphics**](graphics.html)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Installing Symbulate\n", 54 | "\n", 55 | "Symbulate can be downloaded from the [Symbulate Github repository](https://github.com/dlsun/symbulate). Instructions for downloading and installing Symbulate can be found [here](http://calpoly.edu/~dsun09/python.html). Import Symbulate during a session using the following commands. 
(The second line is an IPython \"magic\" which enables inline plotting within a Jupyter notebook.)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 1, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "from symbulate import *\n", 67 | "%matplotlib inline" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Getting started with Symbulate" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "An interactive tutorial providing an introduction to Symbulate is available [here](https://github.com/dlsun/symbulate/tree/master/tutorial). The full tutorial consists of four notebooks, each taking about 20 to 30 minutes to complete." 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## A few words about Jupyter notebooks" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "The primary interface with Symbulate is via Jupyter notebooks. A [Jupyter](http://jupyter.org/index.html) notebook is a document with cells containing either markdown text or code that can be executed interactively, with output visible immediately beneath the input. Jupyter notebooks provide a user friendly interface supporting interactive and reproducible programming and documentation.\n", 96 | "\n", 97 | "Each section of these documentation files was written in a Jupyter notebook, in which code cells (`In[]:`) are followed by any output they produce (`Out[]:`). Note that Jupyter notebooks only display the output of the last line of code in a cell (aside from code which produces a plot)." 
98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 2, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "0.75" 109 | ] 110 | }, 111 | "execution_count": 2, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "1 + 2\n", 118 | "3 / 4" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "source": [ 127 | "To display the output of multiple commands, place the commands in separate cells. (Cells can be added using the + button on the toolbar.)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 3, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "3" 139 | ] 140 | }, 141 | "execution_count": 3, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "1 + 2" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "0.75" 159 | ] 160 | }, 161 | "execution_count": 4, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "3 / 4" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "While not necessary, if desired output can be formatted and displayed using Python print statements. More information on Python print statements can be found [here](https://docs.python.org/3/tutorial/inputoutput.html)." 
175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 5, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "0.667\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "print('{:.3f}'.format(2/3))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "Help documentation can be accessed in Jupyter notebooks in a code cell by using the question mark `?` followed by the name of the object for which help is desired (e.g. `?BoxModel`)." 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "< [Contents](#contents) | [Probability spaces](probspace.html) >" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [] 216 | } 217 | ], 218 | "metadata": { 219 | "anaconda-cloud": {}, 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.6.1" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 1 240 | } 241 | -------------------------------------------------------------------------------- /docs/probspace.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Documentation\n", 8 | "# Probability Spaces" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "< [Contents](index.html) | [Simulation tools](sim.html) >" 16 | ] 17 | }, 18 | { 19 
| "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "A **probability space** consists of a **sample space** of possible outcomes and a **probability measure** which specifies how to assign probabilities to related events. Several common probability spaces are available in Symbulate. Users can also define their own probability spaces." 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | " 1. [**BoxModel:**](#boxmodel) Define a simple box model probability space.\n", 44 | " 1. [**Draw:**](#draw) Draw an outcome according to a probability model.\n", 45 | " 1. [**ProbabilitySpace:**](#probability_space) Define more general probability spaces.\n", 46 | " 1. [**Independent spaces:**](#indep) Combine independent probability spaces." 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "source": [ 55 | "Be sure to import Symbulate during a session using the following commands.\n", 56 | "" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 1, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "from symbulate import *\n", 68 | "%matplotlib inline" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "### BoxModel\n", 83 | "\n", 84 | "The probability space in many elementary situations can be defined via a \"box model.\" To define a Symbulate `BoxModel`, enter a list representing the tickets in the box. For example, rolling a fair six-sided die could be represented as a box model with six tickets labeled 1 through 6." 
85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 2, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "die = [1, 2, 3, 4, 5, 6]\n", 96 | "roll = BoxModel(die)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "The list of numbers could also have been created using `range()` in Python. Remember that Python indexing starts from 0 by default. Remember also that `range` gives you all the values, up to, but *not including* the last value." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 3, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "die = list(range(1, 6+1)) # this is just a list of the numbers 1 through 6\n", 115 | "roll = BoxModel(die)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Draw\n", 130 | "\n", 131 | "`BoxModel` itself just defines the model; it does not return any values. (The same is true for any probability space.) The `.draw()` method can be used to simulate one draw from the `BoxModel` (or any probability space)." 
132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 4, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "5" 143 | ] 144 | }, 145 | "execution_count": 4, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "roll.draw()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### BoxModel options\n", 159 | "* `box`: A list of \"tickets\" to sample from.\n", 160 | "* `size`: How many tickets to draw from the box.\n", 161 | "* `replace`: `True` if the draws are made with replacement; `False` if without replacement\n", 162 | "* `probs`: Probabilities that the tickets are selected. By default, all tickets are equally likely.\n", 163 | "* `order_matters`: `True` if different orderings of the same tickets drawn are counted as different outcomes; `False` if the order in which the tickets are drawn is irrelevant.\n", 164 | "\n", 165 | "Multiple tickets can be drawn from the box using the `size` argument." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 5, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "(4, 4, 3)" 177 | ] 178 | }, 179 | "execution_count": 5, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "BoxModel(die, size=3).draw()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "By default `BoxModel` assumes equally likely tickets. This can be changed using the `probs` argument, by specifying a probability value for each ticket.\n", 193 | "\n", 194 | "*Example.* Suppose 32% of Americans are Democrats, 27% are Republican, and 41% are Independent. 
Five randomly selected Americans are surveyed about their political party affiliation.\n", 195 | "\n", 196 | "This situation could be represented as sampling with replacement from a box with 100 tickets, 32 of which are Democrat, etc, from which 5 tickets are drawn. But rather than specifying a list of 100 tickets, we can just specify the three tickets and the corresponding probabilities with `probs`. " 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 6, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "('D', 'R', 'R', 'I', 'D')" 208 | ] 209 | }, 210 | "execution_count": 6, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "BoxModel(['D', 'R', 'I'], probs=[0.32, 0.27, 0.41], size=5).draw()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "\n", 224 | "The `probs` argument requires that the probabilities are already normalized to sum to 1. Non-normalized values can be handled by entering the tickets as a dictionary, specifying the label on each ticket and the number of tickets in the box with that label. Note that a dictionary is enclosed in braces `{}` rather than brackets `[]`.\n", 225 | "\n", 226 | "The following code is equivalent to the previous code which used the `probs` option." 
227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 7, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "('I', 'I', 'D', 'R', 'R')" 238 | ] 239 | }, 240 | "execution_count": 7, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "BoxModel({'D': 32,'R': 27, 'I': 41}, size=5).draw()" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "By default `BoxModel` assumes sampling with replacement; each ticket is placed back in the box before the next ticket is selected. Sampling *without replacement* can be handled with `replace=False`. (The default is `replace=True`.)\n", 254 | "\n", 255 | "*Example.* Two people are selected at random from Anakin, Bella, Frodo, Harry, Katniss to go on a quest." 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 8, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "('A', 'F')" 267 | ] 268 | }, 269 | "execution_count": 8, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "BoxModel(['A','B','F','H','K'], size=2, replace=False).draw()" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "Note that by default, `BoxModel` returns ordered outcomes, e.g. ('A', 'B') is distinct from ('B', 'A'). To return unordered outcomes, set `order_matters=False`." 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "### ProbabilitySpace\n", 297 | "\n", 298 | "Symbulate has many [common probability models](common.html) built in. The `ProbabilitySpace` command allows for user defined probability models. 
The first step in creating a probability space is to define a function that explains how to draw one outcome. \n", 299 | "\n", 300 | "*Example.* Ten percent of all e-mail is spam. Thirty percent of spam e-mails contain the word \"money\", while 2% of non-spam e-mails contain the word \"money\". Suppose an e-mail contains the word \"money\". What is the probability that it is spam?\n", 301 | "\n", 302 | "We can think of the sample space of outcomes of pairs of the possible email types (spam or not) and wordings (money or not), with the probability measure following the above specifications. First we draw from a `BoxModel` to determine the email type. Then, depending on the result of the first draw, we draw from one of two `BoxModel`s to determine the wording. The function `spam_sim` below encodes these specifications; note the use of `.draw()`." 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 9, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "def spam_sim():\n", 314 | " email_type = BoxModel([\"spam\", \"not spam\"], probs=[.1, .9]).draw()\n", 315 | " if email_type == \"spam\":\n", 316 | " has_money = BoxModel([\"money\", \"no money\"], probs=[.3, .7]).draw()\n", 317 | " else:\n", 318 | " has_money = BoxModel([\"money\", \"no money\"], probs=[.02, .98]).draw()\n", 319 | " return email_type, has_money" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "A `ProbabilitySpace` can be created once the specifications of the simulation have been defined through a function." 
327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 10, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/plain": [ 337 | "('not spam', 'no money')" 338 | ] 339 | }, 340 | "execution_count": 10, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "P = ProbabilitySpace(spam_sim)\n", 347 | "P.draw()" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "### Commonly used probability spaces\n", 355 | "\n", 356 | "Symbulate has many [commonly used probability spaces](common.html) built in. Here are just a few examples. " 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 11, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "6" 368 | ] 369 | }, 370 | "execution_count": 11, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "Binomial(n=10, p=0.5).draw()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 12, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "0.3619289993227831" 388 | ] 389 | }, 390 | "execution_count": 12, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "Normal(mean=0, sd=1).draw()" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 13, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "data": { 406 | "text/plain": [ 407 | "(-0.22749679873889656, 1.6296631715075314, 0.79580835264536565)" 408 | ] 409 | }, 410 | "execution_count": 13, 411 | "metadata": {}, 412 | "output_type": "execute_result" 413 | } 414 | ], 415 | "source": [ 416 | "mean_vector = [0, 1, 2]\n", 417 | "cov_matrix = [[1.00, 0.50, 0.25],\n", 418 | " [0.50, 2.00, 0.00],\n", 419 | " [0.25, 0.00, 4.00]]\n", 420 | "\n", 421 | "MultivariateNormal(mean = mean_vector, cov = 
cov_matrix).draw()" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "### Independent probability spaces\n", 436 | "\n", 437 | "**Independent** probability spaces can be constructed by multiplying (`*` in Python) two probability spaces. The product `*` syntax reflects that under independence joint probabilities are products of marginal probabilities: For example, events $A$ and $B$ are independent if and only if $P(A\\cap B) = P(A)P(B)$. \n", 438 | "\n", 439 | "Multiple independent copies of a probability space can be created by raising a probability space to a power (`**` in Python).\n", 440 | "\n", 441 | "*Example.* Roll a fair six-sided die and a fair four-sided die." 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 14, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "(4, 2)" 453 | ] 454 | }, 455 | "execution_count": 14, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [ 461 | "die6 = list(range(1, 6+1, 1))\n", 462 | "die4 = list(range(1, 4+1, 1))\n", 463 | "rolls = BoxModel(die6) * BoxModel(die4)\n", 464 | "rolls.draw()" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "*Example.* A triple of independent outcomes" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 15, 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/plain": [ 482 | "('H', 3, 0.17962033022575485)" 483 | ] 484 | }, 485 | "execution_count": 15, 486 | "metadata": {}, 487 | "output_type": "execute_result" 488 | } 489 | ], 490 | "source": [ 491 | "(BoxModel(['H', 'T']) * Poisson(lam=2) * Exponential(rate=5)).draw()" 492 | ] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": {}, 497 | "source": [ 498 | 
"*Example.* Four independent Normal(0,1) values." 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": 16, 504 | "metadata": {}, 505 | "outputs": [ 506 | { 507 | "data": { 508 | "text/plain": [ 509 | "(1.500833820300937,\n", 510 | " 0.5960138782343144,\n", 511 | " -0.015339890428991629,\n", 512 | " -0.48063522961405397)" 513 | ] 514 | }, 515 | "execution_count": 16, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "P = Normal(mean=0, sd=1) ** 4\n", 522 | "P.draw()" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "Infinitely many independent copies of a probability space can be created by raising the probability space to the `inf` power, i.e. `** inf`\n", 530 | "\n", 531 | "*Example*. Infinitely many independent Normal(0, 1) values." 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 17, 537 | "metadata": {}, 538 | "outputs": [ 539 | { 540 | "data": { 541 | "text/plain": [ 542 | "(0.9051545133819178, -0.6795117235056013, -0.18059828981272197, -0.16974084974970854, -0.7238363446156592, -0.8130618472750923, -0.5101024347016073, 1.9570595463860194, 1.9213150322634953, 1.55599067233092, '...')" 543 | ] 544 | }, 545 | "execution_count": 17, 546 | "metadata": {}, 547 | "output_type": "execute_result" 548 | } 549 | ], 550 | "source": [ 551 | "P = Normal(mean=0, sd=1) ** inf\n", 552 | "P.draw()" 553 | ] 554 | }, 555 | { 556 | "cell_type": "markdown", 557 | "metadata": {}, 558 | "source": [ 559 | "< [Contents](index.html) | [Simulation tools](sim.html) >" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": { 566 | "collapsed": true 567 | }, 568 | "outputs": [], 569 | "source": [] 570 | } 571 | ], 572 | "metadata": { 573 | "anaconda-cloud": {}, 574 | "kernelspec": { 575 | "display_name": "Python 3", 576 | "language": "python", 577 | "name": "python3" 578 | }, 579 |
"language_info": { 580 | "codemirror_mode": { 581 | "name": "ipython", 582 | "version": 3 583 | }, 584 | "file_extension": ".py", 585 | "mimetype": "text/x-python", 586 | "name": "python", 587 | "nbconvert_exporter": "python", 588 | "pygments_lexer": "ipython3", 589 | "version": "3.6.1" 590 | } 591 | }, 592 | "nbformat": 4, 593 | "nbformat_minor": 1 594 | } 595 | -------------------------------------------------------------------------------- /labs/Lab 4 - Poisson Processes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "editable": false 7 | }, 8 | "source": [ 9 | "# Symbulate Lab 4 - Poisson Processes" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "editable": false 16 | }, 17 | "source": [ 18 | "This Jupyter notebook provides a template for you to fill in. Read the notebook from start to finish, completing the parts as indicated. To run a cell, make sure the cell is highlighted by clicking on it, then press SHIFT + ENTER on your keyboard. (Alternatively, you can click the \"play\" button in the toolbar above.)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "editable": false 25 | }, 26 | "source": [ 27 | "In this lab you will use the Symbulate package. You should have completed [Section 2](https://github.com/dlsun/symbulate/blob/master/tutorial/gs_rv.ipynb) of the \"Getting Started Tutorial\" and read Sections 1-4 and parts of Section 5 of the [documentation](https://dlsun.github.io/symbulate/index.html). A few specific links to the documentation are provided below, but it will probably make more sense if you read the documentation from start to finish. Aside from part e) of Part I, **you should use Symbulate commands whenever possible.** If you find yourself writing long blocks of Python code, you are probably doing something wrong.
For example, you should not need to write any `for` loops.\n", 28 | "\n", 29 | "**Warning:** You may notice that many of the cells in this notebook are not editable. This is intentional and for your own safety. We have made these cells read-only so that you don't accidentally modify or delete them. However, you should still be able to execute the code in these cells." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": { 36 | "editable": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "from symbulate import *\n", 41 | "%matplotlib inline" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": { 47 | "editable": false 48 | }, 49 | "source": [ 50 | "# Overview\n", 51 | "\n", 52 | "Suppose that (harmless) micrometeors strike the International Space Station (ISS) at rate 1.5 per hour on average. You will consider three models of how the strikes occur over time.\n", 53 | "\n", 54 | " * **Model 1:** Strikes occur independently of each other, in each *minute* of time there is at most one strike, and the probability that a strike occurs in any minute is 1.5/60. \n", 55 | " * **Model 2:** Over any period of time, the number of strikes which occur has a Poisson distribution, and the numbers of strikes which occur in non-overlapping time periods are independent.\n", 56 | " * **Model 3:** The time elapsed between any two strikes has an [Exponential](https://dlsun.github.io/symbulate/common_continuous.html#exponential) distribution with mean 40 minutes, and the times between strikes are independent. " 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "editable": false 63 | }, 64 | "source": [ 65 | "## Model 1\n", 66 | "\n", 67 | "Suppose that strikes occur independently of each other, in each *minute* of time there is at most one strike, and the probability that a strike occurs in any minute is 1.5/60. 
(Why is 1.5/60 a reasonable value for the probability?)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "editable": false 74 | }, 75 | "source": [ 76 | "### a)\n", 77 | "\n", 78 | "Use simulation to approximate the distribution of the number of strikes that occur in the next 3 hours. Make a plot of the distribution, and approximate the mean and the standard deviation. " 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 2, 84 | "metadata": { 85 | "autograder": { 86 | "comments": null, 87 | "id": "model1a", 88 | "score": null 89 | }, 90 | "deletable": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "# Type all of your code for this problem in this cell.\n", 95 | "# Feel free to add additional cells for scratch work, but they will not be graded." 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": { 101 | "editable": false 102 | }, 103 | "source": [ 104 | "### b)\n", 105 | "\n", 106 | "Approximate the probability that there are at least 8 strikes in the next 3 hours." 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "metadata": { 113 | "autograder": { 114 | "comments": null, 115 | "id": "model1b", 116 | "score": null 117 | }, 118 | "deletable": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "# Type all of your code for this problem in this cell.\n", 123 | "# Feel free to add additional cells for scratch work, but they will not be graded." 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": { 129 | "editable": false 130 | }, 131 | "source": [ 132 | "## Model 2\n", 133 | "\n", 134 | "Suppose that over any period of time, the number of strikes which occur has a Poisson distribution, and the numbers of strikes which occur in non-overlapping time periods are independent." 
135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "editable": false 141 | }, 142 | "source": [ 143 | "### a)\n", 144 | "\n", 145 | "Use simulation to approximate the distribution of the number of strikes that occur in the next 3 hours. Make a plot of the distribution, and approximate the mean and the standard deviation. (**Hint:** Based on the rate at which meteors strike the ISS, what must the mean of the Poisson distribution be?)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 4, 151 | "metadata": { 152 | "autograder": { 153 | "comments": null, 154 | "id": "model2a", 155 | "score": null 156 | }, 157 | "deletable": false 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "# Type all of your code for this problem in this cell.\n", 162 | "# Feel free to add additional cells for scratch work, but they will not be graded." 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": { 168 | "editable": false 169 | }, 170 | "source": [ 171 | "### b)\n", 172 | "\n", 173 | "Approximate the probability that there are at least 8 strikes in the next 3 hours." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 5, 179 | "metadata": { 180 | "autograder": { 181 | "comments": null, 182 | "id": "model2b", 183 | "score": null 184 | }, 185 | "deletable": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "# Type all of your code for this problem in this cell.\n", 190 | "# Feel free to add additional cells for scratch work, but they will not be graded." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "editable": false 197 | }, 198 | "source": [ 199 | "## Model 3\n", 200 | "\n", 201 | "Now suppose that the time elapsed between any two strikes has an [Exponential](https://dlsun.github.io/symbulate/common_continuous.html#exponential) distribution with mean 40 minutes, and the times between strikes are independent. 
(Why is 40 minutes a reasonable value for the mean?)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": { 207 | "editable": false 208 | }, 209 | "source": [ 210 | "### a)\n", 211 | "\n", 212 | "Use simulation to approximate the distribution of the number of strikes that occur in the next 3 hours. Make a plot of the distribution, and approximate the mean and the standard deviation. \n", 213 | "\n", 214 | "(**Hint:** The exponential RVs represent the time between strikes, or _interarrival times_. Simulate infinitely many interarrival times using \n", 215 | "\n", 216 | "```\n", 217 | "P = Exponential(...) ** inf\n", 218 | "```\n", 219 | "\n", 220 | "Then, define a Python function `count_strikes_in_3_hours(...)` that takes each infinite sequence of interarrivals and counts up how many strikes there were in the first 3 hours. Finally, define a random variable on the probability space `P` using your function.)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 6, 226 | "metadata": { 227 | "autograder": { 228 | "comments": null, 229 | "id": "model3a", 230 | "score": null 231 | }, 232 | "deletable": false 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "# Type all of your code for this problem in this cell.\n", 237 | "# Feel free to add additional cells for scratch work, but they will not be graded.\n", 238 | "\n", 239 | "def count_strikes_in_3_hours(interarrival_times):\n", 240 | " for time in interarrival_times:\n", 241 | " # Be sure to return something inside this for loop;\n", 242 | " # otherwise this for loop will run forever, since it\n", 243 | " # is iterating over an infinite list!\n", 244 | " raise NotImplementedError()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": { 250 | "editable": false 251 | }, 252 | "source": [ 253 | "### b)\n", 254 | "\n", 255 | "Approximate the probability that there are at least 8 strikes in the next 3 hours. 
\n", 256 | "\n", 257 | "(**Hint:** The exponential RVs represent the time between events. Define another RV for the time at which the 8th strike occurs, and use that RV to approximate the probability in question.)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 7, 263 | "metadata": { 264 | "autograder": { 265 | "comments": null, 266 | "id": "model3b", 267 | "score": null 268 | }, 269 | "deletable": false 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "# Type all of your code for this problem in this cell.\n", 274 | "# Feel free to add additional cells for scratch work, but they will not be graded." 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": { 280 | "editable": false 281 | }, 282 | "source": [ 283 | "## Comparison of models\n", 284 | "\n", 285 | "Review your answers for the three models. In each model, we made what appeared to be different assumptions. Does it seem that the distribution of the number of strikes is the same under each of these sets of assumptions? Discuss briefly." 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "autograder": { 292 | "comments": null, 293 | "id": "comparison", 294 | "score": null 295 | }, 296 | "deletable": false 297 | }, 298 | "source": [ 299 | "**TYPE YOUR EXPLANATION HERE.**" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": { 305 | "editable": false 306 | }, 307 | "source": [ 308 | "## Submission Instructions\n", 309 | "\n", 310 | "Before you submit this notebook, click the \"Kernel\" drop-down menu at the top of this page and select \"Restart & Run All\". This will ensure that all of the code in your notebook executes properly. 
Please fix any errors, and repeat the process until the entire notebook executes without any errors.\n" 311 | ] 312 | } 313 | ], 314 | "metadata": { 315 | "anaconda-cloud": {}, 316 | "celltoolbar": "Edit Metadata", 317 | "kernelspec": { 318 | "display_name": "Python 3", 319 | "language": "python", 320 | "name": "python3" 321 | }, 322 | "language_info": { 323 | "codemirror_mode": { 324 | "name": "ipython", 325 | "version": 3 326 | }, 327 | "file_extension": ".py", 328 | "mimetype": "text/x-python", 329 | "name": "python", 330 | "nbconvert_exporter": "python", 331 | "pygments_lexer": "ipython3", 332 | "version": "3.6.6" 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 1 337 | } 338 | -------------------------------------------------------------------------------- /labs/Lab 5 - Central Limit Theorem.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Lab 5 - Central Limit Theorem" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This Jupyter notebook provides a template for you to fill in. Read the notebook from start to finish, completing the parts as indicated. To run a cell, make sure the cell is highlighted by clicking on it, then press SHIFT + ENTER on your keyboard. (Alternatively, you can click the \"play\" button in the toolbar above.)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "In this lab you will use the Symbulate package. You have seen most of the commands that you will use in previous labs, but remember to refer to the [documentation](https://dlsun.github.io/symbulate/index.html) for help.
In particular, read the documentation on [Normal distributions](https://dlsun.github.io/symbulate/common_continuous.html#normal), the [standardize method](https://dlsun.github.io/symbulate/rv.html#standardize), and the [`**` (exponentiation) notation](https://dlsun.github.io/symbulate/probspace.html#Independent-probability-spaces) for drawing multiple values independently from a distribution. **You should use Symbulate commands whenever possible.** If you find yourself writing long blocks of Python code, you are probably doing something wrong. For example, you should not need to write any `for` loops.\n", 22 | "\n", 23 | "**Warning:** You may notice that many of the cells in this notebook are not editable. This is intentional and for your own safety. We have made these cells read-only so that you don't accidentally modify or delete them. However, you should still be able to execute the code in these cells." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from symbulate import *\n", 33 | "%matplotlib inline" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Setup\n", 41 | "\n", 42 | "A random sample of $n$ customers at the Avenue is selected. Let $\\bar{X}$ represent the mean dollar amount spent by the $n$ customers in the sample. In this lab, you will investigate the distribution of $\\bar{X}$: how does the mean dollar amount spent vary over many samples of size $n$?\n", 43 | "\n", 44 | "Each of the parts assumes a different distribution for dollar amounts spent by individual customers. Within each part you will investigate how the distribution of the sample mean changes as the sample size increases.\n", 45 | "\n", 46 | "In each simulation, you should first define a probability space so that an outcome represents the $n$ individual dollar amounts spent by the customers in a random sample.
You can assume the dollar amounts spent are independent from customer to customer, and each amount is drawn from the specified distribution. (We say that the dollar amounts in a random sample are **independent and identically distributed (i.i.d.)**)\n", 47 | "\n", 48 | "In much of this lab, you will only need to make small modifications from question to question. But do make sure you take time to think about the output of each part before moving on. In particular, be sure to note the scale on the horizontal axis on your plots.\n", 49 | "\n", 50 | "You will run a simulation for each question, but there are some parts for which you should be able to derive the distribution analytically. You are encouraged to do this outside of class for practice.\n", 51 | "\n", 52 | "Some of the simulations will take a minute or two to run, especially for the larger values of $n$, so please be patient. You might want to run `.sim(10)` first to make sure your code works, and then you can change to `.sim(10000)`." 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Part I\n", 60 | "\n", 61 | "Assume dollar amounts spent by individual customers can be modeled with a Normal distribution with mean 6.50 and standard deviation 1.71." 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## a)\n", 69 | "\n", 70 | "First assume just a single customer is selected at random, and let $X$ represent the dollar amount spent. Use simulation to:\n", 71 | "\n", 72 | "- Plot the approximate distribution of $X$\n", 73 | "- Estimate its expected value and standard deviation\n", 74 | "- Estimate the probability that $X$ is more than 2 standard deviations greater than its expected value."
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 2, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# Type all of your code for this problem in this cell.\n", 84 | "# Feel free to add additional cells for scratch work, but they will not be graded." 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## b)\n", 92 | "\n", 93 | "Now $n=2$ customers are selected at random, and $\\bar{X}$ represents the mean dollar amount spent for the two customers. Use simulation to:\n", 94 | "\n", 95 | "- Plot the approximate distribution of $\\bar{X}$\n", 96 | "- Estimate its expected value and standard deviation\n", 97 | "- Estimate the probability that $\\bar{X}$ is more than 2 standard deviations greater than its expected value." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 3, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# Type all of your code for this problem in this cell.\n", 107 | "# Feel free to add additional cells for scratch work, but they will not be graded." 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## c)\n", 115 | "\n", 116 | "Repeat part b) with $n=5$" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 4, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# Type all of your code for this problem in this cell.\n", 126 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "## d)\n", 134 | "\n", 135 | "Repeat part b) with $n=30$" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 5, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# Type all of your code for this problem in this cell.\n", 145 | "# Feel free to add additional cells for scratch work, but they will not be graded." 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## e)\n", 153 | "\n", 154 | "Repeat part b) with $n=100$" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 6, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "# Type all of your code for this problem in this cell.\n", 164 | "# Feel free to add additional cells for scratch work, but they will not be graded." 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## f)\n", 172 | "\n", 173 | "How does increasing the sample size $n$ affect the distribution of $\\bar{X}$? " 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "**TYPE YOUR RESPONSE HERE.**" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "## Part II\n", 188 | "\n", 189 | "Assume the dollar amount spent by any individual customer is equally likely to be 4, 5, 6, 7, 8, or 9." 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## a)\n", 197 | "\n", 198 | "First assume just a single customer is selected at random, and let $X$ represent the dollar amount spent. 
Use simulation to:\n", 199 | "\n", 200 | "- Plot the approximate distribution of $X$\n", 201 | "- Estimate its expected value and standard deviation\n", 202 | "- Estimate the probability that $X$ is more than 2 standard deviations greater than its expected value." 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 7, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "# Type all of your code for this problem in this cell.\n", 212 | "# Feel free to add additional cells for scratch work, but they will not be graded." 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## b)\n", 220 | "\n", 221 | "Now $n=2$ customers are selected at random, and $\\bar{X}$ represents the mean dollar amount spent for the two customers. Use simulation to:\n", 222 | "\n", 223 | "- Plot the approximate distribution of $\\bar{X}$\n", 224 | "- Estimate its expected value and standard deviation\n", 225 | "- Estimate the probability that $\\bar{X}$ is more than 2 standard deviations greater than its expected value." 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 8, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "# Type all of your code for this problem in this cell.\n", 235 | "# Feel free to add additional cells for scratch work, but they will not be graded." 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "## c)\n", 243 | "\n", 244 | "Repeat part b) with $n=5$" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 9, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "# Type all of your code for this problem in this cell.\n", 254 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "## d)\n", 262 | "\n", 263 | "Repeat part b) with $n=30$" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 10, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# Type all of your code for this problem in this cell.\n", 273 | "# Feel free to add additional cells for scratch work, but they will not be graded." 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "## e)\n", 281 | "\n", 282 | "Repeat part b) with $n=100$" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 11, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "# Type all of your code for this problem in this cell.\n", 292 | "# Feel free to add additional cells for scratch work, but they will not be graded." 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "## f)\n", 300 | "\n", 301 | "How does increasing the sample size $n$ affect the distribution of $\\bar{X}$? " 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "**TYPE YOUR RESPONSE HERE.**" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "## Part III\n", 316 | "\n", 317 | "Assume the dollar amount spent by any individual customer has an Exponential distribution with mean 6.50" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "## a)\n", 325 | "\n", 326 | "First assume just a single customer is selected at random, and let $X$ represent the dollar amount spent. 
Use simulation to:\n", 327 | "\n", 328 | "- Plot the approximate distribution of $X$\n", 329 | "- Estimate its expected value and standard deviation\n", 330 | "- Estimate the probability that $X$ is more than 2 standard deviations greater than its expected value." 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 12, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "# Type all of your code for this problem in this cell.\n", 340 | "# Feel free to add additional cells for scratch work, but they will not be graded." 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "## b)\n", 348 | "\n", 349 | "Now $n=2$ customers are selected at random, and $\\bar{X}$ represents the mean dollar amount spent for the two customers. Use simulation to:\n", 350 | "\n", 351 | "- Plot the approximate distribution of $\\bar{X}$\n", 352 | "- Estimate its expected value and standard deviation\n", 353 | "- Estimate the probability that $\\bar{X}$ is more than 2 standard deviations greater than its expected value." 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 13, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "# Type all of your code for this problem in this cell.\n", 363 | "# Feel free to add additional cells for scratch work, but they will not be graded." 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "## c)\n", 371 | "\n", 372 | "Repeat part b) with $n=5$" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 14, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "# Type all of your code for this problem in this cell.\n", 382 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "## d)\n", 390 | "\n", 391 | "Repeat part b) with $n=30$" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 15, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "# Type all of your code for this problem in this cell.\n", 401 | "# Feel free to add additional cells for scratch work, but they will not be graded." 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "## e)\n", 409 | "\n", 410 | "Repeat part b) with $n=100$" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 16, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "# Type all of your code for this problem in this cell.\n", 420 | "# Feel free to add additional cells for scratch work, but they will not be graded." 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "## f)\n", 428 | "\n", 429 | "How does increasing the sample size $n$ affect the distribution of $\\bar{X}$? " 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "**TYPE YOUR RESPONSE HERE.**" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "## Part IV\n", 444 | "\n", 445 | "Now suppose that for 99% of the customers, the amounts spent follow the distribution in Part II, but the remaining 1% of customers spend 31 dollars (maybe they treat a few friends to lunch). (Hint: use a BoxModel.)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "## a)\n", 453 | "\n", 454 | "First assume just a single customer is selected at random, and let $X$ represent the dollar amount spent. 
Use simulation to:\n", 455 | "\n", 456 | "- Plot the approximate distribution of $X$\n", 457 | "- Estimate its expected value and standard deviation\n", 458 | "- Estimate the probability that $X$ is more than 2 standard deviations greater than its expected value." 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 17, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "# Type all of your code for this problem in this cell.\n", 468 | "# Feel free to add additional cells for scratch work, but they will not be graded." 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "## b)\n", 476 | "\n", 477 | "Now $n=2$ customers are selected at random, and $\\bar{X}$ represents the mean dollar amount spent for the two customers. Use simulation to:\n", 478 | "\n", 479 | "- Plot the approximate distribution of $\\bar{X}$\n", 480 | "- Estimate its expected value and standard deviation\n", 481 | "- Estimate the probability that $\\bar{X}$ is more than 2 standard deviations greater than its expected value." 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 18, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "# Type all of your code for this problem in this cell.\n", 491 | "# Feel free to add additional cells for scratch work, but they will not be graded." 492 | ] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": {}, 497 | "source": [ 498 | "## c)\n", 499 | "\n", 500 | "Repeat part b) with $n=5$" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 19, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "# Type all of your code for this problem in this cell.\n", 510 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": {}, 516 | "source": [ 517 | "## d)\n", 518 | "\n", 519 | "Repeat part b) with $n=30$" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 20, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [ 528 | "# Type all of your code for this problem in this cell.\n", 529 | "# Feel free to add additional cells for scratch work, but they will not be graded." 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "## e)\n", 537 | "\n", 538 | "Repeat part b) with $n=100$" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "## f)\n", 546 | "\n", 547 | "How does increasing the sample size $n$ affect the distribution of $\\bar{X}$? " 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "**TYPE YOUR RESPONSE HERE.**" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": {}, 560 | "source": [ 561 | "## Part V\n", 562 | "\n", 563 | "Review your work from the previous parts. Write a few sentences summarizing what you have learned about the distribution of the sample mean of a random sample. Be sure to consider shape, expected value, and standard deviation of the distribution." 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "**TYPE YOUR RESPONSE HERE.**" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "## Submission Instructions\n", 578 | "\n", 579 | "Before you submit this notebook, click the \"Kernel\" drop-down menu at the top of this page and select \"Restart & Run All\". This will ensure that all of the code in your notebook executes properly. Please fix any errors, and repeat the process until the entire notebook executes without any errors." 
580 | ] 581 | } 582 | ], 583 | "metadata": { 584 | "kernelspec": { 585 | "display_name": "Python 3", 586 | "language": "python", 587 | "name": "python3" 588 | }, 589 | "language_info": { 590 | "codemirror_mode": { 591 | "name": "ipython", 592 | "version": 3 593 | }, 594 | "file_extension": ".py", 595 | "mimetype": "text/x-python", 596 | "name": "python", 597 | "nbconvert_exporter": "python", 598 | "pygments_lexer": "ipython3", 599 | "version": "3.6.6" 600 | } 601 | }, 602 | "nbformat": 4, 603 | "nbformat_minor": 2 604 | } 605 | -------------------------------------------------------------------------------- /labs/Lab 6 - Joint and Conditional Distributions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Lab 6 - Joint and Conditional Distributions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This Jupyter notebook provides a template for you to fill in. Read the notebook from start to finish, completing the parts as indicated. To run a cell, make sure the cell is highlighted by clicking on it, then press SHIFT + ENTER on your keyboard. (Alternatively, you can click the \"play\" button in the toolbar above.)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "In this lab you will use the Symbulate package. Many of the commands are discussed in the [Multiple RV Section](https://dlsun.github.io/symbulate/joint.html), the [Conditioning Section](https://dlsun.github.io/symbulate/conditioning.html), or the [Graphics Section](https://dlsun.github.io/symbulate/graphics.html) of the [Symbulate documentation](https://dlsun.github.io/symbulate/index.html). **You should use Symbulate commands whenever possible.** If you find yourself writing long blocks of Python code, you are probably doing something wrong. 
For example, you should not need to write any `for` loops.\n", 22 | "\n", 23 | "There are 3 parts, and at the end of each part there are some reflection questions. There is no need to type a response to the reflection questions, but you should think about them and discuss them with your partner to try to make sense of your simulation results.\n", 24 | "\n", 25 | "**Warning:** You may notice that many of the cells in this notebook are not editable. This is intentional and for your own safety. We have made these cells read-only so that you don't accidentally modify or delete them. However, you should still be able to execute the code in these cells." 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from symbulate import *\n", 35 | "%matplotlib inline" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "# Part I: Two Discrete random variables" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Roll a fair six-sided die five times and let $X$ be the largest of the five rolls and $Y$ the smallest.\n", 50 | "\n", 51 | "Before proceeding, make some guesses about how the following will behave.\n", 52 | "- Joint distribution of $X$ and $Y$\n", 53 | "- Conditional distribution of $Y$ given $X=5$." 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# a)\n", 61 | "\n", 62 | "Define the random variables $X$ and $Y$." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 2, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Type all of your code for this problem in this cell.\n", 72 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# b)\n", 80 | "\n", 81 | "Simulate 10000 $(X, Y)$ pairs and store the values as `xy`. Estimate the covariance and the correlation. ([Hint](https://dlsun.github.io/symbulate/joint.html#ampersand) and [hint](https://dlsun.github.io/symbulate/joint.html#cov) and [hint](https://dlsun.github.io/symbulate/joint.html#corr))" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 3, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# Type all of your code for this problem in this cell.\n", 91 | "# Feel free to add additional cells for scratch work, but they will not be graded." 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "# c)\n", 99 | "\n", 100 | "Make a scatterplot of the simulated values. ([Hint](https://dlsun.github.io/symbulate/joint.html#plot). Note that it is recommended to use `jitter=True` when the variables involved are discrete.)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 4, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# Type all of your code for this problem in this cell.\n", 110 | "# Feel free to add additional cells for scratch work, but they will not be graded." 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "# d)\n", 118 | "\n", 119 | "Make a tile plot of the simulated values. ([Hint](https://dlsun.github.io/symbulate/graphics.html#tile))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# Type all of your code for this problem in this cell.\n", 129 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "# e)\n", 137 | "\n", 138 | "Use simulation to approximate the conditional distribution of $Y$ given $X=5$ and approximate the conditional mean $E(Y | X=5)$ and the conditional standard deviation. ([Hint](https://dlsun.github.io/symbulate/conditioning.html#pipe), but also see all of the [Conditioning Section](https://dlsun.github.io/symbulate/conditioning.html).)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# Type all of your code for this problem in this cell.\n", 148 | "# Feel free to add additional cells for scratch work, but they will not be graded." 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "# f) Reflection questions\n", 156 | "\n", 157 | "Recall the guesses you made at the start of the problem, and inspect your results from the previous parts. Can you explain the behavior you observed for the following?\n", 158 | "\n", 159 | "- Joint distribution of $X$ and $Y$\n", 160 | "- Conditional distribution of $Y$ given $X=5$." 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "**TYPE YOUR RESPONSE HERE.**" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "# Part II: Two continuous random variables" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "source": [ 183 | "Suppose that the base $U$ and height $V$ of a random rectangle are independent random variables, with each following a Uniform(0, 1) distribution. Let $X$ be the perimeter of the rectangle and $Y$ its area. 
In this part you will investigate the joint distribution of $X$ and $Y$.\n", 184 | "\n", 185 | "Before proceeding, make some guesses about how the following will behave.\n", 186 | "- Joint distribution of $X$ and $Y$\n", 187 | "- Marginal distribution of $Y$\n", 188 | "- Conditional distribution of $Y$ given $X=2$." 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "# a)\n", 196 | "\n", 197 | "Define appropriate random variables $U, V, X, Y$. ([Hint](https://dlsun.github.io/symbulate/joint.html#unpack), but also see the [Multiple RV Section](https://dlsun.github.io/symbulate/joint.html) in general.)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# Type all of your code for this problem in this cell.\n", 207 | "# Feel free to add additional cells for scratch work, but they will not be graded." 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "# b)\n", 215 | "\n", 216 | "Simulate 10000 $(X, Y)$ pairs and store the values as `xy`. Estimate the covariance and the correlation. ([Hint](https://dlsun.github.io/symbulate/joint.html#ampersand) and [hint](https://dlsun.github.io/symbulate/joint.html#cov) and [hint](https://dlsun.github.io/symbulate/joint.html#corr))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 8, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "# Type all of your code for this problem in this cell.\n", 226 | "# Feel free to add additional cells for scratch work, but they will not be graded." 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "# c)\n", 234 | "\n", 235 | "Make a scatterplot of the simulated values. 
([Hint](https://dlsun.github.io/symbulate/graphics.html#scatter))" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 9, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "# Type all of your code for this problem in this cell.\n", 245 | "# Feel free to add additional cells for scratch work, but they will not be graded." 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "# d)\n", 253 | "\n", 254 | "Make a two-dimensional histogram of the simulated values. ([Hint](https://dlsun.github.io/symbulate/graphics.html#hist2d))" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 10, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "# Type all of your code for this problem in this cell.\n", 264 | "# Feel free to add additional cells for scratch work, but they will not be graded." 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "# e)\n", 272 | "\n", 273 | "Make a two-dimensional density plot of the simulated values. ([Hint](https://dlsun.github.io/symbulate/graphics.html#density2d))" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 11, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# Type all of your code for this problem in this cell.\n", 283 | "# Feel free to add additional cells for scratch work, but they will not be graded." 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "# f)\n", 291 | "\n", 292 | "Use simulation to approximate the marginal distribution of $Y$ and approximate its mean and standard deviation." 
293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 12, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "# Type all of your code for this problem in this cell.\n", 302 | "# Feel free to add additional cells for scratch work, but they will not be graded." 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "# g)\n", 310 | "\n", 311 | "Use simulation to approximate the conditional distribution of $Y$ given $X=2$ and approximate the conditional mean $E(Y | X=2)$ and the conditional standard deviation. (Warning: be careful! See this [hint](https://dlsun.github.io/symbulate/conditioning.html#pipe) and especially this [hint](https://dlsun.github.io/symbulate/conditioning.html#continuous).)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 13, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# Type all of your code for this problem in this cell.\n", 321 | "# Feel free to add additional cells for scratch work, but they will not be graded." 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "# h) Reflection questions\n", 329 | "\n", 330 | "Recall the guesses you made at the start of the problem, and inspect your results from the previous parts. Can you explain the behavior you observed for the following?\n", 331 | "\n", 332 | "- Joint distribution of $X$ and $Y$\n", 333 | "- Marginal distribution of $Y$\n", 334 | "- Conditional distribution of $Y$ given $X=2$." 
335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "**TYPE YOUR RESPONSE HERE.**" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "# Part III: Joint Gaussian random variables" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "Just like Gaussian (Normal) distributions are the most important probability distributions, joint Gaussian (Multivariate Normal) distributions are the most important joint distributions. In this part you will investigate two random variables which have a joint Gaussian distribution.\n", 356 | "\n", 357 | "Suppose that SAT Math ($M$) and Reading ($R$) scores of CalPoly students have a Bivariate Normal\n", 358 | "(joint Gaussian) distribution.\n", 359 | "- Math scores have mean 635 and SD 85.\n", 360 | "- Reading scores have mean 595 and SD 70.\n", 361 | "- The correlation between scores is 0.6.\n", 362 | "\n", 363 | "Let $X = M + R$, the total of the two scores. Let $Y = M- R$, the difference between Math and Reading scores." 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "# a)\n", 371 | "\n", 372 | "Define RVs $M, R, X, Y$. ([Hint](https://dlsun.github.io/symbulate/common_joint.html#bvn))" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 14, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "# Type all of your code for this problem in this cell.\n", 382 | "# Feel free to add additional cells for scratch work, but they will not be graded." 383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "# b)\n", 390 | "\n", 391 | "Simulate 10000 $(M, R)$ pairs. Use the simulation results to approximate $E(M)$, $E(R)$, $SD(M)$, $SD(R)$, and $Corr(M, R)$. 
" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 15, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "# Type all of your code for this problem in this cell.\n", 401 | "# Feel free to add additional cells for scratch work, but they will not be graded." 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "# c)\n", 409 | "\n", 410 | "Make a scatterplot of the simulated values. Add histograms of the marginal distributions. (Hint: `.plot(type=[\"scatter\", \"marginal\"])`.)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 16, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "# Type all of your code for this problem in this cell.\n", 420 | "# Feel free to add additional cells for scratch work, but they will not be graded." 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "# d)\n", 428 | "\n", 429 | "Make a density plot of the simulated values. Add density plots of the marginal distributions. (Hint: `.plot(type=[\"density\", \"marginal\"])`.)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 17, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "# Type all of your code for this problem in this cell.\n", 439 | "# Feel free to add additional cells for scratch work, but they will not be graded." 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": {}, 445 | "source": [ 446 | "# e)\n", 447 | "\n", 448 | "Now simulate 10000 values of $X = M+R$. Plot the approximate distribution of $X$ and estimate $E(X)$ and $SD(X)$." 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 18, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "# Type all of your code for this problem in this cell.\n", 458 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
459 | ] 460 | }, 461 | { 462 | "cell_type": "markdown", 463 | "metadata": { 464 | "collapsed": true 465 | }, 466 | "source": [ 467 | "# f)\n", 468 | "\n", 469 | "Now simulate 10000 values of $Y = M - R$. Plot the approximate distribution of $Y$ and estimate $E(Y)$ and $SD(Y)$." 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 19, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [ 478 | "# Type all of your code for this problem in this cell.\n", 479 | "# Feel free to add additional cells for scratch work, but they will not be graded." 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "# g)\n", 487 | "\n", 488 | "Use simulation to approximate the distribution of $M$ given $R=700$. Make a plot of the approximate distribution and estimate the conditional mean $E(M | R = 700)$ and the conditional standard deviation. (Warning: be careful! See this [hint](https://dlsun.github.io/symbulate/conditioning.html#pipe) and especially this [hint](https://dlsun.github.io/symbulate/conditioning.html#continuous).)" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 20, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "# Type all of your code for this problem in this cell.\n", 498 | "# Feel free to add additional cells for scratch work, but they will not be graded." 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "# h) Reflection questions\n", 506 | "\n", 507 | "Inspect your results from the previous parts.\n", 508 | "\n", 509 | "- How would you describe the shape of the scatterplot/density plot of $M$ and $R$?\n", 510 | "- How would you describe the marginal distributions of $M$ and $R$?\n", 511 | "- How does the distribution of $M+R$ compare to the distribution of $M-R$? In particular, how do the SDs compare? How do the SDs compare to the case when $M$ and $R$ are independent? 
Can you explain why this makes sense?\n", 512 | "- How would you describe the conditional distribution of $M$ given $R=700$? How does it compare to the marginal distribution of $M$? Can you explain why this makes sense? Be sure to consider mean and sd." 513 | ] 514 | }, 515 | { 516 | "cell_type": "markdown", 517 | "metadata": {}, 518 | "source": [ 519 | "**TYPE YOUR RESPONSE HERE.**" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "## Submission Instructions\n", 527 | "\n", 528 | "Before you submit this notebook, click the \"Kernel\" drop-down menu at the top of this page and select \"Restart & Run All\". This will ensure that all of the code in your notebook executes properly. Please fix any errors, and repeat the process until the entire notebook executes without any errors." 529 | ] 530 | } 531 | ], 532 | "metadata": { 533 | "kernelspec": { 534 | "display_name": "Python 3", 535 | "language": "python", 536 | "name": "python3" 537 | }, 538 | "language_info": { 539 | "codemirror_mode": { 540 | "name": "ipython", 541 | "version": 3 542 | }, 543 | "file_extension": ".py", 544 | "mimetype": "text/x-python", 545 | "name": "python", 546 | "nbconvert_exporter": "python", 547 | "pygments_lexer": "ipython3", 548 | "version": "3.6.6" 549 | } 550 | }, 551 | "nbformat": 4, 552 | "nbformat_minor": 2 553 | } 554 | -------------------------------------------------------------------------------- /labs/Lab 7 - Stochastic Processes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Symbulate Lab 7 - Stochastic Processes" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This Jupyter notebook provides a template for you to fill in. Read the notebook from start to finish, completing the parts as indicated. 
To run a cell, make sure the cell is highlighted by clicking on it, then press SHIFT + ENTER on your keyboard. (Alternatively, you can click the \"play\" button in the toolbar above.)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "In this lab you will use the Symbulate package. Many of the new commands are discussed in the [Random processes](https://dlsun.github.io/symbulate/process.html) section of the [Symbulate documentation](https://dlsun.github.io/symbulate/index.html). **You should use Symbulate commands whenever possible.** If you find yourself writing long blocks of Python code, you are probably doing something wrong. For example, you should not need to write any *long* `for` loops (though you will need to write a simple `for` loop in Problem 1 part a).\n", 22 | "\n", 23 | "There are 2 parts, and at the end of each part there are some reflection questions. There is no need to type a response to the reflection questions, but you should think about them and discuss them with your partner to try to make sense of your simulation results.\n", 24 | "\n", 25 | "**Warning:** You may notice that many of the cells in this notebook are not editable. This is intentional and for your own safety. We have made these cells read-only so that you don't accidentally modify or delete them. However, you should still be able to execute the code in these cells." 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from symbulate import *\n", 35 | "%matplotlib inline" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Problem 1.\n", 43 | "\n", 44 | "Here is one example of a discrete time, continuous state process. Suppose that $X_0 = 0$ and for $n = 0, 1, 2, \\ldots$,\n", 45 | "$$\n", 46 | "X_{n+1} = 0.5 X_n + Z_{n}\n", 47 | "$$\n", 48 | "where $Z_0, Z_1, \\ldots$ are i.i.d. $N(0,1)$. 
Such a process is called an *autoregressive* process (of order 1)." 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### a)\n", 56 | "\n", 57 | "Define in Symbulate the $X$ process, for time steps $n = 0, 1, 2, \\ldots, 20$. Hint: [this example](https://dlsun.github.io/symbulate/process.html#rw) should be very helpful.\n", 58 | "\n", 59 | "- Define a probability space `P` corresponding to an infinite sequence of i.i.d. $N(0, 1)$ values. (Hint: [bottom of this page](https://dlsun.github.io/symbulate/probspace.html#indep).)\n", 60 | "- Define an `RV` `Z` on the probability space `P`; each component of `Z` can be indexed with brackets `[]` , e.g. `Z[0]`, `Z[1]`, etc.\n", 61 | "- Define a [`RandomProcess`](https://dlsun.github.io/symbulate/process.html#time) `X` on `P`.\n", 62 | "Write a simple `for` loop to define the value of `X[n+1]` based on `X[n]` and `Z[n]`." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 2, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Type all of your code for this problem in this cell.\n", 72 | "# Feel free to add additional cells for scratch work, but they will not be graded." 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "### b)\n", 80 | "\n", 81 | "Simulate and plot a single sample path for $n = 0, \\ldots, 20$. (Hint: [see the plots here](https://dlsun.github.io/symbulate/process.html#Xt). You might need to change `alpha = ` if the plot is too light.)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 3, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# Type all of your code for this problem in this cell.\n", 91 | "# Feel free to add additional cells for scratch work, but they will not be graded." 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### c)\n", 99 | "\n", 100 | "Simulate and plot 100 sample paths." 
101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 4, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# Type all of your code for this problem in this cell.\n", 110 | "# Feel free to add additional cells for scratch work, but they will not be graded." 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### d)\n", 118 | "\n", 119 | "Simulate and plot the distribution of $X_5$, and approximate its mean and variance. ([Hint](https://dlsun.github.io/symbulate/process.html#value).)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# Type all of your code for this problem in this cell.\n", 129 | "# Feel free to add additional cells for scratch work, but they will not be graded." 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### e)\n", 137 | "\n", 138 | "Simulate and plot the distribution of $X_{10}$, and approximate its mean and variance.\n", 139 | "\n", 140 | "**Reflection question:** How does the distribution of $X_{10}$ compare to that of $X_5$?" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 6, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "# Type all of your code for this problem in this cell.\n", 150 | "# Feel free to add additional cells for scratch work, but they will not be graded." 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "### f) \n", 158 | "\n", 159 | "Simulate and make a histogram or density plot of the joint distribution of $X_5$ and $X_{10}$, and approximate its correlation and covariance." 
160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 7, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# Type all of your code for this problem in this cell.\n", 169 | "# Feel free to add additional cells for scratch work, but they will not be graded." 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "### g) \n", 177 | "\n", 178 | "Simulate and make a histogram or density plot of the joint distribution of $X_5$ and $X_6$, and approximate its correlation and covariance.\n", 179 | "\n", 180 | "**Reflection question:** How does the joint distribution of $X_5$ and $X_6$ compare to that of $X_5$ and $X_{10}$?" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 8, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "# Type all of your code for this problem in this cell.\n", 190 | "# Feel free to add additional cells for scratch work, but they will not be graded." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### h) \n", 198 | "\n", 199 | "Simulate and make a histogram or density plot of the joint distribution of $X_{10}$ and $X_{15}$, and approximate its correlation and covariance.\n", 200 | "\n", 201 | "**Reflection question:** How does the joint distribution of $X_{10}$ and $X_{15}$ compare to that of $X_{5}$ and $X_{10}$?" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 9, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "# Type all of your code for this problem in this cell.\n", 211 | "# Feel free to add additional cells for scratch work, but they will not be graded." 
212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "### i) \n", 219 | "\n", 220 | "Simulate and make a histogram or density plot of the joint distribution of $X_{10}$ and $X_{11}$, and approximate its correlation and covariance.\n", 221 | "\n", 222 | "**Reflection question:** How does the joint distribution of $X_{10}$ and $X_{11}$ compare to that of $X_{5}$ and $X_{6}$?" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 10, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "# Type all of your code for this problem in this cell.\n", 232 | "# Feel free to add additional cells for scratch work, but they will not be graded." 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "### j) More reflection questions\n", 240 | "\n", 241 | "Does the $X$ process appear to be stationary? Note: The process starts with $X_0=0$ so it is technically not stationary. But aside from the first few times, does it appear that the process is stationary?" 
242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "## Problem 2)\n", 249 | "\n", 250 | "Consider a random signal with both a random amplitude and a random \"phase shift\".\n", 251 | "$$\n", 252 | "X(t) = A\\cos(2\\pi t + \\Theta)\n", 253 | "$$\n", 254 | "where $A$ and $\\Theta$ are independent, $A$ is equally likely to be 0.5, 1, or 2, and $\\Theta$ has a Uniform(0,$2\\pi$) distribution.\n", 255 | "\n", 256 | "Note: to define $\\cos(2\\pi t)$ in Python, use `cos(2 * pi * t)`" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "### a)\n", 264 | "\n", 265 | "\n", 266 | "Define in Symbulate the $X$ process.\n", 267 | "\n", 268 | "- Hint: use `ContinuousTimeFunction` to define the deterministic process $f(t)=t$.\n", 269 | "- However, to do some of the parts below, you'll need to explicitly define `RV` for $A$ and $\\Theta$ like [here](https://dlsun.github.io/symbulate/joint.html#unpack).\n", 270 | "- Define the probability space `P` for $A$ and $\\Theta$ and then define `RV` on that space and also `RandomProcess` on that space." 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 11, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "# Type all of your code for this problem in this cell.\n", 280 | "# Feel free to add additional cells for scratch work, but they will not be graded." 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "### b)\n", 288 | "\n", 289 | "Simulate and plot a single sample path for $0\\le t \\le 3$. 
(You might need to change `alpha = ` if the plot is too light.)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 12, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "# Type all of your code for this problem in this cell.\n", 299 | "# Feel free to add additional cells for scratch work, but they will not be graded." 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "### c)\n", 307 | "\n", 308 | "Simulate and plot 100 sample paths." 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 13, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "# Type all of your code for this problem in this cell.\n", 318 | "# Feel free to add additional cells for scratch work, but they will not be graded." 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "### d)\n", 326 | "\n", 327 | "Simulate and plot the conditional distribution of $X(1)$ given $A = 1$, and approximate its mean and variance. (Remember [this](https://dlsun.github.io/symbulate/conditioning.html#conditioning).)\n", 328 | "\n", 329 | "**Reflection question:** You might suspect that since $\\Theta$ has a uniform distribution then $X(1)$ has a uniform distribution when $A=1$. But is that true?" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 14, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "# Type all of your code for this problem in this cell.\n", 339 | "# Feel free to add additional cells for scratch work, but they will not be graded." 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "### e)\n", 347 | "\n", 348 | "Simulate and plot the distribution of $X(1)$, and approximate its mean and variance.\n", 349 | "\n", 350 | "**Reflection question:** Can you explain the shape?" 
351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 15, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "# Type all of your code for this problem in this cell.\n", 360 | "# Feel free to add additional cells for scratch work, but they will not be graded." 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "### f)\n", 368 | "\n", 369 | "Simulate and plot the joint distribution of $X(1)$ and $X(2)$ and approximate the covariance and correlation.\n", 370 | "\n", 371 | "**Reflection question:** Can you explain why the scatterplot looks the way it does?" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 16, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# Type all of your code for this problem in this cell.\n", 381 | "# Feel free to add additional cells for scratch work, but they will not be graded." 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "### g)\n", 389 | "\n", 390 | "Simulate and plot the joint distribution of $X(1)$ and $X(1.5)$ and approximate the covariance and correlation.\n", 391 | "\n", 392 | "**Reflection question:** Can you explain why the scatterplot looks the way it does?" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 17, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "# Type all of your code for this problem in this cell.\n", 402 | "# Feel free to add additional cells for scratch work, but they will not be graded." 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "### h)\n", 410 | "\n", 411 | "Simulate and plot the joint distribution of $X(1)$ and $X(1.25)$ and approximate the covariance and correlation.\n", 412 | "\n", 413 | "**Reflection question:** Can you explain why the scatterplot looks the way it does?" 
414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 18, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "# Type all of your code for this problem in this cell.\n", 423 | "# Feel free to add additional cells for scratch work, but they will not be graded." 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## Submission Instructions\n", 431 | "\n", 432 | "Before you submit this notebook, click the \"Kernel\" drop-down menu at the top of this page and select \"Restart & Run All\". This will ensure that all of the code in your notebook executes properly. Please fix any errors, and repeat the process until the entire notebook executes without any errors." 433 | ] 434 | } 435 | ], 436 | "metadata": { 437 | "kernelspec": { 438 | "display_name": "Python 3", 439 | "language": "python", 440 | "name": "python3" 441 | }, 442 | "language_info": { 443 | "codemirror_mode": { 444 | "name": "ipython", 445 | "version": 3 446 | }, 447 | "file_extension": ".py", 448 | "mimetype": "text/x-python", 449 | "name": "python", 450 | "nbconvert_exporter": "python", 451 | "pygments_lexer": "ipython3", 452 | "version": "3.6.6" 453 | } 454 | }, 455 | "nbformat": 4, 456 | "nbformat_minor": 2 457 | } 458 | -------------------------------------------------------------------------------- /labs/README.md: -------------------------------------------------------------------------------- 1 | This folder contains some sample Symbulate lab activities. 
2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="symbulate", 5 | version="0.5.5", 6 | 7 | description="A symbolic algebra for specifying simulations.", 8 | 9 | url="https://github.com/dlsun/symbulate", 10 | 11 | author="Dennis Sun", 12 | author_email="dsun09@calpoly.edu", 13 | 14 | license="GPLv3", 15 | 16 | classifiers=[ 17 | 'Development Status :: 3 - Alpha', 18 | 'Intended Audience :: Education', 19 | 'Topic :: Scientific/Engineering :: Mathematics', 20 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 21 | 'Programming Language :: Python :: 3', 22 | ], 23 | 24 | keywords='probability simulation', 25 | 26 | packages=find_packages(), 27 | 28 | install_requires=[ 29 | 'numpy', 30 | 'scipy', 31 | 'matplotlib' 32 | ] 33 | ) 34 | -------------------------------------------------------------------------------- /symbulate/__init__.py: -------------------------------------------------------------------------------- 1 | from .probability_space import ProbabilitySpace, BoxModel, DeckOfCards 2 | from .random_variables import RV 3 | from .random_processes import RandomProcess 4 | from .distributions import ( 5 | Bernoulli, 6 | Binomial, 7 | Hypergeometric, 8 | Geometric, 9 | NegativeBinomial, 10 | Pascal, 11 | Poisson, 12 | DiscreteUniform, 13 | Uniform, 14 | Normal, 15 | Exponential, 16 | Gamma, 17 | Beta, 18 | StudentT, 19 | ChiSquare, 20 | F, 21 | Cauchy, 22 | LogNormal, 23 | Pareto, 24 | Rayleigh, 25 | MultivariateNormal, 26 | BivariateNormal, 27 | Multinomial 28 | ) 29 | from .independence import AssumeIndependent 30 | from .index_sets import ( 31 | Naturals, 32 | Integers, 33 | Reals, 34 | DiscreteTimeSequence 35 | ) 36 | from .result import ( 37 | Scalar, 38 | Vector, 39 | InfiniteVector, 40 | DiscreteTimeFunction, 41 | ContinuousTimeFunction, 42 | 
concat 43 | ) 44 | from .gaussian_process import ( 45 | GaussianProcess, 46 | GaussianProcessProbabilitySpace, 47 | BrownianMotion, 48 | BrownianMotionProbabilitySpace 49 | ) 50 | from .poisson_process import ( 51 | PoissonProcess, 52 | PoissonProcessProbabilitySpace 53 | ) 54 | from .markov_chains import ( 55 | MarkovChain, 56 | MarkovChainProbabilitySpace, 57 | ContinuousTimeMarkovChain, 58 | ContinuousTimeMarkovChainProbabilitySpace 59 | ) 60 | from .plot import figure, xlabel, ylabel, xlim, ylim, plot 61 | from .math import * 62 | -------------------------------------------------------------------------------- /symbulate/base.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | class Arithmetic: 7 | """A class with operations such as +, -, *, /. 8 | 9 | Subclasses must implement the _operation_factory method, 10 | which specifies how each operation acts on instances of 11 | that class. 
12 | """ 13 | 14 | # e.g., X + Y or X + 3 15 | def __add__(self, other): 16 | op_func = self._operation_factory(lambda x, y: x + y) 17 | return op_func(self, other) 18 | 19 | # e.g., 3 + X 20 | def __radd__(self, other): 21 | return self.__add__(other) 22 | 23 | # e.g., X - Y or X - 3 24 | def __sub__(self, other): 25 | op_func = self._operation_factory(lambda x, y: x - y) 26 | return op_func(self, other) 27 | 28 | # e.g., 3 - X 29 | def __rsub__(self, other): 30 | return -1 * self.__sub__(other) 31 | 32 | # e.g., -X 33 | def __neg__(self): 34 | return -1 * self 35 | 36 | # e.g., X * Y or X * 2 37 | def __mul__(self, other): 38 | op_func = self._operation_factory(lambda x, y: x * y) 39 | return op_func(self, other) 40 | 41 | # e.g., 2 * X 42 | def __rmul__(self, other): 43 | return self.__mul__(other) 44 | 45 | # e.g., X / Y or X / 2 46 | def __truediv__(self, other): 47 | op_func = self._operation_factory(lambda x, y: x / y) 48 | return op_func(self, other) 49 | 50 | # e.g., 2 / X 51 | def __rtruediv__(self, other): 52 | op_func = self._operation_factory(lambda x, y: y / x) 53 | return op_func(self, other) 54 | 55 | # e.g., X ** 2 56 | def __pow__(self, other): 57 | op_func = self._operation_factory(lambda x, y: x ** y) 58 | return op_func(self, other) 59 | 60 | # e.g., 2 ** X 61 | def __rpow__(self, other): 62 | op_func = self._operation_factory(lambda x, y: y ** x) 63 | return op_func(self, other) 64 | 65 | # Alternative notation for powers: e.g., X ^ 2 66 | def __xor__(self, other): 67 | return self.__pow__(other) 68 | 69 | # Alternative notation for powers: e.g., 2 ^ X 70 | def __rxor__(self, other): 71 | return self.__rpow__(other) 72 | 73 | 74 | class Comparable: 75 | """A class with comparison operators such as <, >, and ==. 76 | 77 | Subclasses must implement the _comparison_factory method, 78 | which specifies how each comparison acts on instances of 79 | that class. 
80 | """ 81 | 82 | def __eq__(self, other): 83 | op_func = self._comparison_factory(lambda x, y: x == y) 84 | return op_func(self, other) 85 | 86 | def __ne__(self, other): 87 | op_func = self._comparison_factory(lambda x, y: x != y) 88 | return op_func(self, other) 89 | 90 | def __lt__(self, other): 91 | op_func = self._comparison_factory(lambda x, y: x < y) 92 | return op_func(self, other) 93 | 94 | def __le__(self, other): 95 | op_func = self._comparison_factory(lambda x, y: x <= y) 96 | return op_func(self, other) 97 | 98 | def __gt__(self, other): 99 | op_func = self._comparison_factory(lambda x, y: x > y) 100 | return op_func(self, other) 101 | 102 | def __ge__(self, other): 103 | op_func = self._comparison_factory(lambda x, y: x >= y) 104 | return op_func(self, other) 105 | 106 | 107 | class Statistical: 108 | """A class with statistical functions, such as mean, var, etc. 109 | 110 | Subclasses must implement the _statistic_factory and 111 | _multivariate_statistic_factory methods, which specify how 112 | (univariate) statistics (e.g., mean and variance), as well as 113 | multivariate statistics (e.g., covariance and correlation) 114 | are calculated on the object. 115 | """ 116 | 117 | def sum(self): 118 | r"""Calculate the sum. 119 | 120 | .. math:: \frac{1}{n} \sum_{i=1}^n x_i 121 | 122 | Returns: 123 | The sum of the numbers. 124 | """ 125 | op_func = self._statistic_factory(np.sum) 126 | return op_func(self) 127 | 128 | def mean(self): 129 | r"""Calculate the mean (a.k.a. average). 130 | 131 | The mean, or average, is a measure of center. 132 | 133 | .. math:: \mu = \frac{1}{n} \sum_{i=1}^n x_i 134 | 135 | Returns: 136 | float: The mean of the numbers. 137 | """ 138 | op_func = self._statistic_factory(np.mean) 139 | return op_func(self) 140 | 141 | def quantile(self, q): 142 | r"""Calculate a specified quantile (percentile). 143 | 144 | The (100q)th quantile is the value x such that 145 | 146 | .. 
math:: \frac{\#\{ i: x_i \leq x \}}{n} = q 147 | 148 | Args: 149 | q (float): A number between 0 and 1 specifying 150 | the desired quantile or percentile. 151 | 152 | Returns: 153 | The (100q)th quantile of the numbers. 154 | """ 155 | op_func = self._statistic_factory( 156 | lambda **kwargs: np.percentile(q=q * 100, **kwargs) 157 | ) 158 | return op_func(self) 159 | 160 | def percentile(self, q): 161 | r"""Calculate a specified percentile. 162 | 163 | Alias for .quantile(). 164 | """ 165 | return self.quantile(q) 166 | 167 | def iqr(self): 168 | r"""Calculate the interquartile range (IQR). 169 | 170 | The IQR is the 75th percentile minus the 25th percentile. 171 | 172 | Returns: 173 | The interquartile range. 174 | """ 175 | return self.quantile(.75) - self.quantile(.25) 176 | 177 | def median(self): 178 | r"""Calculate the median. 179 | 180 | The median is the middle number in a *sorted* list. 181 | It is a measure of center. 182 | 183 | Returns: 184 | The median of the numbers. 185 | """ 186 | op_func = self._statistic_factory(np.median) 187 | return op_func(self) 188 | 189 | def std(self): 190 | r"""Calculate the standard deviation. 191 | 192 | The standard deviation is the square root of the variance. 193 | It is a measure of spread. 194 | 195 | .. math:: 196 | 197 | \sigma &= \sqrt{\frac{1}{n} \sum_{i=1}^n (x_i - \mu)^2} \\ 198 | &= \sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2 - \mu^2} 199 | 200 | Returns: 201 | float: The standard deviation of the numbers. 202 | """ 203 | op_func = self._statistic_factory(np.std) 204 | return op_func(self) 205 | 206 | def sd(self): 207 | r"""Calculate the standard deviation. 208 | 209 | The standard deviation is the square root of the variance. 210 | It is a measure of spread. 211 | 212 | .. math:: 213 | 214 | \sigma &= \sqrt{\frac{1}{n} \sum_{i=1}^n (x_i - \mu)^2} \\ 215 | &= \sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2 - \mu^2} 216 | 217 | Returns: 218 | float: The standard deviation of the numbers. 
219 | """ 220 | return self.std() 221 | 222 | def var(self): 223 | r"""Calculate the variance. 224 | 225 | The variance is the average squared distance between 226 | each number and the mean. It is a measure of spread. 227 | 228 | .. math:: 229 | 230 | \sigma^2 &= \frac{1}{n} \sum_{i=1}^n (x_i - \mu)^2 \\ 231 | &= \frac{1}{n} \sum_{i=1}^n x_i^2 - \mu^2 232 | 233 | Returns: 234 | float: The variance of the numbers. 235 | """ 236 | op_func = self._statistic_factory(np.var) 237 | return op_func(self) 238 | 239 | def skew(self): 240 | r"""Calculate the skewness. 241 | 242 | Returns: 243 | The skewness of the numbers. 244 | """ 245 | op_func = self._statistic_factory(stats.skew) 246 | return op_func(self) 247 | 248 | def skewness(self): 249 | """Calculate the skewness. Alias for .skew()""" 250 | return self.skew() 251 | 252 | def kurtosis(self): 253 | r"""Calculate the kurtosis. 254 | 255 | Returns: 256 | The kurtosis of the numbers. 257 | """ 258 | op_func = self._statistic_factory(stats.kurtosis) 259 | return op_func(self) 260 | 261 | def max(self): 262 | r"""Calculate the maximum. 263 | 264 | Returns: 265 | The maximum of the numbers. 266 | """ 267 | op_func = self._statistic_factory(np.amax) 268 | return op_func(self) 269 | 270 | def min(self): 271 | r"""Calculate the minimum. 272 | 273 | Returns: 274 | The minimum of the numbers. 275 | """ 276 | op_func = self._statistic_factory(np.amin) 277 | return op_func(self) 278 | 279 | def min_max_diff(self): 280 | r"""Calculate the difference between the min and max. 281 | 282 | .. math:: \max - \min 283 | 284 | The min-max diff is also called the range. It is 285 | a measure of spread. 286 | 287 | Returns: 288 | The difference between the min and the max. 289 | """ 290 | return self.max() - self.min() 291 | 292 | def cov(self): 293 | r"""Calculate the pairwise covariances. 294 | 295 | The covariance is a measure of the relationship between two variables. 
296 | The sign of the covariance indicates the direction of the relationship. 297 | 298 | .. math:: 299 | 300 | \sigma_{XY} = \frac{1}{n} \sum_{i=1}^n (x_i - \mu_X) (y_i - \mu_Y) 301 | 302 | Returns: 303 | The pairwise covariances between all dimensions. This is usually 304 | a scalar when there are only 2 dimensions and a matrix when 305 | there are more than 2 dimensions. 306 | """ 307 | op_func = self._multivariate_statistic_factory( 308 | lambda a: np.cov(a, rowvar=False, ddof=0) 309 | ) 310 | return op_func(self) 311 | 312 | def corr(self): 313 | r"""Calculate the pairwise correlations. 314 | 315 | The correlation is the covariance normalized by the standard deviations. 316 | 317 | .. math:: 318 | 319 | \rho_{XY} = \frac{1}{n} \sum_{i=1}^n \frac{x_i - \mu_X}{\sigma_X} \frac{y_i - \mu_Y}{\sigma_Y} 320 | 321 | Returns: 322 | The pairwise correlations between all dimensions. This is usually 323 | a scalar when there are only 2 dimensions and a matrix when 324 | there are more than 2 dimensions. 325 | """ 326 | op_func = self._multivariate_statistic_factory( 327 | lambda a: np.corrcoef(a, rowvar=False, ddof=0) 328 | ) 329 | return op_func(self) 330 | 331 | def corrcoef(self): 332 | r"""An alias for .corr()""" 333 | return self.corr() 334 | 335 | 336 | class Logical: 337 | """A class that supports logical operations: and, or, and not. 338 | 339 | Subclasses must implement the _logical_factory method, which 340 | specifies how the logical operator operates on two objects 341 | of that type. 342 | """ 343 | 344 | def __and__(self, other): 345 | op_func = self._logical_factory(lambda x, y: x and y) 346 | return op_func(self, other) 347 | 348 | def __or__(self, other): 349 | op_func = self._logical_factory(lambda x, y: x or y) 350 | return op_func(self, other) 351 | 352 | def __invert__(self): 353 | op_func = self._logical_factory(lambda x: not x) 354 | return op_func(self) 355 | 356 | 357 | class Filterable: 358 | """A class with filtering and counting methods. 
359 | 360 | Subclasses must implement the filter method, which specifies how to 361 | construct a new instance containing only those elements that satisfy 362 | a given criterion. 363 | """ 364 | 365 | def filter_eq(self, value): 366 | """Get all elements equal to a particular value. 367 | 368 | Args: 369 | value: A value of the same type as the elements in the object. 370 | 371 | Returns: 372 | All of the elements that were equal to value. 373 | """ 374 | return self.filter(lambda x: x == value) 375 | 376 | def filter_neq(self, value): 377 | """Get all elements _not_ equal to a particular value. 378 | 379 | Args: 380 | value: A value of the same type as the elements in the object. 381 | 382 | Returns: 383 | All of the elements that were _not_ equal to value. 384 | """ 385 | return self.filter(lambda x: x != value) 386 | 387 | def filter_lt(self, value): 388 | """Get all elements less than a particular value. 389 | 390 | N.B. lt stands for "less than". For elements that are 391 | less than _or equal to_ the given value, use .filter_leq(value). 392 | 393 | Args: 394 | value: A value of the same type as the elements in the object. 395 | 396 | Returns: 397 | All of the elements that were less than value. 398 | """ 399 | return self.filter(lambda x: x < value) 400 | 401 | def filter_leq(self, value): 402 | """Get all elements less than or equal to a particular value. 403 | 404 | N.B. leq stands for "less than or equal to". For elements 405 | that are strictly less than the given value, use .filter_lt(value). 406 | 407 | Args: 408 | value: A value of the same type as the elements in the object. 409 | 410 | Returns: 411 | All of the elements that were less than _or equal to_ value. 412 | """ 413 | return self.filter(lambda x: x <= value) 414 | 415 | def filter_gt(self, value): 416 | """Get all elements greater than a particular value. 417 | 418 | N.B. gt stands for "greater than". 
For elements that are 419 | greater than _or equal to_ the given value, use .filter_geq(value). 420 | 421 | Args: 422 | value: A value of the same type as the elements in the object. 423 | 424 | Returns: 425 | All of the elements that were greater than value. 426 | """ 427 | 428 | return self.filter(lambda x: x > value) 429 | 430 | def filter_geq(self, value): 431 | """Get all elements greater than or equal to a particular value. 432 | 433 | N.B. geq stands for "greater than or equal to". For elements 434 | that are strictly greater than the given value, use .filter_gt(value). 435 | 436 | Args: 437 | value: A value of the same type as the elements in the object. 438 | 439 | Returns: 440 | All of the elements that were greater than _or equal to_ value. 441 | """ 442 | return self.filter(lambda x: x >= value) 443 | 444 | 445 | # The following functions return an integer indicating 446 | # how many elements passed a given criterion. 447 | 448 | def count(self, func=lambda x: True): 449 | """Counts the number of elements satisfying a given criterion. 450 | 451 | Args: 452 | func (element -> bool): A function that takes in an element 453 | and returns a boolean (True/False). Only those elements 454 | that return True will be counted. 455 | 456 | Returns: 457 | int: The number of elements e for which func(e) is True. 458 | """ 459 | return len(self.filter(func)) 460 | 461 | def count_eq(self, value): 462 | """Count the number of elements equal to a particular value. 463 | 464 | Args: 465 | value: A value of the same type as the elements in the object. 466 | 467 | Returns: 468 | int: The number of elements that were equal to value. 469 | """ 470 | return len(self.filter_eq(value)) 471 | 472 | def count_neq(self, value): 473 | """Count the number of elements _not_ equal to a particular value. 474 | 475 | Args: 476 | value: A value of the same type as the elements in the object. 477 | 478 | Returns: 479 | int: The number of elements that were not equal to value. 
480 | """ 481 | return len(self.filter_neq(value)) 482 | 483 | def count_lt(self, value): 484 | """Count the number of elements less than a particular value. 485 | 486 | N.B. lt stands for "greater than". For the number of elements 487 | that are less than _or equal to_ the given value, use 488 | .count_leq(value). 489 | 490 | Args: 491 | value: A value of the same type as the elements in the object. 492 | 493 | Returns: 494 | int: The number of elements that were less than value. 495 | """ 496 | return len(self.filter_lt(value)) 497 | 498 | def count_leq(self, value): 499 | """Count the number of elements less than or equal to a particular value. 500 | 501 | N.B. leq stands for "less than or equal to". For the number of 502 | elements that are strictly greater than the given value, use 503 | .count_lt(value). 504 | 505 | Args: 506 | value: A value of the same type as the elements in the object. 507 | 508 | Returns: 509 | int: The number of elements that were less than _or equal to_ value. 510 | """ 511 | return len(self.filter_leq(value)) 512 | 513 | def count_gt(self, value): 514 | """Count the number of elements greater than a particular value. 515 | 516 | N.B. gt stands for "greater than". For the number of elements 517 | that are greater than _or equal to_ the given value, use 518 | .count_geq(value). 519 | 520 | Args: 521 | value: A value of the same type as the elements in the object. 522 | 523 | Returns: 524 | int: The number of elements that were greater than value. 525 | """ 526 | return len(self.filter_gt(value)) 527 | 528 | def count_geq(self, value): 529 | """Count the number of elements greater than or equal to a particular value. 530 | 531 | N.B. geq stands for "greater than or equal to". For the number of 532 | elements that are strictly greater than the given value, use 533 | .count_gt(value). 534 | 535 | Args: 536 | value: A value of the same type as the elements in the object. 
def get_gaussian_process_result(mean_func, cov_func, index_set=Reals()):
    """Create one lazily-simulated realization of a Gaussian process.

    Values are only simulated at the times that are actually requested.
    Each new batch of times is drawn from the multivariate normal
    distribution conditional on every value simulated so far, so that
    all values of one realization are mutually consistent.

    Args:
        mean_func: mean function (function of one argument)
        cov_func: (auto)covariance function (function of two arguments)
        index_set: index set for the Gaussian process; must be an
            instance of Reals or DiscreteTimeSequence
            (by default, all real numbers)

    Returns:
        A GaussianProcessResult, which subclasses DiscreteTimeFunction
        or ContinuousTimeFunction depending on the index set.

    Raises:
        Exception: if index_set is neither Reals nor DiscreteTimeSequence.
    """

    # Determine whether the process is discrete-time or continuous-time
    if isinstance(index_set, DiscreteTimeSequence):
        base_class = DiscreteTimeFunction
    elif isinstance(index_set, Reals):
        base_class = ContinuousTimeFunction
    else:
        raise Exception(
            "Index set for Gaussian process must be Reals or "
            "DiscreteTimeSequence."
        )

    class GaussianProcessResult(base_class):

        def __init__(self, mean_func, cov_func):

            # Mean vector and covariance matrix of the values simulated
            # so far, kept in the same order as the keys of self.observed.
            self.mean = np.empty(shape=0)
            self.cov = np.empty(shape=(0, 0))
            self.observed = {}

            def _vfunc(ts):
                # This function assumes that ts is an iterable of times.
                ts = list(ts)

                # Times whose values have already been simulated
                times = list(self.observed.keys())

                # If this is a discrete process, each t will be an index.
                # Convert it to a time.
                if isinstance(index_set, DiscreteTimeSequence):
                    ts = [t / index_set.fs for t in ts]

                # Check that every t is in the index set
                for t in ts:
                    if t not in index_set:
                        raise KeyError(
                            "Gaussian process is not defined at time %.2f." % t
                        )

                # Create an object to store the results
                values = np.full(len(ts), np.nan)

                # Handle times that have already been calculated, as well
                # as times where the variance is 0. Every other time is
                # recorded once in `pending` (time -> positions in ts), so
                # a time requested twice in one call is simulated only
                # once and self.mean / self.cov stay aligned with
                # self.observed.
                pending = {}
                for i, t in enumerate(ts):
                    if cov_func(t, t) == 0:
                        values[i] = mean_func(t)
                    elif t in self.observed:
                        values[i] = self.observed[t]
                    else:
                        pending.setdefault(t, []).append(i)
                if not pending:
                    return values
                new_ts = list(pending)

                # Simulate values for the remaining times
                mean2 = np.array([mean_func(t) for t in new_ts], dtype=float)
                # The small ridge keeps cov11 numerically invertible.
                cov11 = self.cov + MACHINE_EPS * np.identity(len(times))
                cov12 = np.empty(shape=(len(times), len(new_ts)))
                for i, s in enumerate(times):
                    for j, t in enumerate(new_ts):
                        cov12[i, j] = cov_func(s, t)
                cov22 = np.empty(shape=(len(new_ts), len(new_ts)))
                for i, s in enumerate(new_ts):
                    for j, t in enumerate(new_ts):
                        cov22[i, j] = cov_func(s, t)

                # Conditional distribution of the new values given the
                # values observed so far.
                observed_values = np.array(
                    list(self.observed.values()), dtype=float
                )
                cond_mean = (mean2 + (
                    cov12.T @
                    np.linalg.solve(cov11, observed_values - self.mean)
                ))
                cond_var = (cov22 - (
                    cov12.T @
                    np.linalg.solve(cov11, cov12)
                ))

                # update mean vector and covariance matrix
                self.mean = np.append(self.mean, mean2)
                self.cov = np.block([[cov11, cov12], [cov12.T, cov22]])

                # simulate normal with given mean and variance
                new_values = np.random.multivariate_normal(cond_mean, cond_var)

                # store the new values and copy them into every position
                # of the output at which that time was requested
                for t, v in zip(new_ts, new_values):
                    self.observed[t] = v
                    values[pending[t]] = v

                return values

            self.vfunc = _vfunc

            def _func(t):
                return _vfunc([t])[0]

            super().__init__(func=_func)
            self.index_set = index_set

    return GaussianProcessResult(mean_func, cov_func)
class BrownianMotion(RandomProcess, RV):
    """Brownian motion, usable both as a RandomProcess and as an RV."""

    def __init__(self, drift=0, scale=1):
        """Initialize Brownian motion.

        Args:
            drift: drift parameter of Brownian motion
            scale: scale parameter of Brownian motion
        """
        space = BrownianMotionProbabilitySpace(drift=drift, scale=scale)
        # Both parent classes are initialized on the same probability
        # space, RandomProcess first and then RV.
        for parent in (RandomProcess, RV):
            parent.__init__(self, space)
class IndexSet(object):
    """Base class for index sets (the sets of times at which a process
    is defined).

    The base class contains no times at all; subclasses override
    __contains__ to describe which times belong to the set.
    """

    def __init__(self):
        return

    def __getitem__(self, t):
        """Return t if it belongs to the index set.

        Raises:
            KeyError: if t is not in the index set.
        """
        if t in self:
            return t
        # "%.2f" only works for real numbers; fall back to repr() so
        # that a bad key always produces a KeyError, never a TypeError.
        try:
            label = "%.2f" % t
        except TypeError:
            label = repr(t)
        raise KeyError("Time %s not in index set." % label)

    def __contains__(self, value):
        return False

    def __eq__(self, other):
        # Two index sets are equal when they are of exactly the same
        # class (so a subclass instance is not equal to its parent's).
        return type(other) is type(self)

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash
        # on the class so that equal index sets hash equally.
        return hash(type(self))
class MarkovChainResult(InfiniteVector, DiscreteValued):
    """One realization of a discrete-time Markov chain.

    States are generated lazily: the n-th state is only simulated the
    first time an index >= n is requested, and is then remembered in
    self.states.
    """

    def __init__(self, transition_matrix, initial_dist, state_labels=None):
        # Validate each row: it must sum to 1 and have no negative entry.
        for probs in transition_matrix:
            if abs(sum(probs) - 1) > EPS:
                raise Exception("Rows of a transition matrix must sum to 1.")
            if any(p < 0 for p in probs):
                raise Exception("Probabilities cannot be negative.")

        # Validate the dimensions.
        self.transition_matrix = np.asarray(transition_matrix)
        n_rows, n_cols = self.transition_matrix.shape
        if n_rows != n_cols:
            raise Exception("Transition matrix must be square.")
        if len(initial_dist) != n_cols:
            raise Exception("Initial distribution must be a vector whose "
                            "length matches the dimensions of the "
                            "transition matrix.")
        self.initial_dist = initial_dist

        # Default labels are simply the state indexes 0, 1, ..., n-1.
        if state_labels is None:
            self.state_labels = range(n_cols)
        else:
            if len(state_labels) != n_cols:
                raise Exception("There must be as many state labels as "
                                "there are states.")
            self.state_labels = state_labels
        self.n_states = n_cols

        # self.states holds the *indexes* of the states visited so far;
        # labels are looked up only when a value is returned.
        first = np.random.choice(range(n_cols), p=self.initial_dist)
        self.states = [first]

        def _func(n):
            known = len(self.states)
            if n < known:
                current = self.states[n]
            else:
                # Extend the path from the last simulated state up to
                # (and including) step n.
                current = self.states[known - 1]
                for _ in range(known, n + 1):
                    current = np.random.choice(
                        range(self.n_states),
                        p=self.transition_matrix[current, :]
                    )
                    self.states.append(current)
            return self.state_labels[current]

        super().__init__(_func)

    def get_states(self):
        # The realization is itself the sequence of states.
        return self
class ContinuousTimeMarkovChainProbabilitySpace(ProbabilitySpace):

    def __init__(self, generator_matrix, initial_dist, state_labels=None):
        """Initialize a probability space for a continuous-time Markov chain.

        Args:
            generator_matrix: n x n generator matrix whose rows sum to 0
            initial_dist: length n vector of the initial distribution
            state_labels: length n vector of the labels of each state
                (defaults to 0, 1, ..., n-1)

        Raises:
            Exception: if a row does not sum to 0, a diagonal entry is
                positive, an off-diagonal entry is negative, the matrix
                is not square, or the lengths of initial_dist or
                state_labels do not match the number of states.
        """

        # Check generator matrix
        for i, row in enumerate(generator_matrix):
            if abs(sum(row)) > EPS:
                raise Exception("Rows of a generator matrix must sum to 0.")
            for j, q in enumerate(row):
                if j == i:
                    if q > 0:
                        raise Exception("Diagonal elements of a generator matrix " +
                                        "cannot be positive.")
                else:
                    if q < 0:
                        raise Exception("Off-diagonal elements of a generator matrix " +
                                        "cannot be negative.")
        # Check that dimensions agree
        self.generator_matrix = np.array(generator_matrix)
        m, n = self.generator_matrix.shape
        if m != n:
            raise Exception("Generator matrix must be square.")
        if len(initial_dist) != n:
            raise Exception("Initial distribution must be a vector whose "
                            "length matches the dimensions of the "
                            "generator matrix.")
        self.initial_dist = initial_dist
        # Process state labels
        if state_labels is not None:
            if len(state_labels) != n:
                raise Exception("There must be as many state labels as "
                                "there are states.")
            self.state_labels = state_labels
        else:
            self.state_labels = range(n)
        self.n_states = n

        # Determine the transition matrix of the embedded jump chain:
        # from state i the chain jumps to j != i with probability
        # q_ij / (-q_ii).
        transition_matrix = []
        for i, row in enumerate(self.generator_matrix):
            rate = -row[i]
            if rate == 0:
                # Absorbing state: dividing by the zero rate would fill
                # the row with NaN, so make the jump chain stay put.
                # NOTE(review): the holding time in such a state then
                # divides by a zero rate in the result class; with NumPy
                # scalars that presumably yields inf -- confirm.
                transition_matrix.append(
                    [1.0 if j == i else 0.0 for j in range(n)]
                )
            else:
                transition_matrix.append(
                    [q / rate if j != i else 0 for j, q in enumerate(row)]
                )
        self.transition_matrix = np.array(transition_matrix)

        # A continuous-time Markov chain is specified by the
        # sequence of states and the unscaled interarrival times.
        def _draw():
            states = MarkovChain(self.transition_matrix,
                                 self.initial_dist).draw()
            rates = -np.diag(self.generator_matrix)
            unscaled_interarrival_times = (Exponential(1) ** inf).draw()
            return ContinuousTimeMarkovChainResult(
                states,
                rates,
                unscaled_interarrival_times,
                self.state_labels)

        super().__init__(_draw)
def var(x):
    """Return the population variance of the values in x.

    This is the mean of the squared deviations from the mean, i.e. the
    sum of squares is divided by len(x), not len(x) - 1.

    Args:
        x: a non-empty sized iterable of numbers.

    Returns:
        The average of (value - mean) ** 2 over the values in x.
    """
    # Compute the mean once up front; recomputing it for every element
    # would make the whole calculation quadratic in len(x).
    center = sum(x) / len(x)
    squared_devs = [(i - center) ** 2 for i in x]
    return sum(squared_devs) / len(squared_devs)
def arrival_times(continuous_time_function):
    """Given a realization of a continuous-time,
    discrete-state process, returns the arrival
    times (i.e., the times when the state changes).

    Args:
        continuous_time_function: A ContinuousTimeFunction
          object, such as ContinuousTimeMarkovChainResult or
          PoissonProcessResult.

    Returns:
        Whatever continuous_time_function.get_arrival_times() returns.

    Raises:
        TypeError: if the argument is not both a ContinuousTimeFunction
          and DiscreteValued.
    """
    if not (isinstance(continuous_time_function,
                       ContinuousTimeFunction) and
            isinstance(continuous_time_function,
                       DiscreteValued)):
        raise TypeError(
            "Arrival times are only defined for "
            "continuous-time, discrete-valued functions."
        )
    return continuous_time_function.get_arrival_times()
def count_var(x):
    """Tally how many times each distinct value occurs in x.

    Returns a plain dict mapping value -> number of occurrences, with
    keys in order of first appearance.
    """
    tally = {}
    for item in x:
        tally[item] = tally.get(item, 0) + 1
    return tally
def make_marginal_impulse(count, color, ax_marg, alpha, axis):
    """Draw a marginal impulse plot of relative frequencies.

    The counts are normalized to sum to 1 and drawn as lines starting
    at 0: vertical lines when axis is 'x', horizontal lines when axis
    is 'y'. Any other axis value draws nothing.
    """
    ticks = list(count.keys())
    raw = list(count.values())
    total = sum(raw)
    heights = [n / total for n in raw]

    if axis == 'x':
        draw = ax_marg.vlines
    elif axis == 'y':
        draw = ax_marg.hlines
    else:
        return
    draw(ticks, 0, heights, color=color, alpha=alpha)
class PoissonProcessResult(ContinuousTimeFunction,
                           DiscreteValued):
    """One realization of a Poisson process, defined by the sequence
    of times between consecutive arrivals."""

    def __init__(self, interarrival_times):
        self.interarrival_times = interarrival_times

        def func(t):
            # Walk through the arrivals in order, accumulating the time
            # of each one; the value at time t is the number of
            # arrivals whose time is at or before t.
            elapsed = 0
            arrivals = 0
            for gap in self.interarrival_times:
                elapsed += gap
                if t < elapsed:
                    return arrivals
                arrivals += 1

        super().__init__(func)

    def get_states(self):
        # The n-th state is n itself.
        def identity(n):
            return n
        return InfiniteVector(identity)
class ProbabilitySpace:
    """Defines a probability space.

    Attributes:
      draw (function): A zero-argument function that produces one
        outcome from the probability space each time it is called.
    """

    def __init__(self, draw):
        self.draw = draw

    def sim(self, n):
        """Simulate n draws from probability space.

        Args:
          n (int): How many draws to make.

        Returns:
          Results: A Results object built from the n simulated outcomes.
        """
        outcomes = (self.draw() for _ in range(n))
        return Results(outcomes)

    def check_same(self, other):
        """Raise an Exception unless other compares equal to this space."""
        if self != other:
            raise Exception("Events must be defined on same probability space.")

    def apply(self, func):
        """Define a new probability space.

        Args:
          func: function to apply to each realization.

        Returns:
          A new ProbabilitySpace whose outcomes are func applied to
          outcomes drawn from the current probability space.
        """
        return ProbabilitySpace(lambda: func(self.draw()))

    def __mul__(self, other):
        # Product space: an outcome joins one draw from each factor.
        return ProbabilitySpace(lambda: join(self.draw(), other.draw()))

    def __pow__(self, exponent):
        # An infinite power yields an InfiniteVector whose entries are
        # each a fresh draw; a finite power yields a Vector of
        # `exponent` draws.
        if exponent == float("inf"):
            def draw():
                return InfiniteVector(lambda _: self.draw())
        else:
            def draw():
                return Vector(self.draw() for _ in range(exponent))
        return ProbabilitySpace(draw)
class BoxModel(ProbabilitySpace):
    """Defines a probability space from a box model.

    Attributes:
      box (list-like or dict-like): The box to sample from.
        The box can be specified either directly as a list
        of objects or indirectly as a dict of objects and
        their counts.
      size (int): How many draws to make. None (the default) and 1
        both mean a single draw, returned as a bare ticket;
        float("inf") produces an infinite sequence of draws.
      replace (bool-like): Sample with replacement or without?
      probs (list): Probabilities of sampling each ticket
        (by default, all tickets are equally likely). Note
        that this is ignored if box is specified as a dict.
      order_matters (bool): Should we count different
        orderings of the same tickets as different outcomes?
        Essentially, this determines whether the draws are
        sorted before returning or not.

    Raises:
      Exception: if box is neither a list nor a dict, or if more
        tickets are requested without replacement than the box holds.
    """

    def __init__(self, box, size=None, replace=True, probs=None,
                 order_matters=True):
        if isinstance(box, list):
            self.box = box
            self.probs = probs
        elif isinstance(box, dict):
            # Expand {ticket: count} into an explicit list of tickets.
            # The counts already encode the weights, so probs is dropped.
            self.box = []
            for ticket, count in box.items():
                self.box.extend([ticket] * count)
            self.probs = None
        else:
            raise Exception(
                "Box must be specified either as a list or a dict."
            )
        # size=1 is normalized to None so that a single draw is returned
        # as a bare ticket rather than a one-element Vector.
        self.size = None if size == 1 else size
        self.replace = replace
        self.order_matters = order_matters
        self.output_type = Vector
        self.infinite_output_type = InfiniteVector

        # If drawing without replacement, check that the number
        # of draws does not exceed the number of tickets in the box.
        # A size of None means a single draw, which is always allowed;
        # it must be excluded explicitly because None cannot be compared
        # with an int.
        if (not self.replace
                and self.size is not None
                and self.size > len(self.box)):
            raise Exception(
                "Cannot draw more tickets (without replacement) "
                "than there are tickets in the box."
            )

    def draw(self):
        """
        A function that takes no arguments and returns a value(s) from the
        "box" argument of the BoxModel.

        Based on BoxModel inputs:
          Number of values returned depends on the input of the "size"
            argument.
          Whether or not a value in the box can appear multiple times
            depends on the "replace" argument.
          If a list of probabilities is specified, values drawn will be drawn
            with the specified probabilities.
        """

        def draw_inds(size):
            # Sample positions in the box rather than the tickets themselves,
            # so tickets may be arbitrary Python objects.
            return np.random.choice(len(self.box), size, self.replace, self.probs)

        if self.size is None:
            return self.box[draw_inds(None)]
        elif self.size == float("inf"):
            # Each position of the infinite sequence is one single draw.
            def _func(_):
                return self.box[draw_inds(None)]
            return self.infinite_output_type(_func)
        else:
            draws = [self.box[i] for i in draw_inds(self.size)]
            if not self.order_matters:
                draws.sort()
            return self.output_type(draws)
197 | """ 198 | 199 | def __init__(self, size=None, replace=False, order_matters=True): 200 | box = [] 201 | for rank in list(range(2, 11)) + ["J", "Q", "K", "A"]: 202 | for suit in ["Diamonds", "Hearts", "Clubs", "Spades"]: 203 | box.append((rank, suit)) 204 | super().__init__(box, size, replace, 205 | probs=None, order_matters=order_matters) 206 | -------------------------------------------------------------------------------- /symbulate/random_processes.py: -------------------------------------------------------------------------------- 1 | from .index_sets import Naturals 2 | from .random_variables import RV 3 | from .result import TimeFunction, is_scalar 4 | 5 | 6 | class RandomProcess(RV): 7 | """Defines a random process. 8 | 9 | A random process defines a random variable X(t) 10 | for each time t in an collection of times 11 | (called an index set). 12 | 13 | Args: 14 | prob_space (ProbabilitySpace): the underlying 15 | probability space for the random process. 16 | index_set (IndexSet): the index set for the 17 | random process. (By default, the index set 18 | is the natural numbers 0, 1, 2, 3, ....) 19 | func: a function that takes in an outcome from 20 | the probability space and a time from the 21 | index set and returns the value of the 22 | random process at that time. (By default, 23 | func is the canonical function. That is, 24 | we assume that every outcome x from the 25 | probability space is a function of time and 26 | the value of the process is simply x(t).) 27 | """ 28 | 29 | def __init__(self, prob_space, index_set=Naturals(), 30 | func=lambda outcome, t: outcome[t]): 31 | self.index_set = index_set 32 | # This dict stores random variables at specific times. 33 | self.rvs = {} 34 | 35 | # Define the function for the RV. 36 | def _func(outcome): 37 | def x(t): 38 | # First, check if the time is in self.rvs. 
class RV(Arithmetic, Transformable, Comparable):
    """Defines a random variable.

    A random variable is a function which maps an outcome of
    a probability space to a number. Simulating a random
    variable is a two-step process: first, a draw is taken
    from the underlying probability space; then, the function
    is applied to that draw to obtain the realized value of
    the random variable.

    Args:
      prob_space (ProbabilitySpace): the underlying probability space
        of the random variable.
      func (function, optional): a function that maps draws from the
        probability space to numbers. (By default, the function is the
        identity function. For named distributions, a draw from the
        underlying probability space is the value of the random
        variable itself, which is why the identity function is the
        most frequently used.)

    Attributes:
      prob_space (ProbabilitySpace): the underlying probability space
        of the random variable.
      func (function): a function that maps draws from the probability
        space to numbers.

    Examples:
      # a single draw is a sequence of 0s and 1s, e.g., (0, 0, 1, 0, 1)
      P = BoxModel([0, 1], size=5)
      # X counts the number of 1s in the draw, e.g., 5
      X = RV(P, sum)

      # the function is the identity, so Y has a Normal(0, 1) distribution
      Y = RV(Normal(0, 1))

      # a single draw from BivariateNormal is a tuple of two numbers
      P = BivariateNormal()
      # Z is the smaller of the two numbers
      Z = RV(P, min)
    """

    def __init__(self, prob_space, func=lambda x: x):
        self.prob_space = prob_space
        self.func = func

    def draw(self):
        """A function that takes no arguments and returns a single
          realization of the random variable.

        Example:
          X = RV(Normal(0, 1))
          X.draw() might return -0.9, for example.
        """
        return self.func(self.prob_space.draw())

    def sim(self, n):
        """Simulate n draws from probability space described by the random
          variable.

        Args:
          n (int): How many draws to make.

        Returns:
          RVResults: A list-like object containing the simulation results.
        """

        return RVResults(self.draw() for _ in range(n))

    def __call__(self, outcome):
        """Apply the defining function to outcome, printing a warning first."""
        print("Warning: Calling an RV as a function simply applies the "
              "function that defines the RV to the input, regardless of "
              "whether that input is a possible outcome in the underlying "
              "probability space.")
        return self.func(outcome)

    def check_same_prob_space(self, other):
        """Check that other (if it has a prob_space) shares this RV's space."""
        if hasattr(other, "prob_space"):
            self.prob_space.check_same(other.prob_space)

    def apply(self, func):
        """Transform a random variable by a function.

        Args:
          func: function to apply to the random variable

        Example:
          X = RV(Exponential(1))
          Y = X.apply(log)

        Note: For most standard functions, you can apply the function to
          the random variable directly. For example, in the example above,
          Y = log(X) would have been equivalent and more readable.

        User defined functions can also be applied.

        Example:
          def g(x):
              return log(x ** 2)
          Y = X.apply(g)
        """
        def _func(outcome):
            return func(self.func(outcome))
        return RV(self.prob_space, _func)

    # This allows us to unpack a random vector,
    # e.g., X, Y = RV(BoxModel([0, 1], size=2))
    def __iter__(self):
        # A test draw is needed to learn how many components there are.
        test = self.draw()
        if hasattr(test, "__iter__"):
            for i in range(len(test)):
                yield self[i]
        else:
            raise Exception(
                "To unpack a random vector, the RV needs to "
                "have multiple components."
            )

    def __getitem__(self, n):
        """Return the component(s) of the random vector selected by n.

        Args:
          n: an RV (random index), a numeric vector of indices, a slice,
            or a single index.

        Returns:
          RV: a new random variable on the same probability space.
        """
        # if n is an RV, return a new random variable
        if isinstance(n, RV):
            return RV(self.prob_space,
                      lambda x: self.func(x)[n.func(x)])
        # if the indices are a list, return a random vector
        elif is_numeric_vector(n):
            return self.apply(
                lambda x: Vector(x[i] for i in n)
            )
        # if the indices are a slice, return a random vector
        elif isinstance(n, slice):
            step = n.step or 1

            def _take(x):
                if n.stop is None or (n.start is None and step < 0):
                    # Open-ended slice: the bounds can only be resolved
                    # against the length of the realized value.
                    indices = range(len(x))[slice(n.start, n.stop, step)]
                else:
                    # A missing start defaults to 0; the value is never
                    # asked for its length, so this also works for
                    # values that do not define one.
                    indices = range(n.start or 0, n.stop, step)
                return Vector(x[i] for i in indices)

            return self.apply(_take)
        # otherwise, return the nth value
        return self.apply(lambda x: x[n])

    # The Arithmetic superclass will use this to define all of the
    # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.)
    def _operation_factory(self, op):

        def _op_func(self, other):
            # operations between this RV and another RV
            if isinstance(other, RV):
                self.check_same_prob_space(other)

                def _func(outcome):
                    return op(self.func(outcome), other.func(outcome))
                return RV(self.prob_space, _func)
            # operations between this RV and a scalar
            return self.apply(lambda x: op(x, other))

        return _op_func

    # The Comparable superclass will use this to define all of the
    # usual comparison operations (e.g., <, >, ==, !=, etc.).
    # Note that a comparison of a random variable returns an Event.
    def _comparison_factory(self, op):

        def _op_func(self, other):
            if is_scalar(other):
                return Event(self.prob_space,
                             lambda x: op(self.func(x), other))
            elif isinstance(other, RV):
                self.check_same_prob_space(other)
                return Event(self.prob_space,
                             lambda x: op(self.func(x), other.func(x)))
            raise NotImplementedError(
                "Comparisons are only defined between two RVs or "
                "between an RV and a scalar."
            )

        return _op_func

    # Define a joint distribution of two random variables: e.g., X & Y
    def __and__(self, other):
        self.check_same_prob_space(other)
        if isinstance(other, RV):
            def _func(outcome):
                return join(self.func(outcome), other.func(outcome))
        elif is_scalar(other):
            def _func(outcome):
                return join(self.func(outcome), other)
        else:
            raise Exception("Joint distributions are only defined for RVs.")
        return RV(self.prob_space, _func)

    # Reflected version of &, used for expressions like 3 & X.
    def __rand__(self, other):
        self.check_same_prob_space(other)
        # Reject unsupported operands explicitly, mirroring __and__.
        # Previously this fell through to the return statement with
        # _func unbound.
        if not is_scalar(other):
            raise Exception("Joint distributions are only defined for RVs.")

        def _func(outcome):
            return join(other, self.func(outcome))
        return RV(self.prob_space, _func)

    # Define conditional distribution of random variable.
    # e.g., X | (X > 3)
    def __or__(self, condition_event):
        # Check that the random variable and event are
        # defined on the same probability space.
        self.check_same_prob_space(condition_event)
        if isinstance(condition_event, Event):
            return RVConditional(self, condition_event)
        else:
            raise NotImplementedError
class Tuple(Arithmetic, Transformable, Statistical, Filterable):
    """A collapsible data structure holding a finite sequence of values.

    The values are stored as a built-in tuple in self.values.
    """

    def __init__(self, values):
        # A lone scalar becomes a one-element Tuple.
        if is_scalar(values):
            self.values = (values, )
            return
        if not (hasattr(values, "__len__") or hasattr(values, "__next__")):
            raise Exception(
                "Tuples can only be created from "
                "finite iterable data."
            )
        self.values = tuple(values)

    def __getitem__(self, n):
        # A numeric vector of positions selects a sub-Tuple of the same type;
        # anything else (int, slice) is delegated to the underlying tuple.
        if not is_numeric_vector(n):
            return self.values[n]
        return type(self)(self.values[position] for position in n)

    def __len__(self):
        return len(self.values)

    def __iter__(self):
        yield from self.values

    def __hash__(self):
        return hash(tuple(self.values))

    # Define comparison operators to handle sorting.
    def __eq__(self, other):
        return (
            hasattr(other, "__len__")
            and len(self) == len(other)
            and all(mine == theirs for mine, theirs in zip(self, other))
        )

    def __lt__(self, other):
        mine, theirs = tuple(self.values), tuple(other.values)
        return mine < theirs

    def apply(self, func):
        """Apply function to every element of a Tuple.

        Args:
          func: function to apply to each element

        Returns:
          A new object of the same type holding the transformed elements.

        Example:
          x = Tuple([1, 2, 3])
          y = x.apply(log)

        Note: For most standard functions, you can apply the function to
          the Tuple directly. For example, in the example above,
          y = log(x) would have been equivalent and more readable.

        User defined functions can also be applied.

        Example:
          def log_squared(n):
              return log(n) ** 2
          y = x.apply(log_squared)
        """
        cls = type(self)
        return cls(func(element) for element in self)

    # The Filterable superclass will use this to define all of the
    # .filter_*() and .count_*() methods.
    def filter(self, filt):
        """Keep only the elements that satisfy the given criterion.

        Args:
          filt: A function that takes in an element and returns
            a boolean.

        Returns:
          Tuple: Another Tuple containing only those elements e
            where filt(e) is True.
        """
        cls = type(self)
        return cls(element for element in self if filt(element))

    # The Arithmetic superclass will use this to define all of the
    # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.).
    def _operation_factory(self, op):

        def _op_func(self, other):
            cls = type(self)
            # A number is combined with every element.
            if is_number(other):
                return cls(op(element, other) for element in self)
            if not is_vector(other):
                return NotImplemented
            # Element-wise operations need operands of equal length.
            if len(self) != len(other):
                own_name = cls.__name__
                other_name = type(other).__name__
                raise Exception(
                    "Arithmetic operations between a %s and a %s "
                    "are only valid if they are the same length. "
                    "You attempted to combine a %s of length %d "
                    "with a %s of length %d." % (
                        own_name,
                        other_name,
                        own_name,
                        len(self),
                        other_name,
                        len(other)
                    ))
            # return a new Tuple/Vector of the same length
            return cls(op(left, right) for left, right in zip(self, other))

        return _op_func

    # The Statistical superclass will use this to define all of the
    # usual statistical functions (e.g., mean, var, etc.)
    def _statistic_factory(self, op):
        def _op_func(self):
            return op(self.values)
        return _op_func

    def cumsum(self):
        """Return the running totals of the values, as the same type."""
        running_totals = np.cumsum(self.values)
        return type(self)(running_totals)

    def plot(self, **kwargs):
        """Plot the values against their positions 0, 1, ..., len - 1."""
        positions = range(len(self))
        plt.plot(positions, self.values, '.--', **kwargs)

    def __str__(self):
        # Long sequences are abbreviated to the first five values
        # followed by the last one.
        if len(self) > 6:
            first_few = ", ".join(str(x) for x in self[:5])
            return f"({first_few}, ..., {self[-1]!s})"
        return "(" + ", ".join(str(x) for x in self) + ")"

    def __repr__(self):
        return self.__str__()
By default, it is not set at initialization. 210 | """ 211 | if func is not None: 212 | self.func = func 213 | self.index_set = Naturals() 214 | self.values = [] 215 | 216 | def __getitem__(self, n): 217 | m = len(self.values) 218 | # Add necessary elements to self.values 219 | n0 = None 220 | # handle the case where n is a slice 221 | if isinstance(n, slice): 222 | if n.stop is None: 223 | if n.start is None: 224 | return self 225 | else: 226 | return type(self)(lambda i: self[i + n.start]) 227 | if n.stop >= m: 228 | n0 = n.stop 229 | elif isinstance(n, numbers.Integral) and n >= m: 230 | n0 = n 231 | if n0 is not None: 232 | for i in range(m, n0 + 1): 233 | self.values.append(self.func(i)) 234 | # Return the corresponding value(s) 235 | return self.values[n] 236 | 237 | def __call__(self, n): 238 | return self[n] 239 | 240 | def __str__(self): 241 | first_few = [str(self[i]) for i in range(6)] 242 | return "(" + ", ".join(first_few) + ", ...)" 243 | 244 | def __repr__(self): 245 | return self.__str__() 246 | 247 | def apply(self, func): 248 | """Apply function to every element of an InfiniteTuple. 249 | 250 | Args: 251 | func: function to apply to the InfiniteTuple 252 | 253 | Example: 254 | x = InfiniteTuple(lambda n: n) 255 | y = x.apply(log) 256 | 257 | Note: For most standard functions, you can apply the function to 258 | the InfiniteTuple directly. For example, in the example above, 259 | y = log(x) would have been equivalent and more readable. 260 | 261 | User defined functions can also be applied. 262 | 263 | Example: 264 | def log_squared(n): 265 | return log(n) ** 2 266 | y = x.apply(log_squared) 267 | """ 268 | return type(self)(lambda n: func(self[n])) 269 | 270 | # The Arithmetic superclass will use this to define all of the 271 | # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). 
class DiscreteTimeFunction(TimeFunction):
    """A lazily evaluated function of discrete time.

    Values are computed on demand from self.func and cached in two lists:
    one for non-negative sample indices and one for negative ones.
    """

    def __init__(self, func=None, fs=1, index_set=None):
        """Initializes a data structure for a discrete-time function.

        Args:
          func: A function of n that returns the value at time n / fs.
            n is assumed to be any integer (positive or negative).
            By default, it is set to the identity function f[n] = n / fs.
          fs (int): The sampling rate of the function, in Hertz (samples
            per second).
          index_set (IndexSet): The index set of the discrete-time function
            (fs is ignored if this is specified.)
        """
        if func is not None:
            self.func = func
        else:
            self.func = lambda n: n / fs
        if index_set is None:
            self.index_set = DiscreteTimeSequence(fs)
        else:
            self.index_set = index_set
        self.array_pos = []  # stores values for t >= 0
        self.array_neg = []  # stores values for t < 0

    def _get_value_at_index(self, n):
        """Return the nth time sample, computing and caching as needed.

        Raises:
          KeyError: if n is not an integer.
        """
        if not isinstance(n, numbers.Integral):
            raise KeyError(
                "For a DiscreteTimeFunction f, f[n] returns the "
                "the nth time sample, so n must be an integer. "
                "If you want the value at time t, try f(t) instead.")

        if n >= 0:
            # Extend the cache through index n; array_pos[i] holds f[i].
            m = len(self.array_pos)
            if n >= m:
                for i in range(m, n + 1):
                    self.array_pos.append(self.func(i))
            return self.array_pos[n]
        else:
            # Negative indices are cached in reverse: array_neg[k] is f[-k-1].
            m = len(self.array_neg)
            if -n > m:
                for i in range(-m - 1, n - 1, -1):
                    self.array_neg.append(self.func(i))
            return self.array_neg[-n - 1]

    def _get_value_at_time(self, t):
        """Return the value at time t (in seconds).

        Raises:
          KeyError: if t is not in the index set.
        """
        fs = self.index_set.fs
        if t not in self.index_set:
            raise KeyError((
                "No value at time %.2f for a function with "
                "a sampling rate of %d Hz.") % (t, fs))
        # Round rather than truncate: t * fs can land just below the
        # intended integer because of floating-point error
        # (e.g. 0.29 * 100 == 28.999999999999996), and int() would then
        # select the previous sample.
        return self._get_value_at_index(int(round(t * fs)))

    def __getitem__(self, n):
        """Return the sample(s) at index n (an int, numeric vector or slice).

        Raises:
          TypeError: if n has an unsupported type, or is a slice without
            an explicit start and stop.
        """
        if is_number(n):
            return self._get_value_at_index(n)
        elif is_numeric_vector(n):
            return Vector(self._get_value_at_index(e) for e in n)
        elif isinstance(n, slice):
            # Indices may be any integer, positive or negative, so an
            # open-ended slice has no natural bound to default to.
            if n.start is None or n.stop is None:
                raise TypeError(
                    "Slices of a DiscreteTimeFunction must specify both "
                    "a start and a stop index.")
            return Vector(self._get_value_at_index(e) for e in
                          range(n.start, n.stop, n.step or 1))
        else:
            raise TypeError("Cannot evaluate DiscreteTimeFunction at "
                            "index %s (type %s)." % (n, type(n).__name__))

    def __call__(self, t):
        """Return the value(s) at time t.

        t may be a number, a numeric vector of times, or another
        DiscreteTimeFunction (in which case the two are composed).
        """
        if is_number(t):
            return self._get_value_at_time(t)
        elif is_numeric_vector(t):
            return Vector(self._get_value_at_time(e) for e in t)
        elif isinstance(t, DiscreteTimeFunction):
            self.check_same_index_set(t)
            return DiscreteTimeFunction(func=lambda n: self(t[n]),
                                        index_set=self.index_set)
        else:
            raise TypeError("Cannot evaluate DiscreteTimeFunction at "
                            "time %s (type %s)." % (t, type(t).__name__))

    def apply(self, func):
        """Compose function with the TimeFunction.

        Args:
          func: function to compose with the TimeFunction

        Example:
          f = DiscreteTimeFunction(lambda t: t, fs=1)
          g = f.apply(log)

        Note: For most standard functions, you can apply the function to
          the TimeFunction directly. For example, in the example above,
          g = log(f) would have been equivalent and more readable.

        User-defined functions can also be applied.

        Example:
          def log_squared(f):
              return log(f) ** 2
          g = f.apply(log_squared)
        """
        return DiscreteTimeFunction(lambda n: func(self[n]),
                                    index_set=self.index_set)

    # The Arithmetic superclass will use this to define all of the
    # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.).
    def _operation_factory(self, op):

        def _op_func(self, other):
            self.check_same_index_set(other)
            if is_number(other):
                return DiscreteTimeFunction(
                    lambda n: op(self[n], other),
                    index_set=self.index_set
                )
            elif isinstance(other, DiscreteTimeFunction):
                return DiscreteTimeFunction(
                    lambda n: op(self[n], other[n]),
                    index_set=self.index_set
                )
            else:
                return NotImplemented

        return _op_func

    def __str__(self):
        # Show the five samples centered on index 0.
        first_few = ", ".join(str(self[n]) for n in range(-2, 3))
        return "(..., " + first_few + ", ...)"

    def __repr__(self):
        return self.__str__()

    def plot(self, tmin=0, tmax=10, **kwargs):
        """Plot the samples whose indices cover the times tmin to tmax."""
        nmin = int(np.floor(tmin * self.index_set.fs))
        nmax = int(np.ceil(tmax * self.index_set.fs))
        ts = [self.index_set[n] for n in range(nmin, nmax)]
        ys = [self[n] for n in range(nmin, nmax)]
        plt.plot(ts, ys, ".--", **kwargs)
441 | 442 | Args: 443 | func: A function of n that returns the value in position n. 444 | n is assumed to be any integer (postive or negative). 445 | This function can be defined at initialization time, 446 | or later. By default, it is not set at initialization. 447 | """ 448 | self.index_set = Reals() 449 | if func is not None: 450 | self.func = func 451 | 452 | def __call__(self, t): 453 | if is_number(t): 454 | return self.func(t) 455 | elif is_numeric_vector(t): 456 | try: 457 | # Use vectorized function if it exists 458 | return Vector(self.vfunc(t)) 459 | except: 460 | return Vector(self.func(e) for e in t) 461 | elif isinstance(t, ContinuousTimeFunction): 462 | return ContinuousTimeFunction(func=lambda s: self(t(s))) 463 | else: 464 | raise TypeError("Cannot evaluate ContinuousTimeFunction at " 465 | "time %s (type %s)." % (t, type(t).__name__)) 466 | 467 | def __getitem__(self, t): 468 | return self(t) 469 | 470 | def apply(self, func): 471 | """Compose function with the TimeFunction. 472 | 473 | Args: 474 | func: function to compose with the TimeFunction 475 | 476 | 477 | Example: 478 | f = ContinuousTimeFunction(lambda t: t) 479 | g = f.apply(log) 480 | 481 | Note: For most standard functions, you can apply the function to 482 | the TimeFunction directly. For example, in the example above, 483 | g = log(f) would have been equivalent and more readable. 484 | 485 | User-defined functions can also be applied. 486 | 487 | Example: 488 | def log_squared(f): 489 | return log(f) ** 2 490 | g = f.apply(log_squared) 491 | """ 492 | return ContinuousTimeFunction(lambda t: func(self(t))) 493 | 494 | # The Arithmetic superclass will use this to define all of the 495 | # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). 
class DiscreteValued:
    """Getters for the optional states and interarrival_times attributes."""

    def _require(self, attribute, message):
        # Return the named attribute, or raise NameError with the given
        # message when it has not been set on this object.
        if hasattr(self, attribute):
            return getattr(self, attribute)
        raise NameError(message)

    def get_states(self):
        """Return self.states; raise NameError if it is not defined."""
        return self._require("states",
                             "States not defined for "
                             "function.")

    def get_interarrival_times(self):
        """Return self.interarrival_times; raise NameError if undefined."""
        return self._require("interarrival_times",
                             "Interarrival times not "
                             "defined for function.")

    def get_arrival_times(self):
        """Return the cumulative sum of the interarrival times.

        Raises NameError if interarrival_times is not defined.
        """
        gaps = self._require("interarrival_times",
                             "Interarrival times not "
                             "defined for function.")
        return gaps.cumsum()
566 | 567 | Returns: 568 | A Vector or an InfiniteTuple, depending on whether the 569 | last argument is an InfiniteTuple. 570 | """ 571 | values = [] 572 | for i, arg in enumerate(args): 573 | if is_scalar(arg): # tested first, so a str is appended whole even though is_vector(str) is also true 574 | values.append(arg) 575 | elif is_vector(arg): 576 | values.extend(arg) 577 | elif isinstance(arg, InfiniteTuple): 578 | # check that InfiniteTuple is the last arg 579 | if i == len(args) - 1: 580 | # define concatenated InfiniteTuple 581 | def _func(n): # positions below len(values) come from the finite prefix, the rest from arg 582 | if n < len(values): 583 | return values[n] 584 | else: 585 | return arg[n - len(values)] 586 | return type(arg)(_func) 587 | 588 | raise Exception("InfiniteTuple must be the last " 589 | "argument to concat().") # reached only when an InfiniteTuple appears before the last argument 590 | else: 591 | raise TypeError("Every argument to concat() must be either " 592 | "a scalar, a vector, or an InfiniteTuple.") 593 | 594 | return Vector(values) 595 | 596 | 597 | def is_scalar(x): # numbers and strings count as scalars 598 | return isinstance(x, (numbers.Number, str)) 599 | 600 | 601 | def is_vector(x): # anything that defines __len__ (strings included) 602 | return hasattr(x, "__len__") 603 | 604 | 605 | def is_number(x): 606 | return isinstance(x, numbers.Number) 607 | 608 | 609 | def is_numeric_vector(x): # defines __len__ and every element is a number (vacuously true when empty) 610 | return hasattr(x, "__len__") and all(is_number(i) for i in x) 611 | -------------------------------------------------------------------------------- /symbulate/table.py: -------------------------------------------------------------------------------- 1 | """Data structure for storing tabulated results. 2 | 3 | This module defines a data structure, Table, that stores the 4 | output of a .tabulate() operation. Typically, Table stores 5 | the possible outcomes and their counts or relative frequencies. 6 | """ 7 | from .base import Arithmetic 8 | 9 | # Template filled in by Table._repr_html_. NOTE(review): only the placeholders are visible in this copy; the surrounding markup appears to be missing -- verify against the source file 10 | TABLE_TEMPLATE = ''' 11 | 12 | 13 | 14 | 15 | 16 | 17 | {table_body} 18 | 19 |
{outcome_column}{value_column}
20 | ''' 21 | 22 | 23 | def _get_row_html(outcome, count): # one row for _repr_html_; NOTE(review): no markup is visible in the format string in this copy -- verify against the source file 24 | return "%s%s" % (outcome, count) 25 | 26 | 27 | class Table(dict, Arithmetic): 28 | 29 | def __init__(self, hash_map, outcomes=None, normalize=False, 30 | outcome_column="Outcome"): 31 | self.outcomes = outcomes 32 | self.outcome_column = outcome_column 33 | if outcomes is None: # no explicit outcome list: copy hash_map as-is 34 | for outcome, count in hash_map.items(): 35 | self[outcome] = count 36 | else: 37 | for outcome in outcomes: 38 | self[outcome] = ( 39 | hash_map[outcome] if outcome in hash_map 40 | else 0 41 | ) # listed outcomes missing from hash_map get a count of 0 42 | 43 | if normalize: 44 | for key in self.ordered_keys(): 45 | self[key] /= sum(hash_map.values()) # NOTE(review): the total is loop-invariant but recomputed for every key; a total of 0 makes this division fail 46 | self.value_column = 'Relative Frequency' 47 | else: 48 | self.value_column = 'Frequency' 49 | 50 | def ordered_keys(self): 51 | # get keys in order 52 | if self.outcomes is None: 53 | keys = list(self.keys()) 54 | try: 55 | keys.sort() 56 | except Exception: # sort errors (e.g. keys that cannot be compared) are ignored 57 | pass 58 | else: 59 | # preserve ordering of outcomes, if specified 60 | keys = self.outcomes 61 | 62 | return keys 63 | 64 | def __repr__(self): 65 | keys = self.ordered_keys() 66 | keys_strings = [str(x) for x in keys] 67 | max_key_length = len(max(keys_strings, key=len)) # NOTE(review): max() raises ValueError when the table has no keys 68 | 69 | table_rows = [] 70 | 71 | for i, key in enumerate(keys): 72 | if len(str(key)) <= len(self.outcome_column): 73 | outcome_space = ' ' * (len(self.outcome_column) - len(str(key))) 74 | else: 75 | outcome_space = ' ' * (max_key_length - len(str(key))) 76 | table_rows.append(f"{key}{outcome_space} {self[key]}") 77 | 78 | if i >= 18: # after the 19th row, add an ellipsis row and the last row; NOTE(review): with exactly 19 keys the last row is printed twice 79 | last_outcome = str(keys[-1]) 80 | last_value = str(self[keys[-1]]) 81 | table_rows.append(f"{'.' * len(last_outcome)}{outcome_space} " 82 | f"{'.' 
* len(last_value)}") 83 | table_rows.append(f"{last_outcome}{outcome_space} " 84 | f"{last_value}") 85 | break 86 | 87 | if max_key_length <= len(self.outcome_column): 88 | outcome_header_space = ' ' 89 | total_row_space = ' ' * (len(self.outcome_column) - len('Total')) 90 | else: 91 | outcome_header_space = ' ' * (max_key_length - 92 | len(self.outcome_column) + 1) 93 | total_row_space = ' ' * (max_key_length - len('Total')) 94 | 95 | total = str(sum(self.values())) # total over every entry, including rows elided from the display 96 | table_rows.append(f"{total_row_space}Total {total}") 97 | table_rows.insert(0, f"{self.outcome_column}{outcome_header_space}" 98 | f"{self.value_column}") # header row goes first 99 | 100 | return '\n'.join(table_rows) 101 | 102 | def _repr_html_(self): # HTML rendering with the same 19-row truncation as __repr__ 103 | keys = self.ordered_keys() 104 | 105 | # get HTML for table body 106 | table_body = "" 107 | for i, key in enumerate(keys): 108 | table_body += _get_row_html(key, self[key]) 109 | # if we've already printed 19 rows, skip to end 110 | if i >= 18: # NOTE(review): with exactly 19 keys this appends the last row a second time 111 | table_body += _get_row_html("...", "...") 112 | table_body += _get_row_html(keys[-1], self[keys[-1]]) 113 | break 114 | total = str(sum(self.values())) 115 | table_body += _get_row_html("Total", "%s" % total) 116 | 117 | # return HTML for entire table 118 | return TABLE_TEMPLATE.format(outcome_column = self.outcome_column, 119 | value_column = self.value_column, 120 | table_body=table_body) 121 | 122 | # The Arithmetic superclass will use this to define all of the 123 | # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.).
124 | def _operation_factory(self, op): 125 | 126 | def _op_func(self, other): # applies op(count, other) to every count; the new Table is built with the default outcome_column and normalize=False, so custom column labels are not carried over 127 | return Table( 128 | {outcome: op(count, other) for outcome, count in self.items()}, 129 | self.outcomes 130 | ) 131 | 132 | return _op_func 133 | -------------------------------------------------------------------------------- /symbulate/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlsun/symbulate/3ec60a1d316332d02317661d1ebde8a442a33016/symbulate/tests/__init__.py -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- 1 | # Symbulate tutorial 2 | 3 | The Python package Symbulate provides a user friendly framework for conducting simulations involving probability models. The syntax of Symbulate mirrors the "language of probability" and makes it intuitive to specify, run, analyze, and visualize the results of a simulation. 4 | 5 | This tutorial contains short examples and exercises to help you get started using Symbulate. Completing each section takes about 20 to 30 minutes. The tutorial requires no previous background in probability or familiarity with Python. 6 | -------------------------------------------------------------------------------- /tutorial/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Getting Started with Symbulate" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "The Python package [Symbulate](https://github.com/dlsun/symbulate) provides a user friendly framework for conducting simulations involving probability models. 
The syntax of Symbulate mirrors the \"language of probability\" and makes it intuitive to specify, run, analyze, and visualize the results of a simulation." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "This tutorial contains short examples and exercises to help you get started using Symbulate. Completing each section takes about 20 to 30 minutes. The tutorial requires no previous background in probability or familiarity with Python." 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "\n", 38 | "1. [**Probability spaces**](gs_probspace.ipynb)\n", 39 | "1. [**Random variables**](gs_rv.ipynb)\n", 40 | "1. [**Multiple random variables and joint distributions**](gs_joint.ipynb)\n", 41 | "1. [**Conditioning**](gs_conditioning.ipynb)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "Instructions for downloading and installing Symbulate can be found [here](http://calpoly.edu/~dsun09/python.html). \n", 49 | "**Every time you start Symbulate**, you must first run (SHIFT-ENTER) the following commands." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "from symbulate import *\n", 61 | "%matplotlib inline" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "This tutorial is a series of Jupyter notebooks which contain text, code, and output. You can run the code in a particular cell using (SHIFT-ENTER). Any output is displayed below." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "### Type your commands in this cell and then run using SHIFT-ENTER.\n", 80 | "1 + 2" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "Information about Jupyter notebooks can be found [here](http://jupyter.org/index.html). Full documentation of the Symbulate package can be found [here](https://dlsun.github.io/symbulate/index.html). Links to documentation on particular topics are provided throughout the tutorial. Help documentation for a particular command can be accessed in a Jupyter notebook using the question mark `?` followed by the name of the object for which help is desired." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "?BoxModel" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "[Contents](#contents) | [Probability spaces](gs_probspace.ipynb)>" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.6.1" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | --------------------------------------------------------------------------------