├── LICENSE.md ├── README.md ├── dash ├── regression_bootstrap.py ├── sampling_and_stderr.py ├── simple_regression.py └── ttest_simulation.py └── shiny ├── collinearity ├── screenshot.png ├── server.R └── ui.R ├── logistic_regression ├── screenshot.png ├── server.R ├── shinyapps │ └── mwaskom │ │ └── logistic_regression.dcf └── ui.R ├── mediation ├── screenshot.png ├── server.R ├── shinyapps │ └── mwaskom │ │ └── mediation.dcf └── ui.R ├── multi_regression ├── screenshot.png ├── server.R ├── shinyapps │ └── mwaskom │ │ └── multi_regression.dcf └── ui.R ├── regression_bootstrap ├── screenshot.png ├── server.R ├── shinyapps │ └── mwaskom │ │ └── regression_bootstrap.dcf └── ui.R ├── sampling_and_stderr ├── .Rhistory ├── screenshot.png ├── server.R ├── shinyapps │ └── mwaskom │ │ └── sampling_and_stderr.dcf └── ui.R ├── simple_regression ├── screenshot.png ├── server.R ├── shinyapps │ └── mwaskom │ │ └── simple_regression.dcf └── ui.R └── ttest_simulation ├── .Rhistory ├── screenshot.png ├── server.R ├── shinyapps └── mwaskom │ └── ttest_simulation.dcf └── ui.R /LICENSE.md: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2013, Michael Waskom 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Interactive apps for building statistical intuition 2 | =================================================== 3 | 4 | This is a collection of web apps built using 5 | [Shiny](http://www.rstudio.com/shiny/) and [Dash](https://plotly.com/dash/) 6 | that illustrate statistical concepts and help build intuitions about how they 7 | manifest. The Shiny apps were originally created while teaching 8 | [Psych252](https://psych252.github.io/) at Stanford. 9 | 10 | Sampling and standard error 11 | --------------------------- 12 | 13 | ![](shiny/sampling_and_stderr/screenshot.png) 14 | 15 | [Link to Shiny app](https://supsych.shinyapps.io/sampling_and_stderr/) 16 | 17 | This example demonstrates the relationship between the standard 18 | deviation of a population, the standard deviation and standard error of 19 | the mean for a sample drawn from that population, and the expected 20 | distribution of means that we would obtain if we took many samples (of 21 | the same size) from the population. 
It is meant to emphasize how the 22 | standard error of the mean, as calculated from the sample statistics for 23 | a single sample, corresponds to the width of the expected distribution 24 | of means (under normal assumptions). 25 | 26 | Simulating t tests 27 | ------------------ 28 | 29 | ![](shiny/ttest_simulation/screenshot.png) 30 | 31 | [Link to Shiny app](https://supsych.shinyapps.io/ttest_simulation/) 32 | 33 | This example performs 1000 one-sample t tests (with different samples 34 | from the same distribution) and plots the resulting histograms of t 35 | statistics and p values. It is possible to control both the true effect 36 | size (Cohen's d) and the number of observations in a sample to show how 37 | these two parameters relate to the expected distribution of test statistics. When 38 | the effect size is 0, the simulation shows what happens when the null 39 | hypothesis is true. 40 | 41 | Simple linear regression 42 | ------------------------ 43 | 44 | ![](shiny/simple_regression/screenshot.png) 45 | 46 | [Link to Shiny app](https://gallery.shinyapps.io/simple_regression/) 47 | 48 | This example demonstrates the key objective of linear regression: 49 | finding the coefficients for a linear model that minimize the squared 50 | distance from each observation to the prediction made by the model at 51 | the same value of x. 52 | 53 | Simple logistic regression 54 | -------------------------- 55 | 56 | ![](shiny/logistic_regression/screenshot.png) 57 | 58 | [Link to Shiny app](https://supsych.shinyapps.io/logistic_regression/) 59 | 60 | Similar to the linear regression example, this app shows how the goal of 61 | logistic regression is to find a model (expressed in linear coefficients 62 | -- here just the intercept and a slope term) that maximizes the 63 | likelihood of the data you are fitting the model to.
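As a rough sketch of the idea behind that app -- not code from this repository, and with made-up simulation parameters -- "maximizing the likelihood" of a one-predictor logistic model can be written in a few lines of Python:

```python
# Hypothetical sketch of likelihood maximization for a logistic model;
# the data, coefficients, and variable names here are invented.
import numpy as np
from scipy.optimize import minimize
from scipy.special import expit, xlogy

rng = np.random.default_rng(0)
n = 200
x = rng.normal(0, 2, n)
true_b0, true_b1 = -0.5, 1.0  # assumed generating intercept and slope
y = rng.binomial(1, expit(true_b0 + true_b1 * x))

def neg_log_likelihood(beta):
    # Bernoulli log-likelihood of the observed 0/1 outcomes, negated
    p = expit(beta[0] + beta[1] * x)
    return -np.sum(xlogy(y, p) + xlogy(1 - y, 1 - p))

# The fitted coefficients are whichever values make the data most likely
fit = minimize(neg_log_likelihood, x0=np.zeros(2), method="Nelder-Mead")
b0_hat, b1_hat = fit.x
```

With a reasonably large sample the estimates land close to the generating coefficients, which is the behavior the app lets you explore interactively by adjusting the intercept and slope.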
64 | 65 | Regression uncertainty 66 | ---------------------- 67 | 68 | ![](shiny/regression_bootstrap/screenshot.png) 69 | 70 | [Link to Shiny app](https://gallery.shinyapps.io/regression_bootstrap/) 71 | 72 | This app plots a simple linear regression and allows the user to 73 | visualize the distribution of regression estimates from bootstrap 74 | resamples of the dataset. The user can also plot a normal density with 75 | mean at y-hat and standard deviation equal to the standard error of the 76 | regression estimate at that point. The app thus draws a comparison 77 | between the bootstrap procedure, the expected sampling characteristics 78 | of the regression line, and a common way of visualizing the uncertainty 79 | of a regression. 80 | 81 | Modeling choices in multiple regression 82 | --------------------------------------- 83 | 84 | ![](shiny/multi_regression/screenshot.png) 85 | 86 | [Link to Shiny app](https://gallery.shinyapps.io/multi_regression/) 87 | 88 | This app plots a basic multiple regression with two variables: x, a 89 | continuous measure, and group, a categorical measure. The app lets the 90 | user choose whether to fit a simple regression, an additive multiple 91 | regression, or an interactive multiple regression, and it shows the 92 | `lm()` output and a visualization for each choice. The app also lets the 93 | user control the true effect size for each component of the data to help 94 | build intuition about the visual and statistical consequences of 95 | different relationships between variables in a multiple regression. 96 | 97 | Multicollinearity in multiple regression 98 | ---------------------------------------- 99 | 100 | ![](shiny/collinearity/screenshot.png) 101 | 102 | [Link to Shiny app](https://gallery.shinyapps.io/collinearity/) 103 | 104 | This app shows what happens to multiple regression results when there is 105 | considerable covariance between two continuous predictor variables. 
Although 106 | the overall model fit does not change as the covariance is increased (as 107 | visualized by the regression of y onto yhat and the R squared in the model 108 | summary), the parameter estimates become unstable and the confidence intervals 109 | expand, which yields large p values even though the relationship between the 110 | predictors and the response variable does not change. 111 | 112 | 113 | Simple mediation structure 114 | -------------------------- 115 | 116 | ![](shiny/mediation/screenshot.png) 117 | 118 | [Link to Shiny app](https://supsych.shinyapps.io/mediation) 119 | 120 | This app is intended to provide some intuition about simple mediation models. 121 | It allows you to specify a range of causal structures by changing the strength 122 | (and direction) of the relationships between three variables. Once you have 123 | constructed a structure, you can observe the effects of manipulating the 124 | system. Finally, you can simulate data from a model with the specified 125 | structure and observe how changing the strength of the relationships influences 126 | the regression parameters and inferential statistics. 
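The core comparison the mediation app draws -- the total effect of x on y versus the direct effect of x once the mediator is controlled for -- can be illustrated with a small simulation. This is a hypothetical sketch (in Python rather than the app's R, with made-up path coefficients), not the app's own code:

```python
# Hypothetical fully mediated structure x -> m -> y; all coefficients invented.
import numpy as np

rng = np.random.default_rng(42)
n = 500

x = rng.normal(0, 1, n)
m = 0.8 * x + rng.normal(0, 1, n)  # x influences the mediator
y = 0.8 * m + rng.normal(0, 1, n)  # only the mediator influences y

def ols_slopes(predictors, response):
    # Least-squares coefficients (intercept dropped) via lstsq
    X = np.column_stack([np.ones(len(response)), predictors])
    beta, *_ = np.linalg.lstsq(X, response, rcond=None)
    return beta[1:]

total_effect = ols_slopes(x[:, None], y)[0]                # y ~ x
direct_effect = ols_slopes(np.column_stack([x, m]), y)[0]  # y ~ x + m
```

Regressing y on x alone recovers roughly the product of the two paths (0.8 * 0.8), while the x coefficient shrinks toward zero once m enters the model -- the signature of mediation that the app visualizes as you change the path strengths.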
127 | 128 | -------------------------------------------------------------------------------- /dash/regression_bootstrap.py: -------------------------------------------------------------------------------- 1 | import dash 2 | import dash_core_components as dcc 3 | import dash_html_components as html 4 | from dash.dependencies import Input, Output 5 | import dash_bootstrap_components as dbc 6 | 7 | import numpy as np 8 | from scipy import stats 9 | 10 | 11 | # --- Define the underlying regression model 12 | 13 | rng = np.random.default_rng() 14 | n_obs = 30 15 | n_boot = 20 16 | 17 | # Simple y = ax + b + noise model 18 | x = rng.uniform(-3, 3, n_obs) 19 | y = 2 + .75 * x + rng.normal(0, 1.5, n_obs) 20 | 21 | # Fit a regression using OLS 22 | fit = np.polyfit(x, y, 1) 23 | xx = np.linspace(-3.5, 3.5, 101) 24 | yhat = np.polyval(fit, xx) 25 | 26 | # Compute the analytic confidence interval for the regression 27 | dof = n_obs - 2 28 | xbar = x.mean() 29 | ss_x = np.sum(np.square(x - xbar)) 30 | s = np.sqrt(np.sum(np.square(y - np.polyval(fit, x))) / dof) 31 | se_x = s * np.sqrt(1 / n_obs + np.square(xx - xbar) / ss_x) 32 | z = stats.t(dof).ppf(.975) 33 | ci = yhat - z * se_x, yhat + z * se_x 34 | 35 | # Fit the regression for a small number of bootstrap samples 36 | boot_samples = [] 37 | yhat_boots = [] 38 | for _ in range(n_boot): 39 | sampler = np.random.randint(0, n_obs, n_obs) 40 | x_boot = x[sampler] 41 | y_boot = y[sampler] 42 | fit_boot = np.polyfit(x_boot, y_boot, 1) 43 | boot_samples.append(sampler) 44 | yhat_boots.append(np.polyval(fit_boot, xx)) 45 | 46 | 47 | # --- Define the layout of the app 48 | 49 | external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] 50 | app = dash.Dash(__name__, external_stylesheets=external_stylesheets) 51 | 52 | app.layout = dbc.Container([ 53 | 54 | html.H1("Confidence intervals on regression model"), 55 | 56 | dcc.Graph( 57 | id="plot", 58 | clear_on_unhover=True, 59 | ), 60 | 61 | dcc.RadioItems( 62 | 
id="hover-action", 63 | options=[ 64 | {"label": "Show observations included in bootstrap sample", 65 | "value": "bootstrap"}, 66 | {"label": "Show analytic distribution of error around estimate", 67 | "value": "error"}, 68 | ], 69 | value="bootstrap", 70 | ), 71 | 72 | ]) 73 | 74 | 75 | # --- Define the interaction with the graph 76 | 77 | @app.callback( 78 | Output("plot", "figure"), 79 | [Input("hover-action", "value"), 80 | Input("plot", "hoverData")], 81 | ) 82 | def plot_scatter(hover_action, hover_data): 83 | 84 | # Set up the figure 85 | layout = { 86 | "width": 800, 87 | "height": 600, 88 | "xaxis": {"range": (-4, 4), "title": "x"}, 89 | "yaxis": {"range": (-3, 7), "title": "y"}, 90 | "hovermode": 'closest', 91 | } 92 | 93 | # Set default element parameters 94 | line_color = "#222299" 95 | boot_red = "#cc2222" 96 | boot_gray = "#999999" 97 | scatter_color = "#222222" 98 | scatter_size = 10 99 | 100 | # Process the hover action 101 | hover_line = None 102 | hover_point = None 103 | if hover_data is not None: 104 | hover_element = hover_data["points"][0] 105 | hover_line = hover_element["curveNumber"] 106 | hover_point = hover_element["pointIndex"] 107 | 108 | # Set up the list of graph elements 109 | data = [] 110 | 111 | # Define the parameters of the bootstrap sample lines, based on hover 112 | show_bootstrap_sample = ( 113 | hover_action == "bootstrap" 114 | and hover_line is not None 115 | and hover_line < n_boot 116 | ) 117 | if show_bootstrap_sample: 118 | hover_sample = hover_line 119 | used, count = np.unique(boot_samples[hover_sample], return_counts=True) 120 | show_obs = np.in1d(np.arange(n_obs), used) 121 | scatter_color = np.where(show_obs, boot_red, boot_gray) 122 | scatter_size = np.full(n_obs, 10) 123 | scatter_size[show_obs] = 10 * np.sqrt(count) 124 | 125 | # Plot the regression line for each bootstrap sample 126 | for i, yhat_boot in enumerate(yhat_boots): 127 | 128 | width = 1.5 129 | color = boot_gray 130 | 131 | if hover_action == 
"bootstrap" and i == hover_line: 132 | width = 2.5 133 | color = boot_red 134 | 135 | data.append({ 136 | "x": xx, "y": yhat_boot, 137 | "mode": "lines", "showlegend": False, 138 | "line": {"color": color, "width": width}, 139 | "hoverinfo": "none" if hover_action == "bootstrap" else "skip", 140 | }) 141 | 142 | # Plot the regression estimate and its confidence interval 143 | data.extend([ 144 | { 145 | "x": xx, "y": yhat, 146 | "mode": "lines", "showlegend": False, 147 | "line": {"color": line_color, "width": 3}, 148 | "hoverinfo": "none" if hover_action == "error" else "skip", 149 | }, 150 | { 151 | "x": xx, "y": ci[0], 152 | "mode": "lines", "showlegend": False, 153 | "line": {"color": line_color, "width": 0}, 154 | "hoverinfo": "skip", 155 | }, 156 | { 157 | "x": xx, "y": ci[1], 158 | "mode": "lines", "showlegend": False, 159 | "line": {"color": line_color, "width": 0}, 160 | "fill": "tonexty", "fillcolor": line_color + "33", 161 | "hoverinfo": "skip", 162 | } 163 | ]) 164 | 165 | # Plot the observations 166 | data.append({ 167 | "x": x, "y": y, 168 | "mode": "markers", "showlegend": False, 169 | "marker": {"color": scatter_color, "size": scatter_size}, 170 | "hoverinfo": "skip", 171 | }) 172 | 173 | # Plot the error distribution around the regression estimate 174 | show_yhat_error = ( 175 | hover_action == "error" 176 | and hover_point is not None 177 | and hover_line == n_boot 178 | ) 179 | if show_yhat_error: 180 | 181 | err_loc = yhat[hover_point] 182 | err_sd = se_x[hover_point] 183 | err_y = np.linspace(err_loc - err_sd * 5, err_loc + err_sd * 5, 100) 184 | err_dist = stats.t(dof, loc=err_loc, scale=err_sd) 185 | err_x = xx[hover_point] + err_dist.pdf(err_y) * .5 186 | 187 | data.extend([ 188 | { 189 | "x": np.full_like(err_y, xx[hover_point]), "y": err_y, 190 | "mode": "lines", "showlegend": False, 191 | "line": {"color": boot_red, "width": 1, "dash": "dash"}, 192 | "hoverinfo": "skip", 193 | }, 194 | { 195 | "x": err_x, "y": err_y, 196 | "mode": 
"lines", "showlegend": False, 197 | "line": {"color": boot_red, "width": 3}, 198 | "hoverinfo": "skip", 199 | }, 200 | ]) 201 | 202 | fig = { 203 | "data": data, 204 | "layout": layout, 205 | } 206 | 207 | return fig 208 | 209 | 210 | if __name__ == '__main__': 211 | app.run_server(debug=True) 212 | -------------------------------------------------------------------------------- /dash/sampling_and_stderr.py: -------------------------------------------------------------------------------- 1 | import dash 2 | import dash_core_components as dcc 3 | import dash_html_components as html 4 | from dash.dependencies import Input, Output 5 | 6 | from plotly.subplots import make_subplots 7 | import plotly.graph_objects as go 8 | 9 | import numpy as np 10 | from scipy import stats 11 | 12 | # --- Define the layout of the app 13 | 14 | external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] 15 | app = dash.Dash(__name__, external_stylesheets=external_stylesheets) 16 | app.layout = html.Div([ 17 | 18 | html.H1("Sampling and standard error"), 19 | 20 | dcc.Graph(id="plots"), 21 | 22 | 23 | html.Div([ 24 | html.H4("Population standard deviation", id="population-label"), 25 | dcc.Slider( 26 | id="population-slider", 27 | min=4, 28 | max=16, 29 | value=10, 30 | step=1, 31 | marks={v: "" if v % 4 else f"{v/4:.0f}" for v in range(17)}, 32 | ), 33 | ]), 34 | 35 | html.Div([ 36 | html.H4("Sample size", id="sample-label"), 37 | dcc.Slider( 38 | id="sample-slider", 39 | min=0, 40 | max=200, 41 | value=100, 42 | step=10, 43 | marks={v: "" if v % 50 else str(v) for v in range(0, 210, 10)}, 44 | ), 45 | ]), 46 | ]) 47 | 48 | 49 | # --- Define the statistical simulation 50 | 51 | @app.callback( 52 | Output("plots", "figure"), 53 | [Input("population-slider", "value"), Input("sample-slider", "value")], 54 | ) 55 | def update_histograms(sd, sample_size): 56 | 57 | # Define the population distribution 58 | sd = sd / 4 # Because of bug in slider with float values 59 | d = 
stats.norm(0, sd) 60 | 61 | # Simulate n_sim experiments with a given true effect size and sample size 62 | n_sim = 1000 63 | 64 | # Set up the figure to show the results of the simulation 65 | fig = make_subplots( 66 | rows=1, cols=3, 67 | shared_xaxes=True, 68 | subplot_titles=[ 69 | "Generating distribution", 70 | f"Distribution of one sample (N = {sample_size})", 71 | f"Distribution of means from {n_sim} samples", 72 | ] 73 | ) 74 | 75 | # Plot the probability density function of the population 76 | x = np.linspace(-9, 9, 5001) 77 | y = d.pdf(x) 78 | t_hist = go.Scatter(x=x, y=y, mode="lines", showlegend=False) 79 | fig.add_trace(t_hist, row=1, col=1) 80 | fig.update_xaxes(range=[-9, 9], row=1, col=1) 81 | fig.update_yaxes(range=[0, .55], row=1, col=1) 82 | 83 | # Plot a histogram of one sample 84 | sample = d.rvs(sample_size) 85 | bins = dict(start=-9, end=9, size=1) 86 | hist = go.Histogram(x=sample, autobinx=False, xbins=bins, showlegend=False) 87 | fig.add_trace(hist, row=1, col=2) 88 | fig.update_xaxes(range=[-9, 9], row=1, col=2) 89 | fig.update_yaxes(range=[0, sample_size * .75], row=1, col=2) 90 | 91 | # Plot a histogram of the means from many samples 92 | samples = d.rvs((sample_size, n_sim)) 93 | means = samples.mean(axis=0) 94 | bins = dict(start=-9, end=9, size=.2) 95 | hist = go.Histogram(x=means, autobinx=False, xbins=bins, showlegend=False) 96 | fig.add_trace(hist, row=1, col=3) 97 | fig.update_xaxes(range=[-9, 9], row=1, col=3) 98 | fig.update_yaxes(range=[0, n_sim * .55], row=1, col=3) 99 | 100 | # Annotate with descriptive statistics 101 | mean = sample.mean() 102 | stdev = sample.std() 103 | sem = stdev / np.sqrt(sample_size) 104 | 105 | annot_ys = .85, .8, .75 106 | for col in [1, 2, 3]: 107 | 108 | # Add the population mean +/- sd to each plot 109 | fig.add_shape( 110 | type="line", 111 | yref="paper", 112 | xref=f"x{col}", 113 | x0=-sd, 114 | x1=+sd, 115 | y0=annot_ys[0], 116 | y1=annot_ys[0], 117 | ) 118 | 119 | # Add the sample mean +/- 
sd to each plot 120 | fig.add_shape( 121 | type="line", 122 | yref="paper", 123 | xref=f"x{col}", 124 | x0=mean - stdev, 125 | x1=mean + stdev, 126 | y0=annot_ys[1], 127 | y1=annot_ys[1], 128 | ) 129 | 130 | # Add the sample mean +/- sem to each plot 131 | fig.add_shape( 132 | type="line", 133 | yref="paper", 134 | xref=f"x{col}", 135 | x0=mean + sem, 136 | x1=mean - sem, 137 | y0=annot_ys[2], 138 | y1=annot_ys[2], 139 | ) 140 | 141 | annotations = list(fig["layout"]["annotations"]) 142 | annotations.extend([ 143 | dict( 144 | x=0, xref="x1", 145 | y=annot_ys[0], yref="paper", 146 | text="Pop. mean+/-s.d.", 147 | ax=-40, ay=-20, 148 | ), 149 | dict( 150 | x=mean, xref="x1", 151 | y=annot_ys[1], yref="paper", 152 | text="Samp. mean+/-s.d.", 153 | ax=-50, ay=30, 154 | ), 155 | dict( 156 | x=mean, xref="x1", 157 | y=annot_ys[2], yref="paper", 158 | # showarrow=False, 159 | text="Samp. mean+/-s.e.", 160 | ax=50, ay=40, 161 | ), 162 | ]) 163 | fig["layout"]["annotations"] = annotations 164 | 165 | fig.update_xaxes(showgrid=False, zeroline=False) 166 | 167 | return fig 168 | 169 | 170 | if __name__ == '__main__': 171 | app.run_server(debug=True) 172 | -------------------------------------------------------------------------------- /dash/simple_regression.py: -------------------------------------------------------------------------------- 1 | import dash 2 | import dash_core_components as dcc 3 | import dash_html_components as html 4 | from dash.dependencies import Input, Output 5 | import dash_bootstrap_components as dbc 6 | 7 | import plotly.graph_objects as go 8 | 9 | import numpy as np 10 | import statsmodels.api as sm 11 | 12 | 13 | # --- Define the underlying regression model 14 | 15 | true_intercept = 2 16 | true_slope = 1.25 17 | 18 | n_obs = 50 19 | x = np.random.normal(0, 2, n_obs) 20 | y = true_intercept + true_slope * x + np.random.normal(0, 1, n_obs) 21 | 22 | intercept_options = np.arange(-2, 6.5, .5) 23 | starting_intercept = 
np.random.choice(intercept_options) 24 | 25 | slope_options = np.arange(-1, 3.25, .25) 26 | starting_slope = np.random.choice(slope_options) 27 | 28 | 29 | # --- Define the layout of the app 30 | 31 | app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) 32 | 33 | app.layout = dbc.Container([ 34 | 35 | html.H1("Simple linear regression"), 36 | 37 | html.Div([ 38 | dbc.Row([ 39 | dbc.Col( 40 | dcc.Graph(id="scatter-plot"), 41 | width=6, 42 | ), 43 | dbc.Col([ 44 | dbc.Row([ 45 | dcc.Graph(id="score-plot"), 46 | dcc.Graph(id="resid-plot"), 47 | ]), 48 | ], width=6), 49 | ]), 50 | ]), 51 | 52 | html.Div([ 53 | html.H4("Intercept", id="intercept-label"), 54 | dcc.Slider( 55 | id="intercept-slider", 56 | min=-2, 57 | max=6, 58 | step=.5, 59 | value=starting_intercept, 60 | marks={val: "" for val in intercept_options}, 61 | ), 62 | ]), 63 | 64 | html.Div([ 65 | html.H4("Slope", id="slope-label"), 66 | dcc.Slider( 67 | id="slope-slider", 68 | min=-1, 69 | max=3, 70 | step=.25, 71 | value=starting_slope, 72 | marks={val: "" for val in slope_options}, 73 | ), 74 | ]), 75 | 76 | dcc.Checklist( 77 | id="results-check", 78 | options=[ 79 | {"label": " Show OLS fit results", "value": "true"}, 80 | ], 81 | ), 82 | 83 | html.Pre(id="results-text"), 84 | 85 | ]) 86 | 87 | 88 | @app.callback( 89 | Output("intercept-label", "children"), 90 | [Input("intercept-slider", "value")], 91 | ) 92 | def label_intercept(intercept): 93 | return f"Intercept = {intercept:.1f}" 94 | 95 | 96 | @app.callback( 97 | Output("slope-label", "children"), 98 | [Input("slope-slider", "value")], 99 | ) 100 | def label_slope(slope): 101 | return f"Slope = {slope:.1f}" 102 | 103 | 104 | # --- Draw a scatter plot of the data and the specified regression line 105 | 106 | 107 | @app.callback( 108 | Output("scatter-plot", "figure"), 109 | [Input("intercept-slider", "value"), Input("slope-slider", "value")], 110 | ) 111 | def plot_scatter(intercept, slope): 112 | 113 | fig = go.Figure() 114 | 
fig.update_layout( 115 | width=500, height=500, 116 | ) 117 | fig.update_xaxes(range=(-5, 5), title="x") 118 | fig.update_yaxes(range=(-3, 7), title="y") 119 | 120 | fig.add_trace(go.Scatter(x=x, y=y, mode="markers", showlegend=False)) 121 | 122 | xx = np.linspace(-5, 5, 100) 123 | yy = intercept + slope * xx 124 | best_fit = intercept == true_intercept and slope == true_slope 125 | color = "#636EFA" if best_fit else "#EF553B" 126 | fig.add_trace(go.Scatter(x=xx, y=yy, mode="lines", 127 | line=dict(color=color), 128 | showlegend=False)) 129 | 130 | return fig 131 | 132 | 133 | # --- Show the residual sum of squares and compare to fit using true values 134 | 135 | 136 | @app.callback( 137 | Output("score-plot", "figure"), 138 | [Input("intercept-slider", "value"), Input("slope-slider", "value")], 139 | ) 140 | def plot_score(intercept, slope): 141 | 142 | yhat = intercept + slope * x 143 | ss_res = np.sum(np.square(y - yhat)) 144 | 145 | y_opt = true_intercept + true_slope * x 146 | ss_res_opt = np.sum(np.square(y - y_opt)) 147 | 148 | fig = go.Figure() 149 | fig.update_layout( 150 | width=500, height=200, 151 | ) 152 | fig.update_xaxes(range=(0, 1000), title="Sum of squares of residuals") 153 | fig.update_yaxes(range=(0, 1), showticklabels=False) 154 | 155 | best_fit = intercept == true_intercept and slope == true_slope 156 | fig.add_trace(go.Scatter(x=[ss_res, ss_res_opt], y=[.5, .5], 157 | mode="markers", marker_size=10, 158 | marker_symbol=["asterisk-open", "circle-open"], 159 | marker_color=[ 160 | "#636EFA" if best_fit else "#EF553B", "#636EFA", 161 | ], 162 | marker_line_width=2, 163 | showlegend=False)) 164 | 165 | return fig 166 | 167 | 168 | # --- Show the distribution of the residuals 169 | 170 | 171 | @app.callback( 172 | Output("resid-plot", "figure"), 173 | [Input("intercept-slider", "value"), Input("slope-slider", "value")], 174 | ) 175 | def plot_residuals(intercept, slope): 176 | 177 | yhat = intercept + slope * x 178 | residuals = y - yhat 179 | 
best_fit = intercept == true_intercept and slope == true_slope 180 | 181 | fig = go.Figure() 182 | fig.update_layout( 183 | width=500, height=300, 184 | ) 185 | fig.update_xaxes(range=(-5, 5), title="Residuals") 186 | fig.update_yaxes(range=(0, 20), title="Count") 187 | 188 | bins = dict(start=-5, end=5, size=.5) 189 | fig.add_trace( 190 | go.Histogram(x=residuals, 191 | marker_color="#636EFA" if best_fit else "#EF553B", 192 | autobinx=False, xbins=bins, showlegend=False), 193 | ) 194 | 195 | fig.update_layout(shapes=[ 196 | dict( 197 | type="line", 198 | yref="paper", y0=0, y1=1, 199 | xref="x1", x0=0, x1=0, 200 | line=dict(color="#999999", dash="dot") 201 | ), 202 | ]) 203 | 204 | return fig 205 | 206 | 207 | @app.callback( 208 | Output("results-text", "children"), 209 | [Input("results-check", "value")], 210 | ) 211 | def print_ols_fit(checked): 212 | if checked: 213 | m = sm.OLS(y, sm.add_constant(x)).fit() 214 | return m.summary().as_text() 215 | else: 216 | return "" 217 | 218 | 219 | if __name__ == '__main__': 220 | app.run_server(debug=True) 221 | -------------------------------------------------------------------------------- /dash/ttest_simulation.py: -------------------------------------------------------------------------------- 1 | import dash 2 | import dash_core_components as dcc 3 | import dash_html_components as html 4 | from dash.dependencies import Input, Output 5 | 6 | from plotly.subplots import make_subplots 7 | import plotly.graph_objects as go 8 | 9 | import numpy as np 10 | from scipy import stats 11 | 12 | # --- Define the layout of the app 13 | 14 | external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] 15 | app = dash.Dash(__name__, external_stylesheets=external_stylesheets) 16 | app.layout = html.Div([ 17 | 18 | html.H1("Simulating t-tests"), 19 | 20 | dcc.Graph(id="hist-plots"), 21 | 22 | 23 | html.Div([ 24 | html.H4(id="effect-label"), 25 | dcc.Slider( 26 | id="effect-slider", 27 | min=0, 28 | max=1, 29 | value=0, 30 | 
step=.05, 31 | marks={v: f"{v:.2f}" for v in np.arange(0, 1.05, .05)} 32 | ), 33 | ]), 34 | 35 | html.Div([ 36 | html.H4(id="sample-label"), 37 | dcc.Slider( 38 | id="sample-slider", 39 | min=2, 40 | max=50, 41 | value=20, 42 | marks={v: "" if v % 10 else str(v) for v in range(2, 51)} 43 | 44 | ), 45 | ]), 46 | ]) 47 | 48 | 49 | # --- Add callbacks to show values for the size of the effect and sample 50 | 51 | 52 | @app.callback( 53 | Output("sample-label", "children"), 54 | [Input("sample-slider", "value")], 55 | ) 56 | def set_sample_label(sample_size): 57 | return f"Sample size: {sample_size}" 58 | 59 | 60 | @app.callback( 61 | Output("effect-label", "children"), 62 | [Input("effect-slider", "value")], 63 | ) 64 | def set_effect_label(effect_size): 65 | return f"Effect size: {effect_size:.2f}" 66 | 67 | 68 | # --- Define the statistical simulation 69 | 70 | @app.callback( 71 | Output("hist-plots", "figure"), 72 | [Input("effect-slider", "value"), Input("sample-slider", "value")], 73 | ) 74 | def update_histograms(effect_size, sample_size): 75 | 76 | # Simulate n_sim experiments with a given true effect size and sample size 77 | n_sim = 1000 78 | 79 | # Sample data for all experiments at once 80 | sample = np.random.normal(effect_size, 1, (sample_size, n_sim)) 81 | 82 | # Compute the mean and standard error for each experiment 83 | means = sample.mean(axis=0) 84 | sems = sample.std(axis=0) / np.sqrt(sample_size) 85 | 86 | # Compute the t statistic and corresponding (one-tailed) p value 87 | dof = sample_size - 1 88 | t_dist = stats.t(dof) 89 | ts = means / sems 90 | ps = t_dist.sf(ts) 91 | 92 | # Compute the critical values 93 | alpha = .05 94 | t_crit = t_dist.ppf(1 - alpha) 95 | p_crit = alpha 96 | 97 | # Compute the theoretical power and empirical proportion of rejected nulls 98 | if effect_size: 99 | effect_dist = stats.t(dof, loc=effect_size * np.sqrt(sample_size)) 100 | theory_power = effect_dist.sf(t_crit) 101 | else: 102 | theory_power = np.nan 103 | 
rejected_nulls = (ts > t_crit).mean() 104 | titles = ( 105 | f"Theoretical power: {theory_power:.2f}", 106 | f"Proportion rejected nulls: {rejected_nulls:.2f}", 107 | ) 108 | 109 | # Set up the figure to show the results of the simulation 110 | fig = make_subplots(rows=1, cols=2, 111 | subplot_titles=titles) 112 | 113 | fig.update_layout(shapes=[ 114 | dict( 115 | type="line", 116 | yref="paper", y0=0, y1=1, 117 | xref="x1", x0=t_crit, x1=t_crit, 118 | line=dict(color="#999999", dash="dot") 119 | ), 120 | dict( 121 | type="line", 122 | yref="paper", y0=0, y1=1, 123 | xref="x2", x0=p_crit, x1=p_crit, 124 | line=dict(color="#999999", dash="dot") 125 | ) 126 | ]) 127 | 128 | # Plot a histogram of t statistics across all experiments 129 | tbins = dict(start=-10, end=10, size=.5) 130 | t_hist = go.Histogram(x=ts, autobinx=False, xbins=tbins, showlegend=False) 131 | fig.add_trace(t_hist, row=1, col=1) 132 | fig.update_xaxes(title="t statistic", range=[-10, 10], row=1, col=1) 133 | fig.update_yaxes(range=[0, 250], row=1, col=1) 134 | 135 | # Plot a histogram of p values across all experiments 136 | pbins = dict(start=0, end=1, size=.025) 137 | p_hist = go.Histogram(x=ps, autobinx=False, xbins=pbins, showlegend=False) 138 | fig.add_trace(p_hist, row=1, col=2) 139 | fig.update_xaxes(title="p value", range=[0, 1], row=1, col=2) 140 | fig.update_yaxes(range=[0, 1000], row=1, col=2) 141 | 142 | return fig 143 | 144 | 145 | if __name__ == '__main__': 146 | app.run_server(debug=True) 147 | -------------------------------------------------------------------------------- /shiny/collinearity/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/collinearity/screenshot.png -------------------------------------------------------------------------------- /shiny/collinearity/server.R: 
-------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(MASS) 3 | 4 | shinyServer(function(input, output) { 5 | 6 | # -------------------------------------------------------------------------- 7 | # Simulate the dataset with correlated regressors 8 | random.sample <- reactive({ 9 | 10 | # Dummy line to trigger off button-press 11 | foo <- input$resample 12 | n.obs <- 60 13 | cov <- input$pred.cov 14 | sigma <- matrix(c(1, cov, cov, 1), nrow=2) 15 | X <- mvrnorm(n.obs, c(0, 0), sigma) 16 | b <- c(1, 1) 17 | y <- X %*% b + rnorm(n.obs, sd=1.5) 18 | df <- cbind(y, X) 19 | df <- as.data.frame(df) 20 | names(df) <- c("y", "x.1", "x.2") 21 | 22 | return(df) 23 | 24 | }) 25 | 26 | # -------------------------------------------------------------------------- 27 | # Fit the full and nested regression models 28 | fit.regressions <- reactive({ 29 | 30 | # Get the current model structure 31 | df <- random.sample() 32 | 33 | reg.full <- lm(y ~ x.1 + x.2, df) 34 | reg.x.1 <- lm(y ~ x.1, df) 35 | reg.x.2 <- lm(y ~ x.2, df) 36 | 37 | return(list(reg.full=reg.full, reg.x.1=reg.x.1, reg.x.2=reg.x.2)) 38 | 39 | }) 40 | 41 | #--------------------------------------------------------------------------- 42 | # Plot a scatter of the data with regression lines corresponding to the model 43 | output$reg.plots <- renderPlot({ 44 | 45 | # Get the current regression data 46 | df <- random.sample() 47 | reg.full <- fit.regressions()$reg.full 48 | y.hat <- predict(reg.full) 49 | 50 | # Set up the plots 51 | par(mfrow=c(1, 2)) 52 | #colors = sample(brewer.pal(8, "Set2"), 2, FALSE) 53 | colors = c("#66C2A5", "#80B1D3") 54 | 55 | # Plot y on yhat 56 | plot(y.hat, df$y, type="p", main="Overall model fit", 57 | xlab="y hat", ylab="y", col=colors[1], pch=16, 58 | bty="n", xlim=c(-6, 6), ylim=c(-6, 6)) 59 | abline(0, 1, lty="dotted") 60 | 61 | # Plot x.2 on x.1 62 | plot(df$x.1, df$x.2, type="p", main="Predictor variable correlation", 63 | 
xlab="x.1", ylab="x.2", col=colors[2], pch=16, 64 | bty="n", xlim=c(-3, 3), ylim=c(-3, 3)) 65 | abline(0, 1, lty="dotted") 66 | 67 | }) 68 | 69 | #--------------------------------------------------------------------------- 70 | # Plot the model coefficients and standard errors 71 | output$coef.plots <- renderPlot({ 72 | 73 | # Get the current regression data 74 | reg.list <- fit.regressions() 75 | 76 | # Get more info about the regressions 77 | reg.full <- summary(reg.list$reg.full) 78 | reg.x.1 <- summary(reg.list$reg.x.1) 79 | reg.x.2 <- summary(reg.list$reg.x.2) 80 | 81 | # Set up the x positions 82 | x.pos <- c(1, 2, 4, 5) 83 | x.names <- rep(c("x.1", "x.2"), 2) 84 | #c.idx <- sample(1:5, 2, FALSE) 85 | #c.idx <- c.idx * 2 86 | #pal <- brewer.pal(10, "Paired") 87 | #colors <- c(pal[c.idx], pal[c.idx - 1]) 88 | colors <- rep(c("#343434", "#767676"), 2) 89 | 90 | # Plot the point estimates of the coefficients 91 | full.coefs <- reg.full$coefficients[c("x.1", "x.2"), "Estimate"] 92 | x.1.coef <- reg.x.1$coefficients["x.1", "Estimate"] 93 | x.2.coef <- reg.x.2$coefficients["x.2", "Estimate"] 94 | coefs <- c(full.coefs, x.1.coef, x.2.coef) 95 | 96 | plot(x.pos, coefs, type="p", pch=16, xlab="", ylab="", cex=1.5, 97 | xlim=c(0, 6), ylim=c(-1.3, 3.3), bty="n", xaxt="n", col=colors) 98 | abline(h=0, lty="dashed") 99 | 100 | text(1.5, 3.1, "Coefficients in full model") 101 | text(4.5, 3.1, "Coefficients in separate models") 102 | 103 | # Plot the standard errors 104 | full.ses <- reg.full$coefficients[c("x.1", "x.2"), "Std. Error"] 105 | x.1.se <- reg.x.1$coefficients["x.1", "Std. Error"] 106 | x.2.se <- reg.x.2$coefficients["x.2", "Std. 
Error"] 107 | ses <- c(full.ses, x.1.se, x.2.se) 108 | for (col in 1:4) { 109 | x <- x.pos[col] 110 | coef <- coefs[col] 111 | ci <- 1.96 * ses[col] 112 | lines(c(x, x), c(coef - ci, coef + ci), lwd=4, col=colors[col]) 113 | text(x, -1.15, x.names[col]) 114 | } 115 | 116 | }) 117 | 118 | #--------------------------------------------------------------------------- 119 | # Show the lm() summary for the full model 120 | output$reg.summary <- renderPrint({ 121 | 122 | return(summary(fit.regressions()$reg.full)) 123 | 124 | }) 125 | 126 | }) -------------------------------------------------------------------------------- /shiny/collinearity/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | shinyUI(pageWithSidebar( 4 | 5 | headerPanel("Multicollinearity in multiple regression"), 6 | 7 | sidebarPanel( 8 | 9 | div(p("Explore the effects of multicollinearity on multiple regression results")), 10 | 11 | div( 12 | br(), 13 | sliderInput("pred.cov", 14 | "Predictor covariance", 15 | min=0, max=.95, step=.05, value=0, ticks=FALSE), 16 | br(), 17 | actionButton("resample", "New Sample") 18 | ) 19 | ), 20 | 21 | mainPanel( 22 | div(plotOutput("reg.plots", width=600, height=320)), 23 | div(plotOutput("coef.plots", width=600, height=320)), 24 | div(p(strong("Full model summary"))), 25 | div(class="span7", verbatimTextOutput("reg.summary")) 26 | ) 27 | 28 | )) 29 | -------------------------------------------------------------------------------- /shiny/logistic_regression/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/logistic_regression/screenshot.png -------------------------------------------------------------------------------- /shiny/logistic_regression/server.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 
library(RColorBrewer) 3 | 4 | shinyServer(function(input, output) { 5 | 6 | # -------------------------------------------------------------------------- 7 | # Get a set of random data with a fixed true model 8 | draw.sample <- reactive({ 9 | # This gets called whenever the app is reloaded 10 | 11 | # Hardcode the true relationship 12 | n.obs = 30 13 | true.a <- .75 14 | true.b <- 1.25 15 | 16 | x <- runif(n.obs, -5, 5) 17 | prob.x <- exp(true.a + true.b * x) / (1 + exp(true.a + true.b * x)) 18 | y <- rbinom(n.obs, 1, prob.x) 19 | prob.data <- dbinom(y, 1, prob.x) 20 | 21 | model.summary <- summary(glm(y ~ x, family="binomial")) 22 | 23 | return(list(x=x, y=y, prob.x=prob.x, model.summary=model.summary)) 24 | 25 | }) 26 | 27 | # -------------------------------------------------------------------------- 28 | # Calculate the current values of the model given the inputs 29 | regression <- reactive({ 30 | 31 | # Get shorthand access to the attributes we care about 32 | data.vals <- draw.sample() 33 | x <- data.vals$x 34 | prob.x <- data.vals$prob.x 35 | y <- data.vals$y 36 | a <- input$intercept 37 | b <- input$slope 38 | 39 | # Calculate the probability of the data given the current model 40 | prob.hat <- exp(a + b * x) / (1 + exp(a + b * x)) 41 | prob.data = dbinom(y, 1, prob.hat) 42 | log.like <- sum(log(prob.data)) 43 | 44 | # Possibly inverse-logit transform the data 45 | if (input$logit){ 46 | y <- log(prob.x / (1 - prob.x)) 47 | } 48 | 49 | return(list(x=x, y=y, a=a, b=b, 50 | prob.data=prob.data, 51 | log.like=log.like)) 52 | 53 | }) 54 | 55 | #--------------------------------------------------------------------------- 56 | # Plot the data along with the current logistic model 57 | output$reg.plot <- renderPlot({ 58 | 59 | # Get the current regression data 60 | reg.data <- regression() 61 | a <- reg.data$a 62 | b <- reg.data$b 63 | x <- reg.data$x 64 | y <- reg.data$y 65 | prob.data <- reg.data$prob.data 66 | 67 | # Plot the regression curve 68 | x.vals <- 
seq(-5, 5, .01) 69 | if (input$logit){ 70 | y.vals <- a + b * x.vals 71 | y.lim <- c(-6, 6) 72 | } 73 | else{ 74 | y.vals <- exp(a + b * x.vals) / (1 + exp(a + b * x.vals)) 75 | y.lim <- c(0, 1) 76 | } 77 | plot(x.vals, y.vals, type="l", lwd=3, col="dimgray", 78 | bty="n", xlim=c(-5, 5), ylim=y.lim, xlab="x", ylab="y", 79 | main="Logistic model Y ~ X") 80 | 81 | # Plot the observations 82 | color.idx <- pmax(floor(prob.data * 10), 1) 83 | color.bin <- rev(brewer.pal(9, "RdPu"))[color.idx] 84 | points(x, y, pch=21, cex=1.5, col="black", bg=color.bin) 85 | 86 | # Plot the current equation as a legend 87 | dv <- if(input$logit) "y" else "logit(y)" 88 | yloc <- if(input$logit) -3.6 else .2 89 | equation = sprintf("%s = %.3g + %.3g * x", dv, a, b) 90 | legend(1, yloc, equation, lty=1, lwd=2, bty="n") 91 | 92 | }) 93 | 94 | #--------------------------------------------------------------------------- 95 | # Plot the log likelihood of the data with the current model 96 | output$like.plot <- renderPlot({ 97 | 98 | # Get the current regression data 99 | reg.data <- regression() 100 | log.like <- reg.data$log.like 101 | 102 | # Plot the two points 103 | plot(log.like, 1, cex=2, yaxt="n", bty="n", pch=16, col="#AE017E", 104 | xlim=c(-50, 0), ylab="", xlab="", main="Log-likelihood of the data") 105 | 106 | }) 107 | 108 | #--------------------------------------------------------------------------- 109 | # Print the glm() summary of the true model 110 | output$summary <- renderPrint({ 111 | 112 | if (input$summary){ 113 | return(draw.sample()$model.summary) 114 | } 115 | 116 | }) 117 | 118 | }) 119 | -------------------------------------------------------------------------------- /shiny/logistic_regression/shinyapps/mwaskom/logistic_regression.dcf: -------------------------------------------------------------------------------- 1 | name: logistic_regression 2 | account: mwaskom 3 | bundleId: 200327 4 | url: https://mwaskom.shinyapps.io/logistic_regression 5 | 
-------------------------------------------------------------------------------- /shiny/logistic_regression/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | fig.width <- 600 4 | fig.height <- 450 5 | 6 | shinyUI(pageWithSidebar( 7 | 8 | headerPanel("Simple Logistic Regression"), 9 | 10 | sidebarPanel( 11 | 12 | div(p("Try to find values for the slope and intercept that maximize the likelihood of the data.")), 13 | div( 14 | 15 | sliderInput("intercept", 16 | strong("Intercept"), 17 | min=-3, max=3, step=.25, 18 | value=sample(seq(-3, 3, .25), 1), ticks=FALSE), 19 | br(), 20 | sliderInput("slope", 21 | strong("Slope"), 22 | min=-3, max=3, step=.25, 23 | value=sample(seq(-2, 2, .25), 1), ticks=FALSE), 24 | br(), 25 | checkboxInput("logit", 26 | strong("Plot in logit domain"), 27 | value=FALSE), 28 | br(), 29 | checkboxInput("summary", 30 | strong("Show summary(glm(y ~ x))"), 31 | value=FALSE) 32 | 33 | ) 34 | ), 35 | 36 | mainPanel( 37 | plotOutput("reg.plot", width=fig.width, height=fig.height), 38 | plotOutput("like.plot", width=fig.width, height=fig.height / 3), 39 | div(class="span7", conditionalPanel("input.summary == true", 40 | p(strong("GLM Summary")), 41 | verbatimTextOutput("summary"))) 42 | ) 43 | 44 | )) -------------------------------------------------------------------------------- /shiny/mediation/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/mediation/screenshot.png -------------------------------------------------------------------------------- /shiny/mediation/server.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(RColorBrewer) 3 | 4 | shinyServer(function(input, output) { 5 | 6 | #----------------------------------------------------------------------- 7 | # Define the 
strength of each link in the path 8 | structure <- reactive({ 9 | 10 | b <- input$a.val * input$a.b 11 | c <- b * input$b.c + input$a.val * input$a.c 12 | 13 | return(list(a=input$a.val, b=b, c=c)) 14 | 15 | }) 16 | 17 | #---------------------------------------------------------------------- 18 | # Plot the actual values of A, B, and C given the structure and A value 19 | output$plots <- renderPlot({ 20 | 21 | # Get the current model structure 22 | s <- structure() 23 | 24 | # Initialize the plot 25 | plot.new() 26 | 27 | # Plot the variable names with size as a function of "activation" 28 | size <- function(x) { 2 + x * 1.5 } 29 | text(.1, .155, "A", cex=size(s$a)) 30 | text(.5, .9, "B", cex=size(s$b)) 31 | text(.9, .155, "C", cex=size(s$c)) 32 | 33 | # Plot the arrows with weight and color as a function of strength 34 | pal <- brewer.pal(11, "RdYlGn") 35 | weight <- function (x) { max(.1, 1 + abs(x) * 4) } 36 | color <- function (y) { pal[max(1, round((y + 1) / 2 * 11))] } 37 | arrows(.14, .20, .46, .83, col=color(input$a.b), lwd=weight(input$a.b)) 38 | arrows(.54, .83, .855, .26, col=color(input$b.c), lwd=weight(input$b.c)) 39 | arrows(.18, .15, .82, .15, col=color(input$a.c), lwd=weight(input$a.c)) 40 | 41 | }) 42 | 43 | #---------------------------------------------------------------------- 44 | # Source the noise separately with a reference to the resample button 45 | sample.model <- reactive({ 46 | 47 | # Dummy line 48 | foo <- input$resample 49 | direct <- rnorm(30) 50 | full <- rnorm(30) 51 | A <- rnorm(30) 52 | return(list(A=A, direct.noise=direct, full.noise=full)) 53 | 54 | }) 55 | 56 | #---------------------------------------------------------------------- 57 | # Simulate and fit the two component models 58 | make.model <- reactive({ 59 | 60 | sample <- sample.model() 61 | 62 | A <- sample$A 63 | B <- A * input$a.b + sample$direct.noise 64 | C <- B * input$b.c + A * input$a.c + sample$full.noise 65 | 66 | return(list(A=A, B=B, C=C)) 67 | 68 | }) 69 | 
70 | #---------------------------------------------------------------------- 71 | # Print the direct model 72 | output$direct.model <- renderPrint({ 73 | 74 | m <- make.model() 75 | A <- m$A 76 | C <- m$C 77 | return(summary(lm(C ~ A))) 78 | 79 | }) 80 | 81 | #---------------------------------------------------------------------- 82 | # Print the full model 83 | output$full.model <- renderPrint({ 84 | 85 | m <- make.model() 86 | A <- m$A 87 | B <- m$B 88 | C <- m$C 89 | return(summary(lm(C ~ A + B))) 90 | 91 | }) 92 | 93 | }) -------------------------------------------------------------------------------- /shiny/mediation/shinyapps/mwaskom/mediation.dcf: -------------------------------------------------------------------------------- 1 | name: mediation 2 | account: mwaskom 3 | bundleId: 200333 4 | url: https://mwaskom.shinyapps.io/mediation 5 | -------------------------------------------------------------------------------- /shiny/mediation/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | shinyUI(pageWithSidebar( 4 | 5 | headerPanel("Simple mediation structure"), 6 | 7 | sidebarPanel( 8 | 9 | div( 10 | p(strong("Choose values to characterize the mediation structure")), 11 | sliderInput("a.b", 12 | "Influence of A on B", 13 | min=-1, max=1, step=.05, value=0, ticks=FALSE), 14 | sliderInput("b.c", 15 | "Influence of B on C", 16 | min=-1, max=1, step=.05, value=0, ticks=FALSE), 17 | sliderInput("a.c", 18 | "Independent influence of A on C", 19 | min=-1, max=1, step=.05, value=0, ticks=FALSE), 20 | br(), 21 | br(), 22 | p(strong("Manipulate A and observe the effects on B and C")), 23 | sliderInput("a.val", 24 | "Strength of A", 25 | min=-1, max=1, step=.05, value=0, ticks=FALSE), 26 | br(), 27 | br(), 28 | p(strong("Show summaries from simulated data with this structure")), 29 | checkboxInput("models", 30 | "Simulate the mediation model", 31 | value=FALSE), 32 | br(), 33 | actionButton("resample", "New 
Sample") 34 | ) 35 | 36 | 37 | ), 38 | 39 | mainPanel( 40 | div(class="span8", plotOutput("plots", width=600, height=400), 41 | conditionalPanel("input.models == true", 42 | p(strong("Direct model")), 43 | verbatimTextOutput("direct.model")), 44 | conditionalPanel("input.models == true", 45 | p(strong("Full model")), 46 | verbatimTextOutput("full.model"))) 47 | 48 | ) 49 | )) -------------------------------------------------------------------------------- /shiny/multi_regression/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/multi_regression/screenshot.png -------------------------------------------------------------------------------- /shiny/multi_regression/server.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(RColorBrewer) 3 | 4 | shinyServer(function(input, output) { 5 | 6 | # -------------------------------------------------------------------------- 7 | # Make x values and some normally distributed random noise 8 | random.sample <- reactive({ 9 | 10 | # Dummy line to trigger off button-press 11 | foo <- input$resample 12 | n.obs <- 60 13 | x <- runif(n.obs, 0, 2) 14 | noise <- rnorm(n.obs) 15 | color <- sample(brewer.pal(9, "Set1")[-6], 1) 16 | return(list(x=x, noise=noise, color=color)) 17 | 18 | }) 19 | 20 | # -------------------------------------------------------------------------- 21 | # Set up the dataset based on the inputs 22 | make.regression <- reactive({ 23 | 24 | sample <- random.sample() 25 | 26 | # Set up the true model 27 | n.obs <- 60 28 | x.0 <- rep(1, n.obs) 29 | x.1 <- sample$x 30 | x.2 <- rep(c(0, 1), n.obs / 2) 31 | x.3 <- x.1 * x.2 32 | X <- matrix(c(x.0, x.1, x.2, x.3), ncol=4) 33 | b <- matrix(c(input$a, input$b, input$c, input$d)) 34 | y <- X %*% b + sample$noise * input$e 35 | colnames(X) <- c("intercept", "x", "group", 
"interaction") 36 | df <- as.data.frame(X) 37 | df$y <- y 38 | 39 | return(list(df=df, X=X, y=y)) 40 | 41 | }) 42 | 43 | # -------------------------------------------------------------------------- 44 | # Fit the specified regression model 45 | fit.regression <- reactive({ 46 | 47 | # Get the current model structure 48 | data <- make.regression() 49 | df <- data$df 50 | 51 | # Conditionally fit the model 52 | if (input$model == "Simple regression") { 53 | fit.res <- lm(y ~ x, df) 54 | } else if (input$model == "Additive model") { 55 | fit.res <- lm(y ~ x + group, df) 56 | } else if (input$model == "Interactive model") { 57 | fit.res <- lm(y ~ x * group, df) 58 | } else { 59 | fit.res <- NULL 60 | } 61 | 62 | # Get the model summary 63 | if (is.null(fit.res)) { 64 | fit.summary <- NULL 65 | } else { 66 | fit.summary <- summary(fit.res) 67 | } 68 | 69 | return(list(fit.res=fit.res, fit.summary=fit.summary)) 70 | 71 | }) 72 | 73 | #--------------------------------------------------------------------------- 74 | # Plot a scatter of the data with regression lines corresponding to the model 75 | output$reg.plot <- renderPlot({ 76 | 77 | # Get the current regression data 78 | data <- make.regression() 79 | x <- data$df$x 80 | y <- data$df$y 81 | g <- data$df$group 82 | coefs <- fit.regression()$fit.res$coefficients 83 | 84 | # Plot the true model 85 | other.color <- random.sample()$color 86 | plot(x[g == 0], y[g == 0], xlim=c(0, 2), ylim=c(-1, 8), 87 | pch=16, cex=1.2, col="#333333", bty="n", xlab="x", ylab="y") 88 | points(x[g == 1], y[g == 1], pch=16, cex=1.2, col=other.color) 89 | 90 | if (input$model == "Simple regression") { 91 | abline(coefs["(Intercept)"], coefs["x"], col="#333333", lwd=3) 92 | } else if (input$model == "Additive model") { 93 | abline(coefs["(Intercept)"], coefs["x"], col="#333333", lwd=3) 94 | abline(coefs["(Intercept)"] + coefs["group"], coefs["x"], col=other.color, lwd=3) 95 | } else if (input$model == "Interactive model") { 96 | 
abline(coefs["(Intercept)"], coefs["x"], col="#333333", lwd=3) 97 | abline(coefs["(Intercept)"] + coefs["group"], 98 | coefs["x"] + coefs["x:group"], col=other.color, lwd=3) 99 | } 100 | 101 | }) 102 | 103 | #--------------------------------------------------------------------------- 104 | # Show the lm() summary for the selected model 105 | output$reg.summary <- renderPrint({ 106 | 107 | summary <- fit.regression()$fit.summary 108 | if (!is.null(summary)) { 109 | return(fit.regression()$fit.summary) 110 | } 111 | 112 | }) 113 | 114 | }) -------------------------------------------------------------------------------- /shiny/multi_regression/shinyapps/mwaskom/multi_regression.dcf: -------------------------------------------------------------------------------- 1 | name: multi_regression 2 | account: mwaskom 3 | bundleId: 200326 4 | url: https://mwaskom.shinyapps.io/multi_regression 5 | -------------------------------------------------------------------------------- /shiny/multi_regression/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | fig.width <- 600 4 | fig.height <- 450 5 | 6 | shinyUI(pageWithSidebar( 7 | 8 | headerPanel("Modeling choices in multiple regression"), 9 | 10 | sidebarPanel( 11 | 12 | div(p("Relate modeling choices to plots and summaries of the models")), 13 | 14 | div( 15 | 16 | selectInput("model", 17 | strong("Linear model to evaluate"), 18 | choices=c("Simple regression", 19 | "Additive model", 20 | "Interactive model")), 21 | br(), 22 | br(), 23 | actionButton("resample", "New Sample"), 24 | br(), 25 | br(), 26 | br(), 27 | p(strong("Generating parameters")), 28 | sliderInput("a", 29 | "True intercept", 30 | min=0, max=2, step=.2, value=1, ticks=FALSE), 31 | sliderInput("b", 32 | "True main effect of x", 33 | min=0, max=2, step=.2, value=1, ticks=FALSE), 34 | sliderInput("c", 35 | "True main effect of group", 36 | min=0, max=2, step=.2, value=1, ticks=FALSE), 37 | sliderInput("d", 38 | "True 
interaction between x and group", 39 | min=0, max=2, step=.2, value=1, ticks=FALSE), 40 | sliderInput("e", 41 | "Error standard deviation", 42 | min=0, max=2, step=.2, value=1, ticks=FALSE) 43 | ) 44 | ), 45 | 46 | mainPanel( 47 | div(plotOutput("reg.plot", width=fig.width, height=fig.height)), 48 | div(class="span7", verbatimTextOutput("reg.summary")) 49 | ) 50 | 51 | )) 52 | -------------------------------------------------------------------------------- /shiny/regression_bootstrap/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/regression_bootstrap/screenshot.png -------------------------------------------------------------------------------- /shiny/regression_bootstrap/server.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | shinyServer(function(input, output) { 4 | 5 | # -------------------------------------------------------------------------- 6 | # Get a set of random data with a fixed true model 7 | make.regression <- reactive({ 8 | # This gets called whenever the app is reloaded 9 | 10 | # Set up the true model 11 | n.obs = 30 12 | x <- rnorm(n.obs, 0, 2) 13 | x.out <- abs(x) > 3 14 | while (any(x.out)){ 15 | x[x.out] <- rnorm(sum(x.out), 0, 2) 16 | x.out <- abs(x) > 3 17 | } 18 | 19 | y <- 2 + .75 * x + rnorm(n.obs, 0, 1) 20 | model.fit <- lm(y ~ x) 21 | fit.coef <- model.fit$coefficients 22 | 23 | # Bootstrap the regression 24 | boot.coef <- matrix(NA, nrow=100, ncol=2) 25 | for (i in 1:100){ 26 | boot.idx <- sample(seq(1, n.obs), replace=TRUE) 27 | x.boot <- x[boot.idx] 28 | y.boot <- y[boot.idx] 29 | fit.boot <- lm(y.boot ~ x.boot) 30 | boot.coef[i,] <- fit.boot$coefficients 31 | } 32 | 33 | return(list(x=x, y=y, model.fit=model.fit, 34 | fit.coef=fit.coef, boot.coef=boot.coef)) 35 | 36 | }) 37 | 38 | 39 | 
#--------------------------------------------------------------------------- 40 | # Plot a scatter of the data and the current model with residuals 41 | output$reg.plot <- renderPlot({ 42 | 43 | # Get the current regression data 44 | reg.data <- make.regression() 45 | x <- reg.data$x 46 | y <- reg.data$y 47 | fit.coef <- reg.data$fit.coef 48 | 49 | # Plot the true model 50 | plot(x, y, xlim=c(-4, 4), ylim=c(-2, 5), pch=16, cex=1.2, col="#333333", bty="n") 51 | abline(coef=fit.coef, lwd=3.5) 52 | 53 | # Find the standard error of the regression 54 | model.fit <- reg.data$model.fit 55 | x.vals <- seq(-4.5, 4.5, .01) 56 | y.vals <- fit.coef[1] + fit.coef[2] * x.vals 57 | se <- predict(model.fit, data.frame(x=x.vals), se.fit=TRUE)$se.fit 58 | 59 | # Find the parameters for the density of y-hat at the chosen position 60 | dist.pos <- input$dist.pos 61 | se.loc <- predict(model.fit, data.frame(x=dist.pos), se.fit=TRUE)$se.fit 62 | y.hat.loc <- fit.coef[1] + fit.coef[2] * dist.pos 63 | y.loc <- seq(qnorm(.0001, y.hat.loc, se.loc), 64 | qnorm(.9999, y.hat.loc, se.loc), .01) 65 | d.se <- dnorm(y.loc, y.hat.loc, se.loc) 66 | 67 | # Plot the bootstrap estimates 68 | boot.coef <- reg.data$boot.coef 69 | if (input$n.boot > 0){ 70 | for (n in 1:input$n.boot){ 71 | abline(coef=boot.coef[n,], col=rgb(1, .5, 0, .33)) 72 | } 73 | } 74 | 75 | # Plot the standard error of the regression 76 | lines(x.vals, y.vals + se, col="steelblue", lwd=2) 77 | lines(x.vals, y.vals - se, col="steelblue", lwd=2) 78 | 79 | # Plot the bootstrap distribution curve 80 | if (input$plot.boot.dist){ 81 | abline(v=dist.pos, lty=3) 82 | lines(dist.pos + d.se, y.loc, col="#333333", lwd=2) 83 | } 84 | 85 | }) 86 | 87 | }) -------------------------------------------------------------------------------- /shiny/regression_bootstrap/shinyapps/mwaskom/regression_bootstrap.dcf: -------------------------------------------------------------------------------- 1 | name: regression_bootstrap 2 | account: mwaskom 3 | bundleId: 200335 4 | url: 
https://mwaskom.shinyapps.io/regression_bootstrap 5 | -------------------------------------------------------------------------------- /shiny/regression_bootstrap/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | fig.width <- 600 4 | fig.height <- 450 5 | 6 | shinyUI(pageWithSidebar( 7 | 8 | headerPanel("Uncertainty in Linear Regression"), 9 | 10 | sidebarPanel( 11 | 12 | div(p("Relate the width of the regression error bars to the distribution of bootstrapped regression lines")), 13 | 14 | div( 15 | 16 | sliderInput("n.boot", 17 | strong("Number of bootstrap lines"), 18 | min=0, max=100, step=1, value=0, ticks=FALSE), 19 | br(), 20 | checkboxInput("plot.boot.dist", 21 | strong("Plot the distribution of yhat"), 22 | value=FALSE), 23 | br(), 24 | sliderInput("dist.pos", 25 | strong("Position of the density"), 26 | min=-4, max=4, step=.25, value=0, ticks=FALSE) 27 | 28 | ) 29 | ), 30 | 31 | mainPanel( 32 | div(plotOutput("reg.plot", width=fig.width, height=fig.height)) 33 | ) 34 | 35 | )) 36 | -------------------------------------------------------------------------------- /shiny/sampling_and_stderr/.Rhistory: -------------------------------------------------------------------------------- 1 | ls 2 | ls() 3 | ?ls 4 | c? 
5 | exit 6 | ?c 7 | setwd("~/Dropbox/Class/252/") 8 | dir 9 | dir() 10 | setwd("Week1/") 11 | dir 12 | dir() 13 | dir("data") 14 | deaths = read.csv("data/earlydeaths.csv") 15 | deaths 16 | deaths[:1-] 17 | deaths[:1-] 18 | deaths[:10] 19 | deaths[1:10] 20 | deaths.time 21 | deaths[time] 22 | deaths["time"] 23 | deaths["time"][:10] 24 | deaths["time"][1:10] 25 | ?read.csv 26 | ?load.csv 27 | help(read.csv) 28 | ?var 29 | ?read.csv 30 | ?table 31 | d0 = c(26,31,45) 32 | d0 33 | rs1 = chisq.test(c(26,31,45), p=c(1,1,1), rescale.p=T, simulate.p.value=F) 34 | print(rs1) 35 | md = c(11,15,15,16,25,20) 36 | md 37 | mean(md 38 | ) 39 | var(md) 40 | ?var 41 | sqrt(var(md)) 42 | sqrt(var(md))/len(md) 43 | sqrt(var(md))/length(md) 44 | ?stderr 45 | setwd() 46 | dir() 47 | setwd("Class/252/Week1/") 48 | d0 = read.csv("data/earlydeaths.csv") 49 | ?tabe 50 | ?table 51 | View(`d0`) 52 | View(`d0`) 53 | deaths = table(d0$time, d0$cause) 54 | deaths 55 | deaths = t(deaths) 56 | deaths 57 | chisq.test(d0 58 | ) 59 | chisq.test(d0[1:3,1], d0[1:3,2]) 60 | d0[1] 61 | deaths[1] 62 | deaths[1:3] 63 | chisq.test(d0[1:3], d0[4:6]) 64 | chisq.test(deaths[1:3], deaths[4:6]) 65 | ?chisq.test 66 | chisq.test(deaths[1:3], p=deaths[4:6]) 67 | chisq.test(deaths[1:3], p=deaths[4:6], rescale.p=T) 68 | deaths 69 | chisq.test(deaths) 70 | jobs = rbind(c(18,14,8), c(12,16,32) 71 | ) 72 | jobs 73 | jobs = table(jobs) 74 | jobs 75 | jobs = table(c(18,14,8), c(12,16,32) 76 | ) 77 | jobs 78 | ?table 79 | jobs = table(na, 80 | d0$cause 81 | jobs = table(c(18,14,8), c(12,16,32)) 82 | jobs 83 | jobs = matrix(c(18,14, 8, 12,16,32), byrow=T, ncol=3) 84 | jobs 85 | chisq.test(jobs 86 | ) 87 | d1 = read.csv('fieldsimul1.csv') 88 | d1 = read.csv('data/fieldsimul1.csv') 89 | d1 90 | ?density 91 | x0 = c(1:3, 5 7 9) 92 | x0 = c(1:3, 5, 7, 9) 93 | p0 = c(.2, .4, .24, .1, .05, .01) 94 | p0 95 | mu0 = sum(x0*p0)/sum(p0) 96 | mu0 97 | o1 = sample(d0, 1000, replace=T, prob=p0) 98 | o1 = sample(d0, 1000, replace=T, 
prob=p0) 99 | x0 100 | p0 101 | length(p0) 102 | length(x0) 103 | o1 = sample(d0, 1000, replace=T, prob=p0) 104 | ?sample 105 | o1 = sample(x0, 1000, replace=T, prob=p0) 106 | o1 107 | hist(o1) 108 | density(o1, adjust=3) 109 | sm1 = density(o1, adjust=3) 110 | sm1 111 | plot(sm1) 112 | lines(sm1) 113 | lines(sm1) 114 | lines(sm1) 115 | plot(sm1) 116 | lines(sm1) 117 | hist(o1) 118 | ?X11 119 | mu0 120 | ?test.t 121 | t.test 122 | ?t.test 123 | t.test(o1, mu=mu1) 124 | mu0 125 | t.test(o1, mu=mu0) 126 | D0 127 | d0 128 | o2 = sample(d0, 7, replace=T, prob=p0) 129 | o2 = sample(x0, 7, replace=T, prob=p0) 130 | 02 131 | o2 132 | hist(o2) 133 | mean(o2) 134 | mu0 135 | tsc = t.test(o2, mu=mu0, paired=F) 136 | tsc 137 | tsc/2 138 | tsc.t 139 | ls(tsc) 140 | t.statistic 141 | t:statistic 142 | t$statistic 143 | tsc$statistic 144 | t_scores = vector(mode=float, length=1000) 145 | ?vector 146 | t_scores = vector(mode="float"", length=1000) 147 | t_scores = vector(mode="float, length=1000) 148 | t_scores = vector(mode="float"", length=1000) 149 | t_scores = vector(mode="float", length=1000) 150 | t_scores = vector(length=1000) 151 | t_scores 152 | t_scores = vector("list", 1000) 153 | t_scores 154 | t_scores = vector("numeric"", 1000) 155 | t_scores = vector("numeric", 1000) 156 | t_scores 157 | ?for 158 | for (i in 1:1000) 159 | o = sample(x0, 7, replace=T, prob=p0) 160 | for (i in 1:1000) 161 | o = sample(x0, 7, replace=T, prob=p0); 162 | for (i in 1:1000) 163 | o = sample(x0, 7, replace=T, prob=p0); t_scores[i] = t.test(o, mu=mu0)$statistic 164 | t_scores 165 | i 166 | for (i in 1:1000); print i 167 | for (i in 1:1000) print i 168 | for (i in 1:1000) print(i) 169 | o = sample(x0, 7, replace=T, prob=p0); t_scores[i] <- t.test(o, mu=mu0)$statistic 170 | o 171 | t.test(o, mu=mu0) 172 | t.test(o, mu=mu0)$statistic 173 | t_scores[-5] = .Last.value 174 | t_scores 175 | t_scores[-5] 176 | t_scores 177 | ?t_scores 178 | t_scoes 179 | t_scores 180 | lenth(t_scores) 181 | 
length(t_scores) 182 | t_scores[1] 183 | t_scores[-5] 184 | t_scores[1000] = .Last.value 185 | t_scores[1000] = .5 186 | t_scores[-5] 187 | t_scores = vector("numeric", 1000) 188 | for (1 in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0))$statistic 189 | for (i in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0))$statistic 190 | for (i in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0))$statistic 191 | t.test(sample(x0, 7, replace=T, prob=p0))$statistic 192 | vector[i] = t.test(sample(x0, 7, replace = T, prob = p0))$statistic 193 | t.test(sample(x0, 7, replace = T, prob = p0))$statisti 194 | t.test(sample(x0, 7, replace = T, prob = p0))$statistic 195 | res = t.test(sample(x0, 7, replace = T, prob = p0)) 196 | res 197 | res$statistic 198 | res$statistic + 5 199 | foo = c(0,0,0) 200 | foo 201 | foo[1] = res$statistic 202 | foo 203 | res = t.test(sample(x0, 7, replace = T, prob = p0))$statistic 204 | res 205 | foo[2] = t.test(sample(x0, 7, replace = T, prob = p0))$statistic 206 | foo 207 | res 208 | for (i in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 209 | t.test(sample(x0, 7, replace = T, prob = p0), mu = mu0)$statistic 210 | fo; 211 | for (i in 1:1000) {res = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0); t_scores[i] = res$statistic} 212 | x0 213 | for (i in 1:1000) {res = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0); t_scores[i] = res$statistic} 214 | t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0) 215 | t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0) 216 | for (i in 1:1000){} 217 | for (i in 1:1000){ 218 | res = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0) 219 | t_scores[i] = res$statistic 220 | } 221 | res 222 | for (i in 1:1000){ 223 | draw = sample(x0, 7, replace=T, prob=p0) 224 | res = t.test(draw, mu=mu0) 225 | t_scores[i] = res$statistic 226 | } 227 | for (i in 1:1000){ 228 | + draw = sample(x0, 7, replace=T, prob=p0) 229 | + res = t.test(draw, mu=mu0) 230 | + 
t_scores[i] = res$statistic 231 | + } 232 | for (i in 1:1000){ 233 | + draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 234 | + res = t.test(draw, mu=mu0) 235 | + t_scores[i] = res$statistic 236 | + } 237 | for (i in 1:1000){ 238 | + draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 239 | + res = t.test(draw, mu=mu0) 240 | + t_scores[i] = res$statistic 241 | + } 242 | for (i in 1:1000){ 243 | + draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 244 | + res = t.test(draw, mu=mu0) 245 | + t_scores[i] = res$statistic } 246 | + 247 | for (i in 1:1000){ 248 | draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 249 | res = t.test(draw, mu=mu0) 250 | } 251 | x0 252 | p0 253 | mu0 254 | t.test? 255 | ?t.test 256 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 257 | myvec = 0 258 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 259 | myvec 260 | plot(myvec) 261 | hist(myvec) 262 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 263 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 264 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 265 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 266 | v = vector("numeric", 1000) 267 | v 268 | for (i in 1:1000) v[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 269 | for (i in 1:1000) v[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 270 | for (i in 1:1000) v[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 271 | for (i in 1:1000) v[i] = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 272 | sample(x0, 7, replace=T, prob=p0) 273 | sample(x0, 7, replace=T, prob=p0) 274 | sample(x0, 7, replace=T, prob=p0) 275 | sample(x0, 7, replace=T, prob=p0) 276 | sample(x0, 7, replace=T, prob=p0) 
277 | dir() 278 | open("r_scripts/smonte1.r") 279 | ?quantile 280 | ?sprintf 281 | sprintf("hello%s", "world") 282 | mal_deaths = c(11, 15, 15, 16, 25, 20) 283 | mal_mean = mean(mal_deaths) 284 | mal_var = var(mal_deaths) 285 | mal_std = sqrt(var) 286 | print("Problem 1.b.i:") 287 | print(sprintf("Maltreatment death statistics:\n 288 | \tMean: %.3f\n 289 | \tVariance: %.3f\n 290 | \tS.D.: %.3f", mal_mean, mal_var, mal_std) 291 | ) 292 | ?var 293 | var(c(1,2,3,4,5,3,7,2,3)) 294 | sqrt(var(c(1,2,3,4,5,3,7,2,3))) 295 | var(c(1,2,3,4,5,3,7,2,3)) 296 | getwd() 297 | setwd("Class/252/Week1/") 298 | source("hw.r") 299 | setwd("~/Dropbox/TA/252/Week2/") 300 | setwd("~/Dropbox/TA/252/Week2/shiny") 301 | install.packages("knitr") 302 | install.packages("shiny) 303 | " 304 | ) 305 | install.packages("shiny") 306 | install.packages("ggplot2") 307 | shiny::runApp("~/Dropbox/TA//252/Week2/shiny/") 308 | shiny::runApp("~/Dropbox/TA//252/Week2/shiny/") 309 | 1:10:2 310 | 1:2:10 311 | 1:10::2 312 | 1:10 313 | ?dnorm 314 | shiny::runApp("~/Dropbox/TA//252/Week2/shiny/") 315 | shiny::runApp("~/Dropbox/TA//252/Week2/shiny/") 316 | shiny::runApp("~/Dropbox/TA//252/Week2/shiny/") 317 | ?runApp 318 | runApp() 319 | runApp() 320 | runApp() 321 | -------------------------------------------------------------------------------- /shiny/sampling_and_stderr/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/sampling_and_stderr/screenshot.png -------------------------------------------------------------------------------- /shiny/sampling_and_stderr/server.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | shinyServer(function(input, output) { 4 | 5 | output$population <- renderPlot({ 6 | x <- seq(-10, 10, length.out=1000) 7 | pdf <- dnorm(x, 0, input$pop.sd) 8 | plot(x, pdf, type="l", 
col="navy", lwd=3, main="Population", frame=FALSE) 9 | }) 10 | 11 | output$sample <- renderPlot({ 12 | x <- rnorm(input$n.sample, 0, input$pop.sd) 13 | x <- x[x > -10 & x < 10] 14 | bins <- hist(x, breaks=seq(-10, 10, 1), col="#BBBBBB", xlim=c(-10, 10), 15 | main="One Sample from the Population") 16 | annot.height <- max(bins$counts) / 2 17 | sd.x = sd(x) 18 | sem.x = sd(x) / sqrt(length(x)) 19 | lines(c(-sd.x, sd.x), rep(annot.height, 2), lwd=6, col="darkred") 20 | lines(c(-sem.x, sem.x), rep(annot.height, 2), lwd=5, col="pink") 21 | 22 | legend(-10, max(bins$counts), c("+/- sd", "+/- sem"), 23 | col=c("darkred", "pink"), lty=c(1, 1), lwd=c(4, 4), 24 | box.lwd = 0, box.col = "white", bg = "white") 25 | 26 | rug(x, col="navy", lwd=2, ticksize=.05) 27 | 28 | }) 29 | 30 | output$standard.error <- renderPlot({ 31 | sem <- input$pop.sd / sqrt(input$n.sample) 32 | x <- rnorm(10000, 0, sem) 33 | hist(x, col="#BBBBBB", xlim=c(-10, 10), freq=FALSE, 34 | main="Distribution of Means from Many Samples") 35 | x.pos <- seq(-10, 10, length.out=1000) 36 | pdf <- dnorm(x.pos, 0, sem) 37 | lines(x.pos, pdf, col="navy", lwd=2) 38 | annot.height <- max(pdf) / 2 39 | lines(c(-sem, sem), rep(annot.height, 2), lwd=4, col="pink") 40 | 41 | legend(-10, max(pdf), "+/- sd", 42 | col="pink", lty=1, lwd=4, 43 | box.lwd = 0, box.col = "white", bg = "white") 44 | 45 | }) 46 | }) -------------------------------------------------------------------------------- /shiny/sampling_and_stderr/shinyapps/mwaskom/sampling_and_stderr.dcf: -------------------------------------------------------------------------------- 1 | name: sampling_and_stderr 2 | account: mwaskom 3 | bundleId: 200323 4 | url: https://mwaskom.shinyapps.io/sampling_and_stderr 5 | -------------------------------------------------------------------------------- /shiny/sampling_and_stderr/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | fig.width <- 600 4 | fig.height <- 250 5 | 
shinyUI(pageWithSidebar( 6 | headerPanel("Sampling and Standard Error"), 7 | 8 | sidebarPanel( 9 | sliderInput("pop.sd", 10 | strong("Population standard deviation"), 11 | min=0, max=4, value=2, step=.2, ticks=FALSE), 12 | sliderInput("n.sample", 13 | strong("Number of observations in a sample"), 14 | min=1, max=100, value=20) 15 | ), 16 | 17 | 18 | mainPanel( 19 | div(plotOutput("population", width=fig.width, height=fig.height)), 20 | div(plotOutput("sample", width=fig.width, height=fig.height)), 21 | div(plotOutput("standard.error", width=fig.width, height=fig.height)) 22 | ) 23 | )) -------------------------------------------------------------------------------- /shiny/simple_regression/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/simple_regression/screenshot.png -------------------------------------------------------------------------------- /shiny/simple_regression/server.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | shinyServer(function(input, output) { 4 | 5 | # -------------------------------------------------------------------------- 6 | # Get a set of random data with a fixed true model 7 | draw.sample <- reactive({ 8 | # This gets called whenever the app is reloaded 9 | 10 | # Hardcode the true relationship 11 | n.obs = 50 12 | x <- rnorm(n.obs, 0, 2) 13 | y <- 2 + x + rnorm(n.obs, 0, 1) 14 | 15 | model.summary <- summary(lm(y ~ x)) 16 | 17 | return(list(x=x, y=y, model.summary=model.summary)) 18 | 19 | }) 20 | 21 | # -------------------------------------------------------------------------- 22 | # Calculate the current values of the model given the inputs 23 | regression <- reactive({ 24 | 25 | # Get shorthand access to the attributes we care about 26 | data.vals <- draw.sample() 27 | x <- data.vals$x 28 | y <- data.vals$y 29 | a <- 
input$intercept 30 | b <- input$slope 31 | 32 | # Give a visual cue when we have the right regression 33 | if (a == 2 & b == 1) resid.color <- "seagreen" else resid.color <- "firebrick" 34 | 35 | # Calculate the current residuals 36 | yhat <- input$intercept + x * input$slope 37 | resid <- y - yhat 38 | 39 | # Calculate the current and optimal residual sum squares 40 | ss.res <- sum(resid ** 2) 41 | resid.best <- y - (2 + x) 42 | ss.res.best <- sum(resid.best ** 2) 43 | 44 | # Compute R^2 45 | r2 <- 1 - (ss.res / sum((y - mean(y)) ** 2)) 46 | 47 | return(list(x=x, y=y, yhat=yhat, a=a, b=b, r2=r2, 48 | resid=resid, ss.res=ss.res, ss.res.best=ss.res.best, 49 | resid.color=resid.color)) 50 | 51 | }) 52 | 53 | #--------------------------------------------------------------------------- 54 | # Plot a scatter of the data and the current model with residuals 55 | output$reg.plot <- renderPlot({ 56 | 57 | # Get the current regression data 58 | reg.data <- regression() 59 | a <- reg.data$a 60 | b <- reg.data$b 61 | x <- reg.data$x 62 | y <- reg.data$y 63 | r2 <- reg.data$r2 64 | resid <- reg.data$resid 65 | 66 | # Mask data outside the viewport 67 | mask <- x > -4.5 & x < 4.5 & y > -3 & y < 8 68 | x <- x[mask] 69 | y <- y[mask] 70 | resid <- resid[mask] 71 | 72 | 73 | # Plot the regression line 74 | plot(c(-4.5, 4.5), c(a + b * -4.5, a + b * 4.5), type="l", lwd=2, 75 | bty="n", xlim=c(-5, 5), ylim=c(-3, 8), xlab="x", ylab="y", 76 | main="Linear Model Y ~ X") 77 | 78 | # Plot each residual distance 79 | for (i in 1:length(resid)){ 80 | lines(c(x[i], x[i]), c(y[i], y[i] - resid[i]), 81 | col=reg.data$resid.color, lwd=1.5) 82 | } 83 | 84 | # Plot the observations 85 | points(x, y, pch=16, col="#444444") 86 | 87 | # Plot the current equation as a legend 88 | legend(-5, 8, sprintf("y = %.3g + %.3g * x", a, b), lty=1, lwd=2, bty="n") 89 | 90 | }) 91 | 92 | #--------------------------------------------------------------------------- 93 | # Plot the current sum squares along with 
the minimum possible 94 | output$ss.plot <- renderPlot({ 95 | 96 | # Get the current regression data 97 | reg.data <- regression() 98 | ss.res <- reg.data$ss.res 99 | ss.res.best <- reg.data$ss.res.best 100 | resid.color <- reg.data$resid.color 101 | 102 | # Plot the two points 103 | plot(ss.res, 1, col=resid.color, cex=2, 104 | yaxt="n", bty="n", xlim=c(0, 1000), 105 | ylab="", xlab="", main="Sum of Squares of Residuals") 106 | points(ss.res.best, 1, pch=4, cex=2) 107 | 108 | }) 109 | 110 | #---------------------------------------------------------------------------- 111 | # Plot the current distribution of residuals and the theoretical distribution 112 | output$resid.plot <- renderPlot({ 113 | 114 | # Get the current regression data 115 | reg.data <- regression() 116 | resid <- reg.data$resid 117 | resid <- resid[resid > -5 & resid < 5] 118 | 119 | # Plot a histogram of the residuals 120 | hist(resid, seq(-5, 5, .5), prob=TRUE, col="#bbbbbb", 121 | xlim=c(-5, 5), ylim=c(0, dnorm(0) * 1.5), 122 | yaxt="n", bty="n", ylab="", xlab="", main="Distribution of Residuals") 123 | rug(resid, lwd=2) 124 | 125 | # Plot a normal density (the expected residual distribution) 126 | curve(dnorm, col=reg.data$resid.color, lwd=2, add=TRUE) 127 | 128 | }) 129 | 130 | #--------------------------------------------------------------------------- 131 | # Print the lm() summary of the true model 132 | output$summary <- renderPrint({ 133 | 134 | if (input$summary){ 135 | return(draw.sample()$model.summary) 136 | } 137 | 138 | }) 139 | }) -------------------------------------------------------------------------------- /shiny/simple_regression/shinyapps/mwaskom/simple_regression.dcf: -------------------------------------------------------------------------------- 1 | name: simple_regression 2 | account: mwaskom 3 | bundleId: 200340 4 | url: https://mwaskom.shinyapps.io/simple_regression 5 | --------------------------------------------------------------------------------
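Editor's note: the residual bookkeeping that `simple_regression/server.R` performs reactively can be sketched outside Shiny. The snippet below is an illustrative Python port (the repo's `dash/` apps are in Python), not code from the repo; it mirrors the app's hardcoded true model `y = 2 + x + N(0, 1)`, and the function names (`make_data`, `ss_res`, `r_squared`) are hypothetical.

```python
import random


def make_data(n_obs=50, seed=0):
    """Draw data from the app's hardcoded true model: y = 2 + x + N(0, 1)."""
    rng = random.Random(seed)
    x = [rng.gauss(0, 2) for _ in range(n_obs)]
    y = [2 + xi + rng.gauss(0, 1) for xi in x]
    return x, y


def ss_res(x, y, a, b):
    """Residual sum of squares for the candidate line yhat = a + b * x."""
    return sum((yi - (a + b * xi)) ** 2 for xi, yi in zip(x, y))


def r_squared(x, y, a, b):
    """R^2 = 1 - SS_res / SS_tot, matching the formula used in server.R."""
    y_bar = sum(y) / len(y)
    ss_tot = sum((yi - y_bar) ** 2 for yi in y)
    return 1 - ss_res(x, y, a, b) / ss_tot


x, y = make_data()
# The true parameters (intercept 2, slope 1) should beat a flat line
# through the origin by a wide margin, which is what the app's green/red
# residual coloring rewards.
best, worst = ss_res(x, y, 2, 1), ss_res(x, y, 0, 0)
```

This is the same quantity the app plots as "Sum of Squares of Residuals": sliding the intercept and slope toward (2, 1) drives `ss_res` toward its minimum and `r_squared` toward its maximum.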
/shiny/simple_regression/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | fig.width <- 600 4 | fig.height <- 450 5 | 6 | shinyUI(pageWithSidebar( 7 | 8 | headerPanel("Simple Linear Regression"), 9 | 10 | sidebarPanel( 11 | 12 | div(p("Try to find values for the slope and intercept that minimize the residual error from the linear model.")), 13 | 14 | div( 15 | 16 | sliderInput("intercept", 17 | strong("Intercept"), 18 | min=-2, max=6, step=.5, 19 | value=sample(seq(-2, 6, .5), 1), ticks=FALSE), 20 | br(), 21 | sliderInput("slope", 22 | strong("Slope"), 23 | min=-1, max=3, step=.25, 24 | value=sample(seq(-1, 3, .25), 1), ticks=FALSE), 25 | br(), 26 | checkboxInput("summary", 27 | strong("Show summary(lm(y ~ x))"), 28 | value=FALSE) 29 | 30 | ) 31 | ), 32 | 33 | mainPanel( 34 | div(plotOutput("reg.plot", width=fig.width, height=fig.height), 35 | title="y = 2 + x"), 36 | div(plotOutput("ss.plot", width=fig.width, height=fig.height / 3)), 37 | div(plotOutput("resid.plot", width=fig.width, height=fig.height / 2)), 38 | div(class="span7", conditionalPanel("input.summary == true", 39 | p(strong("Linear model summary")), 40 | verbatimTextOutput("summary"))) 41 | ) 42 | 43 | )) -------------------------------------------------------------------------------- /shiny/ttest_simulation/.Rhistory: -------------------------------------------------------------------------------- 1 | ls 2 | ls() 3 | ?ls 4 | c? 
5 | exit 6 | ?c 7 | setwd("~/Dropbox/Class/252/") 8 | dir 9 | dir() 10 | setwd("Week1/") 11 | dir 12 | dir() 13 | dir("data") 14 | deaths = read.csv("data/earlydeaths.csv") 15 | deaths 16 | deaths[:1-] 17 | deaths[:1-] 18 | deaths[:10] 19 | deaths[1:10] 20 | deaths.time 21 | deaths[time] 22 | deaths["time"] 23 | deaths["time"][:10] 24 | deaths["time"][1:10] 25 | ?read.csv 26 | ?load.csv 27 | help(read.csv) 28 | ?var 29 | ?read.csv 30 | ?table 31 | d0 = c(26,31,45) 32 | d0 33 | rs1 = chisq.test(c(26,31,45), p=c(1,1,1), rescale.p=T, simulate.p.value=F) 34 | print(rs1) 35 | md = c(11,15,15,16,25,20) 36 | md 37 | mean(md 38 | ) 39 | var(md) 40 | ?var 41 | sqrt(var(md)) 42 | sqrt(var(md))/len(md) 43 | sqrt(var(md))/length(md) 44 | ?stderr 45 | setwd() 46 | dir() 47 | setwd("Class/252/Week1/") 48 | d0 = read.csv("data/earlydeaths.csv") 49 | ?tabe 50 | ?table 51 | View(`d0`) 52 | View(`d0`) 53 | deaths = table(d0$time, d0$cause) 54 | deaths 55 | deaths = t(deaths) 56 | deaths 57 | chisq.test(d0 58 | ) 59 | chisq.test(d0[1:3,1], d0[1:3,2]) 60 | d0[1] 61 | deaths[1] 62 | deaths[1:3] 63 | chisq.test(d0[1:3], d0[4:6]) 64 | chisq.test(deaths[1:3], deaths[4:6]) 65 | ?chisq.test 66 | chisq.test(deaths[1:3], p=deaths[4:6]) 67 | chisq.test(deaths[1:3], p=deaths[4:6], rescale.p=T) 68 | deaths 69 | chisq.test(deaths) 70 | jobs = rbind(c(18,14,8), c(12,16,32) 71 | ) 72 | jobs 73 | jobs = table(jobs) 74 | jobs 75 | jobs = table(c(18,14,8), c(12,16,32) 76 | ) 77 | jobs 78 | ?table 79 | jobs = table(na, 80 | d0$cause 81 | jobs = table(c(18,14,8), c(12,16,32)) 82 | jobs 83 | jobs = matrix(c(18,14, 8, 12,16,32), byrow=T, ncol=3) 84 | jobs 85 | chisq.test(jobs 86 | ) 87 | d1 = read.csv('fieldsimul1.csv') 88 | d1 = read.csv('data/fieldsimul1.csv') 89 | d1 90 | ?density 91 | x0 = c(1:3, 5 7 9) 92 | x0 = c(1:3, 5, 7, 9) 93 | p0 = c(.2, .4, .24, .1, .05, .01) 94 | p0 95 | mu0 = sum(x0*p0)/sum(p0) 96 | mu0 97 | o1 = sample(d0, 1000, replace=T, prob=p0) 98 | o1 = sample(d0, 1000, replace=T, 
prob=p0) 99 | x0 100 | p0 101 | length(p0) 102 | length(x0) 103 | o1 = sample(d0, 1000, replace=T, prob=p0) 104 | ?sample 105 | o1 = sample(x0, 1000, replace=T, prob=p0) 106 | o1 107 | hist(o1) 108 | density(o1, adjust=3) 109 | sm1 = density(o1, adjust=3) 110 | sm1 111 | plot(sm1) 112 | lines(sm1) 113 | lines(sm1) 114 | lines(sm1) 115 | plot(sm1) 116 | lines(sm1) 117 | hist(o1) 118 | ?X11 119 | mu0 120 | ?test.t 121 | t.test 122 | ?t.test 123 | t.test(o1, mu=mu1) 124 | mu0 125 | t.test(o1, mu=mu0) 126 | D0 127 | d0 128 | o2 = sample(d0, 7, replace=T, prob=p0) 129 | o2 = sample(x0, 7, replace=T, prob=p0) 130 | 02 131 | o2 132 | hist(o2) 133 | mean(o2) 134 | mu0 135 | tsc = t.test(o2, mu=mu0, paired=F) 136 | tsc 137 | tsc/2 138 | tsc.t 139 | ls(tsc) 140 | t.statistic 141 | t:statistic 142 | t$statistic 143 | tsc$statistic 144 | t_scores = vector(mode=float, length=1000) 145 | ?vector 146 | t_scores = vector(mode="float"", length=1000) 147 | t_scores = vector(mode="float, length=1000) 148 | t_scores = vector(mode="float"", length=1000) 149 | t_scores = vector(mode="float", length=1000) 150 | t_scores = vector(length=1000) 151 | t_scores 152 | t_scores = vector("list", 1000) 153 | t_scores 154 | t_scores = vector("numeric"", 1000) 155 | t_scores = vector("numeric", 1000) 156 | t_scores 157 | ?for 158 | for (i in 1:1000) 159 | o = sample(x0, 7, replace=T, prob=p0) 160 | for (i in 1:1000) 161 | o = sample(x0, 7, replace=T, prob=p0); 162 | for (i in 1:1000) 163 | o = sample(x0, 7, replace=T, prob=p0); t_scores[i] = t.test(o, mu=mu0)$statistic 164 | t_scores 165 | i 166 | for (i in 1:1000); print i 167 | for (i in 1:1000) print i 168 | for (i in 1:1000) print(i) 169 | o = sample(x0, 7, replace=T, prob=p0); t_scores[i] <- t.test(o, mu=mu0)$statistic 170 | o 171 | t.test(o, mu=mu0) 172 | t.test(o, mu=mu0)$statistic 173 | t_scores[-5] = .Last.value 174 | t_scores 175 | t_scores[-5] 176 | t_scores 177 | ?t_scores 178 | t_scoes 179 | t_scores 180 | lenth(t_scores) 181 | 
length(t_scores) 182 | t_scores[1] 183 | t_scores[-5] 184 | t_scores[1000] = .Last.value 185 | t_scores[1000] = .5 186 | t_scores[-5] 187 | t_scores = vector("numeric", 1000) 188 | for (1 in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0))$statistic 189 | for (i in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0))$statistic 190 | for (i in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0))$statistic 191 | t.test(sample(x0, 7, replace=T, prob=p0))$statistic 192 | vector[i] = t.test(sample(x0, 7, replace = T, prob = p0))$statistic 193 | t.test(sample(x0, 7, replace = T, prob = p0))$statisti 194 | t.test(sample(x0, 7, replace = T, prob = p0))$statistic 195 | res = t.test(sample(x0, 7, replace = T, prob = p0)) 196 | res 197 | res$statistic 198 | res$statistic + 5 199 | foo = c(0,0,0) 200 | foo 201 | foo[1] = res$statistic 202 | foo 203 | res = t.test(sample(x0, 7, replace = T, prob = p0))$statistic 204 | res 205 | foo[2] = t.test(sample(x0, 7, replace = T, prob = p0))$statistic 206 | foo 207 | res 208 | for (i in 1:1000) vector[i] = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 209 | t.test(sample(x0, 7, replace = T, prob = p0), mu = mu0)$statistic 210 | fo; 211 | for (i in 1:1000) {res = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0); t_scores[i] = res$statistic} 212 | x0 213 | for (i in 1:1000) {res = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0); t_scores[i] = res$statistic} 214 | t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0) 215 | t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0) 216 | for (i in 1:1000){} 217 | for (i in 1:1000){ 218 | res = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0) 219 | t_scores[i] = res$statistic 220 | } 221 | res 222 | for (i in 1:1000){ 223 | draw = sample(x0, 7, replace=T, prob=p0) 224 | res = t.test(draw, mu=mu0) 225 | t_scores[i] = res$statistic 226 | } 227 | for (i in 1:1000){ 228 | + draw = sample(x0, 7, replace=T, prob=p0) 229 | + res = t.test(draw, mu=mu0) 230 | + 
t_scores[i] = res$statistic 231 | + } 232 | for (i in 1:1000){ 233 | + draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 234 | + res = t.test(draw, mu=mu0) 235 | + t_scores[i] = res$statistic 236 | + } 237 | for (i in 1:1000){ 238 | + draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 239 | + res = t.test(draw, mu=mu0) 240 | + t_scores[i] = res$statistic 241 | + } 242 | for (i in 1:1000){ 243 | + draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 244 | + res = t.test(draw, mu=mu0) 245 | + t_scores[i] = res$statistic } 246 | + 247 | for (i in 1:1000){ 248 | draw = sample(x0, 7, replace=T, prob=p0); print(mean(draw)) 249 | res = t.test(draw, mu=mu0) 250 | } 251 | x0 252 | p0 253 | mu0 254 | t.test? 255 | ?t.test 256 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 257 | myvec = 0 258 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 259 | myvec 260 | plot(myvec) 261 | hist(myvec) 262 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 263 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 264 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 265 | for (i in 1:1000) myvec[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 266 | v = vector("numeric", 1000) 267 | v 268 | for (i in 1:1000) v[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 269 | for (i in 1:1000) v[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 270 | for (i in 1:1000) v[i] <- t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 271 | for (i in 1:1000) v[i] = t.test(sample(x0, 7, replace=T, prob=p0), mu=mu0)$statistic 272 | sample(x0, 7, replace=T, prob=p0) 273 | sample(x0, 7, replace=T, prob=p0) 274 | sample(x0, 7, replace=T, prob=p0) 275 | sample(x0, 7, replace=T, prob=p0) 276 | sample(x0, 7, replace=T, prob=p0) 
277 | dir() 278 | open("r_scripts/smonte1.r") 279 | ?quantile 280 | ?sprintf 281 | sprintf("hello%s", "world") 282 | mal_deaths = c(11, 15, 15, 16, 25, 20) 283 | mal_mean = mean(mal_deaths) 284 | mal_var = var(mal_deaths) 285 | mal_std = sqrt(var) 286 | print("Problem 1.b.i:") 287 | print(sprintf("Maltreatment death statistics:\n 288 | \tMean: %.3f\n 289 | \tVariance: %.3f\n 290 | \tS.D.: %.3f", mal_mean, mal_var, mal_std) 291 | ) 292 | ?var 293 | var(c(1,2,3,4,5,3,7,2,3)) 294 | sqrt(var(c(1,2,3,4,5,3,7,2,3))) 295 | var(c(1,2,3,4,5,3,7,2,3)) 296 | getwd() 297 | setwd("Class/252/Week1/") 298 | source("hw.r") 299 | setwd("~/Dropbox/TA/252/github/Week2") 300 | library(shiny) 301 | runApp("sampling_and_stderr") 302 | cut(rnorm(10), breaks=c(1)) 303 | cut(rnorm(10), breaks=c(1, 2)) 304 | max(hist(rnorm(100))) 305 | max(hist(rnorm(100))$counts) 306 | runApp() 307 | runApp("sampling_and_stderr") 308 | d <- c(80, 76, 81, 72, 68, 76) 309 | m <- mean(d) 310 | s <- sd(d) 311 | s.from.var = sqrt(var(d)) 312 | s == s.from.vac 313 | s == s.from.var 314 | ci.low <- m - se * 1.96 315 | ci.high <- m + se * 1.96 316 | sprintf("95%% CI: %.2f, %.2f", ci.low, ci.high) 317 | ci.low <- m - se * 1.96 318 | ci.high <- m + se * 1.96 319 | sprintf("95%% CI: %.2f, %.2f", ci.low, ci.high) 320 | n <- length(d) 321 | se <- s / n ^ .5 322 | ci.low <- m - se * 1.96 323 | ci.high <- m + se * 1.96 324 | sprintf("95%% CI: %.2f, %.2f", ci.low, ci.high) 325 | for i in 1:10 { print(i) } 326 | for i = 1:10 { print(i) } 327 | for (i = 1:10) { print(i) } 328 | for (i in 1:10) { print(i) } 329 | matrix(rnorm(10), nrow=2) 330 | d = matrix(rnorm(10), nrow=2) 331 | apply(d, mean) 332 | apply(d, 1, mean) 333 | pt(c(1, 2, 3)) 334 | pt(c(1, 2, 3), 18) 335 | pt(c(1, 2, 3), 18, lower.tail=TRUE) 336 | pt(c(1, 2, 3), 18, lower.tail=FALSE) 337 | apply(d, 1, t.test) 338 | runApp("ttest_simulation") 339 | n.sim <- 1000 340 | input = list(sample.size=20, true.mean=0, true.sd=1) 341 | sample.data <- rnorm(n.sim * 
input$sample.size, input$true.mean, input$true.sd) 342 | sample.data <- matrix(sample.data, nrow=n.sim) 343 | sample.means <- apply(sample.data, 1, mean) 344 | sample.sems <- apply(sample.data, 1, sd) / sqrt(input$sample.size) 345 | t.stats = sample.means / sample.sems 346 | p.values = pt(t.stats, input$sample.size - 1) 347 | tb 348 | traceback 349 | traceback() 350 | runApp("ttest_simulation") 351 | ?power.t.test 352 | power.t.test(20, 1, 1, .05) 353 | power.t.test(20, 1, 1, .05)$power 354 | runApp("ttest_simulation") 355 | runApp("ttest_simulation") 356 | traceback 357 | traceback() 358 | runApp("ttest_simulation") 359 | -------------------------------------------------------------------------------- /shiny/ttest_simulation/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwaskom/StatApps/8e0b7594ce360e5cb43c1dd1dc254fa1e7edf818/shiny/ttest_simulation/screenshot.png -------------------------------------------------------------------------------- /shiny/ttest_simulation/server.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | shinyServer(function(input, output) { 4 | 5 | 6 | n.sim <- 1000 7 | 8 | draw.sample <- reactive({ 9 | 10 | m <- input$effect.size 11 | s <- 1 12 | sample.data <- rnorm(n.sim * input$sample.size, m, s) 13 | sample.data <- matrix(sample.data, nrow=n.sim) 14 | 15 | sample.means <- apply(sample.data, 1, mean) 16 | sample.sems <- apply(sample.data, 1, sd) / sqrt(input$sample.size) 17 | 18 | df <- input$sample.size - 1 19 | t.stats <- sample.means / sample.sems 20 | p.values <- pt(t.stats, df, lower.tail=FALSE) 21 | 22 | sig.rate <- sum(p.values < 0.05) / n.sim 23 | power <- power.t.test(input$sample.size, m, s, 0.05, 24 | type="one.sample", alternative="one.sided")$power 25 | 26 | list(df=df, t.stats=t.stats, p.values=p.values, sig.rate=sig.rate, power=power) 27 | 28 | }) 29 | 30 | output$t.stats <- 
renderPlot({ 31 | 32 | sample <- draw.sample() 33 | plot.title <- sprintf("Power: %.2f; Proportion of rejected nulls: %.2f", sample$power, sample$sig.rate) 34 | hist(sample$t.stats, 25, col="dodgerblue", main=plot.title, xlab="t statistics") 35 | abline(v=qt(0.05, sample$df, lower.tail=FALSE), lwd=5) 36 | 37 | }) 38 | 39 | output$p.values <- renderPlot({ 40 | 41 | sample <- draw.sample() 42 | bins <- seq(0, 1, length.out=40) 43 | hist(sample$p.values, bins, col="tomato", main=NULL, xlab="p values") 44 | abline(v=0.05, lwd=5) 45 | 46 | }) 47 | 48 | }) -------------------------------------------------------------------------------- /shiny/ttest_simulation/shinyapps/mwaskom/ttest_simulation.dcf: -------------------------------------------------------------------------------- 1 | name: ttest_simulation 2 | account: mwaskom 3 | bundleId: 200342 4 | url: https://mwaskom.shinyapps.io/ttest_simulation 5 | -------------------------------------------------------------------------------- /shiny/ttest_simulation/ui.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | fig.width <- 600 4 | fig.height <- 350 5 | 6 | shinyUI(pageWithSidebar( 7 | 8 | headerPanel("Simulating T-Tests"), 9 | 10 | sidebarPanel( 11 | 12 | div(p("Simulate 1000 one-sample t-tests of H_0: mu <= 0 while manipulating the effect size and the number of observations per sample.")), 13 | 14 | div( 15 | 16 | sliderInput("effect.size", 17 | strong("Effect size"), 18 | min=0, max=1, value=0, step=.1, ticks=FALSE), 19 | sliderInput("sample.size", 20 | strong("Number of observations in a sample"), 21 | min=1, max=50, value=20, step=1, ticks=FALSE) 22 | 23 | ) 24 | ), 25 | 26 | mainPanel( 27 | plotOutput("t.stats", width=fig.width, height=fig.height), 28 | plotOutput("p.values", width=fig.width, height=fig.height) 29 | ) 30 | 31 | )) --------------------------------------------------------------------------------
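Editor's note: the Monte Carlo loop in `ttest_simulation/server.R` can be sketched in stdlib Python for readers who want to experiment outside Shiny. This is an illustrative port, not repo code: it approximates the 5% one-sided critical value with a normal quantile (the app uses the exact t quantile via `qt()`/`pt()`), so small-sample rejection rates differ slightly, and every name in it is hypothetical.

```python
import random
import statistics


def one_sample_t(sample):
    """t statistic for a one-sample test of mean 0: mean / (sd / sqrt(n))."""
    sem = statistics.stdev(sample) / len(sample) ** 0.5
    return statistics.mean(sample) / sem


def rejection_rate(effect_size, sample_size=20, n_sim=1000, seed=0):
    """Fraction of n_sim simulated one-sided tests that reject at alpha = .05.

    Normal-approximation critical value (~1.645) stands in for the app's
    qt(0.05, df, lower.tail=FALSE).
    """
    rng = random.Random(seed)
    crit = statistics.NormalDist().inv_cdf(0.95)
    rejections = sum(
        one_sample_t([rng.gauss(effect_size, 1) for _ in range(sample_size)]) > crit
        for _ in range(n_sim)
    )
    return rejections / n_sim
```

With effect size 0 the rejection rate hovers near the nominal alpha of 0.05 (slightly above it, because of the normal approximation); with effect size 1 and 20 observations it approaches 1, matching the high power the app reports via `power.t.test`.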