├── README
├── amdahl.g
├── da
│   ├── README
│   ├── da-common.r
│   ├── da-libreal.r
│   ├── da-libsynth.r
│   ├── dist.r
│   └── waterfall.sh
├── data.txt
├── frequencytrail.r
├── frequencytrailtest.pdf
├── linear.g
├── scale.pdf
├── scale.r
├── tools
│   ├── README
│   ├── interval.r
│   └── scatter.r
├── usl.g
├── util-md1.pdf
└── util-md1.r

/README:
--------------------------------------------------------------------------------
1 | Performance Scalability Models
2 | 
3 | This is some software to aid performance scalability analysis. It includes:
4 | 
5 | linear.g      Linear scalability model
6 | amdahl.g      Amdahl's law scalability
7 | usl.g         Universal Scalability Law
8 | scale.r       Scalability Models (Amdahl, USL)
9 | util-md1.r    Queueing Theory M/D/1 mean response time vs util
10 | data.txt      Sample input file for scalability modeling
11 | da/           Distribution Analysis
12 | 
13 | These are more projects than stand-alone tools; expect to customize them
14 | for each system you are modeling, and they assume you already understand
15 | performance scalability analysis.
16 | 
--------------------------------------------------------------------------------
/amdahl.g:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env gnuplot
2 | #
3 | # amdahl.g	Amdahl's law scalability using gnuplot.
4 | #
5 | # This fits Amdahl's law, which models scalability (maximum speedup), to the
6 | # input data set. It uses regression analysis to determine the constants.
7 | #
8 | # USAGE: ./amdahl.g
9 | #
10 | # See the "tunables" section for defining the input data file, and the number
11 | # of rows to include as model input. The remainder of rows are drawn as
12 | # "extra" data points. The file has the form:
13 | #
14 | # N Result
15 | # 1 2.1
16 | # 2 4.0
17 | # 3 5.9
18 | # ...
19 | #
20 | # The row order can be rearranged to customize the model input.
21 | #
22 | # Copyright 2012 Brendan Gregg. All rights reserved.
23 | #
24 | # CDDL HEADER START
25 | #
26 | # The contents of this file are subject to the terms of the
27 | # Common Development and Distribution License (the "License").
28 | # You may not use this file except in compliance with the License.
29 | #
30 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
31 | # or http://www.opensolaris.org/os/licensing.
32 | # See the License for the specific language governing permissions
33 | # and limitations under the License.
34 | #
35 | # When distributing Covered Code, include this CDDL HEADER in each
36 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
37 | # If applicable, add the following below this CDDL HEADER, with the
38 | # fields enclosed by brackets "[]" replaced with your own identifying
39 | # information: Portions Copyright [yyyy] [name of copyright owner]
40 | #
41 | # CDDL HEADER END
42 | #
43 | # 03-May-2012	Brendan Gregg	Created this.
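#
# The model fitted below is Amdahl's law: speedup(N) = N / (1 + alpha * (N - 1)),
# where alpha is the serial (non-parallelizable) fraction of the workload, so
# speedup approaches a ceiling of 1/alpha as N grows. amdahl(N) scales this by
# N1, the first measured result, to convert speedup into throughput.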
44 | 
45 | set terminal x11 font "arial,14"	# designed for x11 (redraws)
46 | set autoscale
47 | 
48 | # tunables
49 | filename = "data.txt"	# data file
50 | inputN = 12		# rows to include as model input
51 | scale = 1.5		# scale graph beyond data points
52 | set grid
53 | 
54 | set xlabel "CPUs (N)"
55 | set ylabel "Throughput"
56 | set title "Amdahl Scalability"
57 | set key on right bottom
58 | set pointsize 2
59 | 
60 | # read N1, the first value for normalizing the plot (workaround)
61 | plot filename using 1:(N1 = $2, 0/0) every 1:1:1:0:1:0 notitle, '' using 1:($2 / N1) with linespoints
62 | 
63 | # Amdahl
64 | alpha = 0.01
65 | amdahl(N) = N1 * N/(1 + alpha * (N - 1))
66 | 
67 | # regression analysis (non-linear least squares fitting)
68 | fit amdahl(x) filename every ::1::inputN using 1:2 via alpha
69 | 
70 | # plot data points
71 | plot filename using 1:2 with points pt 6 lc rgb "#f00000" title "extra measurements",\
72 |     filename every ::1::inputN using 1:2 with points pt 6 lc rgb "#000000" title "input for Amdahl"
73 | set label sprintf("a = %.4f", alpha) at graph 0.5, 0.075 center
74 | set yrange [0:GPVAL_DATA_Y_MAX * scale]
75 | set xrange [0:GPVAL_DATA_X_MAX * scale]
76 | 
77 | # plot curves
78 | replot amdahl(x) with line lc rgb "#000000" title "Amdahl(N)"
79 | 
80 | pause -1 "Hit return to continue"
81 | 
--------------------------------------------------------------------------------
/da/README:
--------------------------------------------------------------------------------
1 | Distribution Analysis
2 | 
3 | Work in progress...
4 | 
--------------------------------------------------------------------------------
/da/da-common.r:
--------------------------------------------------------------------------------
1 | # da-common.r	Common functions for dist.r.
2 | #
3 | # 01-Jun-2013	Brendan Gregg	Created this.
4 | 
5 | # printf
6 | printf <- function(...) cat(sprintf(...))
7 | 
8 | # randomize ordering (a random permutation, equivalent to sample(data))
9 | randomize <- function(data) {
10 | 	data0 <- data
11 | 	data <- c()
12 | 	for(i in 1:length(data0)) {
13 | 		ii <- sample(1:length(data0), 1)
14 | 		data[i] <- data0[ii]
15 | 		data0 <- data0[c(-ii)]
16 | 	}
17 | 	return(data)
18 | }
19 | 
--------------------------------------------------------------------------------
/da/da-libreal.r:
--------------------------------------------------------------------------------
1 | # da-libreal.r	Some Real Latency Distributions
2 | #
3 | # This is a library for dist.r. It requires the data files listed
4 | # below (see read.table).
5 | #
6 | # Input:
7 | #	N	number of target elements (may return a little less)
8 | #	type	a distribution type ID (see list below)
9 | # Output:
10 | #	data	data set
11 | #
12 | # 01-Jun-2013	Brendan Gregg	Created this.
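#
# The out.iosnoop_* files read below are assumed to be iosnoop capture
# output; the DELTA column (per-I/O time, microseconds) provides the
# latency data set.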
13 | 14 | # type description 15 | # 500 faithful bimodal 16 | # 501 real disk I/O latency bimodal far 17 | # 502 real disk I/O latency bimodal far outliers 18 | 19 | if (type == 500) { # faithful bimodal 20 | outliers <- "N" 21 | attach(faithful); N <- length(eruptions); data <- eruptions 22 | 23 | } else if (type == 501) { # random disk I/O 24 | outliers <- "N" 25 | input <- read.table("out.iosnoop_randread01", header=FALSE, skip=1, 26 | nrows=N, col.names=c("STIME","TIME","DELTA","DTIME","UID","PID", 27 | "D","BLOCK","SIZE","COMM","PATHNAME")) 28 | attach(input); input <- input[DELTA < 10000, ] 29 | data <- input$DELTA 30 | N <- length(data) 31 | if (random) { data <- randomize(data) } 32 | 33 | } else if (type == 502) { # random disk I/O outliers 34 | outliers <- "Y" 35 | input <- read.table("out.iosnoop_randread01", header=FALSE, skip=1, 36 | nrows=N, col.names=c("STIME","TIME","DELTA","DTIME","UID","PID", 37 | "D","BLOCK","SIZE","COMM","PATHNAME")) 38 | attach(input); 39 | data <- input$DELTA 40 | if (random) { data <- randomize(data) } 41 | 42 | } else if (type == 503) { # random sync disk I/O outliers 43 | outliers <- "Y" 44 | input <- read.table("out.iosnoop_marssync01", header=FALSE, skip=1, 45 | nrows=N, col.names=c("STIME","TIME","DELTA","DTIME","UID","PID", 46 | "D","BLOCK","SIZE","COMM","PATHNAME")) 47 | attach(input); 48 | data <- input$DELTA 49 | if (random) { data <- randomize(data) } 50 | } 51 | -------------------------------------------------------------------------------- /da/da-libsynth.r: -------------------------------------------------------------------------------- 1 | # da-libsynth.r Synthetic Latency Distributions 2 | # 3 | # This is a library for dist.r. 4 | # 5 | # This defines various synthetic distributions for modeling I/O latency. 6 | # The distributions are composed of values that are typically between 0 and 7 | # 10000, with a mean around 1000. This is loosely based on storage device I/O 8 | # latency, in units of microseconds. You can adjust these as desired. 9 | # 10 | # Input: 11 | # N number of target elements (may return a little less) 12 | # type a distribution type ID (see list below) 13 | # Output: 14 | # data data set 15 | # 16 | # 01-Jun-2013 Brendan Gregg Created this. 
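#
# Each branch below sets two variables consumed by dist.r: data (the
# synthetic sample) and outliers ("Y", "N", or "?" when ambiguous). A
# stand-alone run (assuming VGAM is installed, and da-common.r for
# randomize()) might look like:
#
#	N <- 5000; type <- 203; source("da-common.r"); source("da-libsynth.r")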
17 | 18 | library(VGAM) # rpareto 19 | 20 | # type description 21 | # 0 uniform narrow 22 | # 1 uniform wide 23 | # 2 uniform outliers 24 | # 100 unimodal normal narrow 25 | # 101 unimodal normal medium 26 | # 102 unimodal normal wide 27 | # 103 unimodal normal with tail 28 | # 110 unimodal normal narrow band reject 29 | # 111 unimodal normal spike 30 | # 112 unimodal normal fenced 31 | # 113 unimodal normal quantized 32 | # 120 unimodal poisson 33 | # 121 unimodal poisson outliers 34 | # 130 unimodal pareto narrow 35 | # 131 unimodal pareto wide 36 | # 140 unimodal normal outliers 1% medium 37 | # 141 unimodal normal outliers 1% far 38 | # 142 unimodal normal outliers 1% very far 39 | # 143 unimodal normal outliers 2% 40 | # 144 unimodal normal outliers 4% 41 | # 145 unimodal normal outliers 2% clustered 42 | # 146 unimodal normal outliers 4% close 1 43 | # 147 unimodal normal outliers 4% close 2 44 | # 148 unimodal normal outliers 4% close 3 45 | # 149 unimodal normal outliers 4% close 4 46 | # 150 unimodal normal outliers 4% close 5 47 | # 151 unimodal normal outliers 4% close 6 48 | # 152 unimodal normal outliers 4% close 7 49 | # 153 unimodal normal outliers 0.5% 50 | # 154 unimodal normal outliers 0.2% 51 | # 155 unimodal normal outliers 0.1% 52 | # 200 bimodal normal very close 53 | # 201 bimodal normal close 54 | # 202 bimodal normal medium 55 | # 203 bimodal normal far 56 | # 204 bimodal normal outliers 1% 57 | # 205 bimodal normal outliers 2% 58 | # 206 bimodal normal outliers 4% 59 | # 210 bimodal normal major minor 60 | # 211 bimodal normal minor major 61 | # 212 bimodal normal major minor outliers 62 | # 213 bimodal normal minor major outliers 63 | # 214 bimodal far normal far outliers 1% (blog) 64 | # 215 bimodal very far normal far outliers 1% (blog) 65 | # 216 bimodal very far major minor outliers 1% (blog) 66 | # 300 trimodal normal close 67 | # 301 trimodal normal medium 68 | # 302 trimodal normal far 69 | # 303 trimodal normal outliers 70 | # 304 trimodal normal major medium minor 71 | # 305 trimodal normal minor major minor 72 | # 306 trimodal normal minor major medium 73 | # 307 trimodal normal major minor medium 74 | # 400 quadmodal normal close 75 | # 401 quadmodal normal medium 76 | # 402 quadmodal normal far 77 | # 403 quadmodal normal outliers 78 | # 1000+ unimodal normal outliers random 79 | 80 | # definitions 81 | set.seed(type) 82 | if (type == 0) { # uniform narrow 83 | outliers <- "N" 84 | data <- runif(N, min=500, max=1500) 85 | 86 | } else if (type == 1) { # uniform wide 87 | outliers <- "N" 88 | data <- runif(N, min=0, max=3000) 89 | 90 | } else if (type == 2) { # uniform outliers 91 | outliers <- "Y" 92 | data <- c(runif(N * 0.99, min=500, max=1500), 93 | runif(N * 0.01, min=1500, max=10000)) 94 | 95 | } else if (type == 100) { # unimodal normal narrow 96 | outliers <- "N" 97 | data <- rnorm(N, mean=1000, sd=100) 98 | 99 | } else if (type == 101) { # unimodal normal medium 100 | outliers <- "N" 101 | data <- rnorm(N, mean=1000, sd=200) 102 | 103 | } else if (type == 102) { # unimodal normal wide 104 | outliers <- "N" 105 | data <- rnorm(N, mean=1000, sd=300) 106 | 107 | } else if (type == 103) { # unimodal normal with tail 108 | outliers <- "N" 109 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 110 | runif(N * 0.04, min=1000, max=2250)) 111 | data <- randomize(data) 112 | 113 | } else if (type == 104) { # unimodal normal wide 114 | outliers <- "N" 115 | data <- rnorm(N, mean=1120, sd=700) 116 | 117 | } else if (type == 110) { # unimodal band reject 118 | 
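	# band reject: values falling between 770 and 800 are dropped,
	# cutting a narrow notch in the mode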
outliers <- "N"
119 | 	data0 <- rnorm(N, mean=1000, sd=200)
120 | 	ii <- 1
121 | 	for(i in 1:N) {
122 | 		if (data0[i] < 770 || data0[i] > 800) {
123 | 			data[ii] <- data0[i]
124 | 			ii <- ii + 1
125 | 		}
126 | 	}
127 | 	N <- length(data)
128 | 
129 | } else if (type == 111) {	# unimodal normal spike
130 | 	outliers <- "N"
131 | 	data <- c(rnorm(N * 0.98, mean=1000, sd=200),
132 | 		rnorm(N * 0.02, mean=750, sd=1))
133 | 	data <- randomize(data)
134 | 
135 | } else if (type == 112) {	# unimodal normal fence
136 | 	outliers <- "N"
137 | 	N <- N * 2
138 | 	data0 <- rnorm(N, mean=1000, sd=200)
139 | 	ii <- 1
140 | 	for(i in 1:N) {
141 | 		if ((data0[i] %% 64) < 32) {
142 | 			data[ii] <- data0[i]
143 | 			ii <- ii + 1
144 | 		}
145 | 		if (ii > 5000) { break }	# cap (assumes the default N of 5000)
146 | 	}
147 | 	N <- length(data)
148 | 
149 | } else if (type == 113) {	# unimodal normal quantized
150 | 	outliers <- "N"
151 | 	data0 <- rnorm(N, mean=1000, sd=200)
152 | 	for(i in 1:N) {
153 | 		data[i] <- floor(data0[i] / 64) * 64
154 | 	}
155 | 
156 | } else if (type == 120) {	# unimodal poisson
157 | 	outliers <- "N"
158 | 	data <- rpois(N, lambda=1000)
159 | 
160 | } else if (type == 121) {	# unimodal poisson outliers
161 | 	outliers <- "Y"
162 | 	data <- c(rpois(N * 0.99, lambda=1000),
163 | 		runif(N * 0.01, min=1000, max=5000))
164 | 
165 | } else if (type == 130) {	# unimodal pareto narrow
166 | 	outliers <- "N"
167 | 	data <- rpareto(N, 1000, 3)
168 | 
169 | } else if (type == 131) {	# unimodal pareto wide
170 | 	outliers <- "N"
171 | 	data <- rpareto(N, 1000, 10)
172 | 
173 | } else if (type == 140) {	# unimodal normal outliers 1% medium
174 | 	outliers <- "Y"
175 | 	data <- c(rnorm(N * 0.99, mean=1000, sd=200),
176 | 		runif(N * 0.01, min=1000, max=5000))
177 | 	data <- randomize(data)
178 | 
179 | } else if (type == 141) {	# unimodal normal outliers 1% far
180 | 	outliers <- "Y"
181 | 	data <- c(rnorm(N * 0.99, mean=1000, sd=200),
182 | 		runif(N * 0.01, min=1000, max=10000))
183 | 	data <- randomize(data)
184 | 
185 | } else if (type == 142) {	# unimodal normal outliers 1% very far
186 | 	outliers <- "Y"
187 | 	data <- c(rnorm(N * 0.99, mean=1000, sd=200),
188 | 		runif(N * 0.01, min=1000, max=50000))
189 | 	data <- randomize(data)
190 | 
191 | } else if (type == 143) {	# unimodal normal outliers 2%
192 | 	outliers <- "Y"
193 | 	data <- c(rnorm(N * 0.98, mean=1000, sd=200),
194 | 		runif(N * 0.02, min=1000, max=5000))
195 | 	data <- randomize(data)
196 | 
197 | } else if (type == 144) {	# unimodal normal outliers 4%
198 | 	outliers <- "Y"
199 | 	data <- c(rnorm(N * 0.96, mean=1000, sd=200),
200 | 		runif(N * 0.04, min=1000, max=5000))
201 | 	data <- randomize(data)
202 | 
203 | } else if (type == 145) {	# unimodal normal outliers 2% clustered
204 | 	outliers <- "?"
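	# the 2% form a tight secondary cluster at 3000 (sd=35), arguably a
	# second mode rather than outliers, hence the "?"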
205 | data <- c(rnorm(N * 0.98, mean=1000, sd=200), 206 | rnorm(N * 0.02, mean=3000, sd=35)) 207 | data <- randomize(data) 208 | 209 | } else if (type == 146) { # unimodal normal outliers 4% close 1 210 | outliers <- "Y" 211 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 212 | runif(N * 0.04, min=1000, max=2700)) 213 | data <- randomize(data) 214 | 215 | } else if (type == 147) { # unimodal normal outliers 4% close 2 216 | outliers <- "Y" 217 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 218 | runif(N * 0.04, min=1000, max=2900)) 219 | data <- randomize(data) 220 | 221 | } else if (type == 148) { # unimodal normal outliers 4% close 3 222 | outliers <- "Y" 223 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 224 | runif(N * 0.04, min=1000, max=3100)) 225 | data <- randomize(data) 226 | 227 | } else if (type == 149) { # unimodal normal outliers 4% close 4 228 | outliers <- "Y" 229 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 230 | runif(N * 0.04, min=1000, max=3300)) 231 | data <- randomize(data) 232 | 233 | } else if (type == 150) { # unimodal normal outliers 4% close 5 234 | outliers <- "Y" 235 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 236 | runif(N * 0.04, min=1000, max=3500)) 237 | data <- randomize(data) 238 | 239 | } else if (type == 151) { # unimodal normal outliers 4% close 6 240 | outliers <- "Y" 241 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 242 | runif(N * 0.04, min=1000, max=3700)) 243 | data <- randomize(data) 244 | 245 | } else if (type == 152) { # unimodal normal outliers 4% close 7 246 | outliers <- "Y" 247 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 248 | runif(N * 0.04, min=1000, max=3900)) 249 | data <- randomize(data) 250 | 251 | } else if (type == 153) { # unimodal normal outliers 0.5% 252 | outliers <- "Y" 253 | data <- c(rnorm(N * 0.995, mean=1000, sd=200), 254 | runif(N * 0.005, min=1000, max=5000)) 255 | data <- randomize(data) 256 | 257 | } else if (type == 154) { # unimodal normal outliers 0.2% 258 | outliers <- "Y" 259 | data <- c(rnorm(N * 0.998, mean=1000, sd=200), 260 | runif(N * 0.002, min=1000, max=5000)) 261 | data <- randomize(data) 262 | 263 | } else if (type == 155) { # unimodal normal outliers 0.1% 264 | outliers <- "Y" 265 | data <- c(rnorm(N * 0.999, mean=1000, sd=200), 266 | runif(N * 0.001, min=1000, max=5000)) 267 | data <- randomize(data) 268 | 269 | } else if (type == 200) { # bimodal normal very close 270 | outliers <- "N" 271 | data <- c(rnorm(N / 2, mean=850, sd=110), 272 | rnorm(N / 2, mean=1150, sd=110)) 273 | data <- randomize(data) 274 | 275 | } else if (type == 201) { # bimodal normal close 276 | outliers <- "N" 277 | data <- c(rnorm(N / 2, mean=825, sd=110), 278 | rnorm(N / 2, mean=1175, sd=110)) 279 | data <- randomize(data) 280 | 281 | } else if (type == 202) { # bimodal normal medium 282 | outliers <- "N" 283 | data <- c(rnorm(N / 2, mean=750, sd=110), 284 | rnorm(N / 2, mean=1250, sd=110)) 285 | data <- randomize(data) 286 | 287 | } else if (type == 203) { # bimodal normal far 288 | outliers <- "N" 289 | data <- c(rnorm(N / 2, mean=600, sd=110), 290 | rnorm(N / 2, mean=1400, sd=110)) 291 | data <- randomize(data) 292 | 293 | } else if (type == 204) { # bimodal normal outliers 1% 294 | outliers <- "Y" 295 | data <- c(rnorm(N * 0.495, mean=750, sd=110), 296 | rnorm(N * 0.495, mean=1250, sd=110), 297 | runif(N * 0.01, min=1000, max=5000)) 298 | data <- randomize(data) 299 | 300 | } else if (type == 205) { # bimodal normal outliers 2% 301 | outliers <- "Y" 302 | data <- c(rnorm(N * 0.49, mean=750, sd=110), 303 | 
		rnorm(N * 0.49, mean=1250, sd=110),
304 | 		runif(N * 0.02, min=1000, max=5000))
305 | 	data <- randomize(data)
306 | 
307 | } else if (type == 206) {	# bimodal normal outliers 4%
308 | 	outliers <- "Y"
309 | 	data <- c(rnorm(N * 0.48, mean=750, sd=110),
310 | 		rnorm(N * 0.48, mean=1250, sd=110),
311 | 		runif(N * 0.04, min=1000, max=5000))
312 | 	data <- randomize(data)
313 | 
314 | } else if (type == 210) {	# bimodal normal major minor
315 | 	outliers <- "N"
316 | 	data <- c(rnorm(N * 0.7, mean=750, sd=110),
317 | 		rnorm(N * 0.3, mean=1250, sd=110))
318 | 	data <- randomize(data)
319 | 
320 | } else if (type == 211) {	# bimodal normal minor major
321 | 	outliers <- "N"
322 | 	data <- c(rnorm(N * 0.3, mean=750, sd=110),
323 | 		rnorm(N * 0.7, mean=1250, sd=110))
324 | 	data <- randomize(data)
325 | 
326 | } else if (type == 212) {	# bimodal normal major minor outliers
327 | 	outliers <- "Y"
328 | 	data <- c(rnorm(N * 0.695, mean=750, sd=110),
329 | 		rnorm(N * 0.295, mean=1250, sd=110),
330 | 		runif(N * 0.01, min=1000, max=5000))
331 | 	N <- length(data)
332 | 	data <- randomize(data)
333 | 
334 | } else if (type == 213) {	# bimodal normal minor major outliers
335 | 	outliers <- "Y"
336 | 	data <- c(rnorm(N * 0.295, mean=750, sd=110),
337 | 		rnorm(N * 0.695, mean=1250, sd=110),
338 | 		runif(N * 0.01, min=1000, max=5000))
339 | 	N <- length(data)
340 | 	data <- randomize(data)
341 | 
342 | } else if (type == 214) {	# bimodal far normal far outliers 1%
343 | 	outliers <- "Y"
344 | 	data <- c(rnorm(N * 0.499, mean=500, sd=150),
345 | 		rnorm(N * 0.499, mean=2000, sd=300),
346 | 		runif(N * 0.002, min=1000, max=180000))
347 | 	data <- randomize(data)
348 | 
349 | } else if (type == 215) {	# bimodal very far normal far outliers 1%
350 | 	outliers <- "Y"
351 | 	data <- c(rnorm(N * 0.499, mean=500, sd=100),
352 | 		rnorm(N * 0.499, mean=4000, sd=500),
353 | 		runif(N * 0.002, min=1000, max=180000))
354 | 	data <- randomize(data)
355 | 
356 | } else if (type == 216) {	# bimodal very far major minor outliers 1%
357 | 	outliers <- "Y"
358 | 	data <- c(rnorm(N * 0.667, mean=500, sd=100),
359 | 		rnorm(N * 0.333, mean=4000, sd=100),
360 | 		runif(N * 0.002, min=1000, max=180000))
361 | 	data <- randomize(data)
362 | 
363 | } else if (type == 300) {	# trimodal normal close
364 | 	outliers <- "N"
365 | 	data <- c(rnorm(N * 0.333, mean=750, sd=90),
366 | 		rnorm(N * 0.334, mean=1000, sd=90),
367 | 		rnorm(N * 0.333, mean=1250, sd=90))
368 | 	N <- length(data)
369 | 	data <- randomize(data)
370 | 
371 | } else if (type == 301) {	# trimodal normal medium
372 | 	outliers <- "N"
373 | 	data <- c(rnorm(N * 0.333, mean=500, sd=100),
374 | 		rnorm(N * 0.334, mean=1000, sd=100),
375 | 		rnorm(N * 0.333, mean=1500, sd=100))
376 | 	data <- randomize(data)
377 | 
378 | } else if (type == 302) {	# trimodal normal far
379 | 	outliers <- "N"
380 | 	data <- c(rnorm(N * 0.333, mean=500, sd=65),
381 | 		rnorm(N * 0.334, mean=1000, sd=65),
382 | 		rnorm(N * 0.333, mean=1500, sd=65))
383 | 	data <- randomize(data)
384 | 
385 | } else if (type == 303) {	# trimodal normal outliers
386 | 	outliers <- "Y"
387 | 	data <- c(rnorm(N * 0.333, mean=500, sd=100),
388 | 		rnorm(N * 0.334, mean=1000, sd=100),
389 | 		rnorm(N * 0.333, mean=1500, sd=100),
390 | 		runif(N * 0.01, min=1000, max=5000))
391 | 	data <- randomize(data)
392 | 
393 | } else if (type == 304) {	# trimodal normal major medium minor
394 | 	outliers <- "N"
395 | 	data <- c(rnorm(N * 0.50, mean=500, sd=100),
396 | 		rnorm(N * 0.33, mean=1000, sd=100),
397 | 		rnorm(N * 0.17, mean=1500, sd=100))
398 | 	data <- randomize(data)
399 | 
400 | } else if (type == 305) {	# trimodal
normal minor major minor 401 | outliers <- "N" 402 | data <- c(rnorm(N * 0.25, mean=500, sd=100), 403 | rnorm(N * 0.50, mean=1000, sd=100), 404 | rnorm(N * 0.25, mean=1500, sd=100)) 405 | data <- randomize(data) 406 | 407 | } else if (type == 306) { # trimodal normal minor major medium 408 | outliers <- "N" 409 | data <- c(rnorm(N * 0.17, mean=500, sd=100), 410 | rnorm(N * 0.50, mean=1000, sd=100), 411 | rnorm(N * 0.33, mean=1500, sd=100)) 412 | data <- randomize(data) 413 | 414 | } else if (type == 307) { # trimodal normal major minor medium 415 | outliers <- "N" 416 | data <- c(rnorm(N * 0.50, mean=500, sd=100), 417 | rnorm(N * 0.17, mean=1000, sd=100), 418 | rnorm(N * 0.33, mean=1500, sd=100)) 419 | data <- randomize(data) 420 | 421 | } else if (type == 400) { # quad normal close 422 | outliers <- "N" 423 | data <- c(rnorm(N * 0.25, mean=700, sd=75), 424 | rnorm(N * 0.25, mean=900, sd=75), 425 | rnorm(N * 0.25, mean=1100, sd=75), 426 | rnorm(N * 0.25, mean=1300, sd=75)) 427 | data <- randomize(data) 428 | 429 | } else if (type == 401) { # quad normal medium 430 | outliers <- "N" 431 | data <- c(rnorm(N * 0.25, mean=700, sd=50), 432 | rnorm(N * 0.25, mean=900, sd=50), 433 | rnorm(N * 0.25, mean=1100, sd=50), 434 | rnorm(N * 0.25, mean=1300, sd=50)) 435 | data <- randomize(data) 436 | 437 | } else if (type == 402) { # quad normal far 438 | outliers <- "N" 439 | data <- c(rnorm(N * 0.25, mean=400, sd=60), 440 | rnorm(N * 0.25, mean=800, sd=60), 441 | rnorm(N * 0.25, mean=1200, sd=60), 442 | rnorm(N * 0.25, mean=1600, sd=60)) 443 | data <- randomize(data) 444 | 445 | } else if (type == 403) { # quad normal outliers 446 | outliers <- "Y" 447 | data <- c(rnorm(N * 0.25, mean=700, sd=50), 448 | rnorm(N * 0.25, mean=900, sd=50), 449 | rnorm(N * 0.25, mean=1100, sd=50), 450 | rnorm(N * 0.24, mean=1300, sd=50), 451 | runif(N * 0.01, min=1000, max=5000)) 452 | data <- randomize(data) 453 | 454 | } else if (type >= 1000) { # relative to type num 455 | set.seed(type) 456 | outliers <- "?" 457 | d_mean <- runif(1, 1000, 5000) 458 | d_sd <- runif(1, 10, 2000) 459 | o_ratio <- sample(1:10)[1] 460 | o_max <- d_mean + runif(1, 0, d_sd * 5) + runif(1, 0, 10)^5 461 | data <- c(rnorm(N * (1 - o_ratio/1000), mean=d_mean, sd=d_sd), 462 | runif(N * o_ratio/1000, min=d_mean, max=o_max)) 463 | N <- length(data) 464 | data <- randomize(data) 465 | } 466 | -------------------------------------------------------------------------------- /da/dist.r: -------------------------------------------------------------------------------- 1 | # dist.r Distribution Analysis 2 | # 3 | # This analyzes data set distributions, both synthetic and actual. It is 4 | # especially intended for latency distributions, such as disk I/O latency, 5 | # to aid in computer performance analysis. 6 | # 7 | # This uses the libraries da-common.r, da-libsynth.r, da-libreal.r. 8 | # 9 | # Environment variables can be set to control behavior and output: see the 10 | # environment section below. These are set by parent shell scripts which 11 | # execute a series of dist.r runs to generate composite images. 12 | # 13 | # I doubt this is a good example of R scripting. This includes considerable 14 | # extra complexity for the environment and process it is used in, which won't 15 | # be apparent from this script alone. 16 | # 17 | # Copyright 2013 Brendan Gregg. All rights reserved. 18 | # 19 | # CDDL HEADER START 20 | # 21 | # The contents of this file are subject to the terms of the 22 | # Common Development and Distribution License (the "License"). 
23 | # You may not use this file except in compliance with the License. 24 | # 25 | # You can obtain a copy of the license at docs/cddl1.txt or 26 | # http://opensource.org/licenses/CDDL-1.0. 27 | # See the License for the specific language governing permissions 28 | # and limitations under the License. 29 | # 30 | # When distributing Covered Code, include this CDDL HEADER in each 31 | # file and include the License file at docs/cddl1.txt. 32 | # If applicable, add the following below this CDDL HEADER, with the 33 | # fields enclosed by brackets "[]" replaced with your own identifying 34 | # information: Portions Copyright [yyyy] [name of copyright owner] 35 | # 36 | # CDDL HEADER END 37 | # 38 | # 01-Jun-2013 Brendan Gregg Created this. 39 | 40 | library(e1071) # skewness, kurtosis 41 | library(diptest) # diptest 42 | source("da-common.r") 43 | 44 | # input 45 | type <- 100 # distribution type: see da-libsynth.r and below 46 | N <- 5000 # target elements 47 | trim <- 0 # trim data set: 0 none, 1 sd, 2 iqr, 3 maxtrim 48 | maxtrim <- 0 # max value for use with trim 3 49 | random <- 1 # randomize data ordering 50 | png <- 0 # png instead of pdf 51 | svg <- 0 # svg instead of pdf 52 | pngheight <- 400 # default png height 53 | pngwidth <- 600 # default png width 54 | pdfheight <- 4.5 # default pdf/svg height 55 | pdfwidth <- 9 # default pdf/svg width 56 | density <- 0 # draw density plot instead of histogram 57 | denadj <- 0.4 # density adjust parameter 58 | labels <- 1 # draw chart labels (default on) 59 | lwidth <- 8 # density line width 60 | trans <- 0 # transparent background 61 | rug <- 0 # do rug plot 62 | outfile <- "dists.pdf" # output file 63 | infile <- "" # input file for dist types 600+ 64 | extra <- 0 # extra tests 65 | weight <- 0 # density weight 66 | statlines <- 0 # plot lines for mean, stddev 67 | plines <- 0 # plot lines for 90th, 99th, 99.9th percentiles 68 | symlink <- 0 # create encoded symlinks 69 | fill <- 0 # polygon fill 70 | numbered <- 0 # add value to right of plot 71 | num_mvalue <- 1 # that value is mvalue 72 | num_maxsigma <- 0 # that value is maxsigma 73 | num_max <- 0 # that value is max 74 | num_factor <- 1000000 # factor for max value 75 | centermean <- 0 # center mean in plot 76 | 77 | # labels 78 | mtitle <- "Latency Distribution" 79 | xtitle <- "Disk I/O latency (us)" 80 | 81 | # environment 82 | if ((env <- Sys.getenv("N")) != "") { N <- as.numeric(env) } 83 | if ((env <- Sys.getenv("TYPE")) != "") { type <- as.numeric(env) } 84 | if ((env <- Sys.getenv("TRIM")) != "") { trim <- as.numeric(env) } 85 | if ((env <- Sys.getenv("MAXTRIM")) != "") { maxtrim <- as.numeric(env) } 86 | if ((env <- Sys.getenv("PNG")) != "") { png <- as.numeric(env) } 87 | if ((env <- Sys.getenv("SVG")) != "") { svg <- as.numeric(env) } 88 | if ((env <- Sys.getenv("LABELS")) != "") { labels <- as.numeric(env) } 89 | if ((env <- Sys.getenv("DENSITY")) != "") { density <- as.numeric(env) } 90 | if ((env <- Sys.getenv("LWD")) != "") { lwidth <- as.numeric(env) } 91 | if ((env <- Sys.getenv("TRANS")) != "") { trans <- as.numeric(env) } 92 | if ((env <- Sys.getenv("RUG")) != "") { rug <- as.numeric(env) } 93 | if ((env <- Sys.getenv("FILL")) != "") { fill<- as.numeric(env) } 94 | if ((env <- Sys.getenv("OUTFILE")) != "") { outfile <- env } 95 | if ((env <- Sys.getenv("INFILE")) != "") { infile <- env } 96 | if ((env <- Sys.getenv("RANDOM")) != "") { random <- as.numeric(env) } 97 | if ((env <- Sys.getenv("EXTRA")) != "") { extra <- as.numeric(env) } 98 | if ((env <- 
Sys.getenv("SYMLINK")) != "") { symlink <- as.numeric(env) } 99 | if ((env <- Sys.getenv("STATLINES")) != "") { statlines <- as.numeric(env) } 100 | if ((env <- Sys.getenv("WEIGHT")) != "") { weight <- as.numeric(env) } 101 | if ((env <- Sys.getenv("PNGWIDTH")) != "") { pngwidth <- as.numeric(env) } 102 | if ((env <- Sys.getenv("PNGHEIGHT")) != "") { pngheight <- as.numeric(env) } 103 | if ((env <- Sys.getenv("PDFWIDTH")) != "") { pdfwidth <- as.numeric(env) } 104 | if ((env <- Sys.getenv("PDFHEIGHT")) != "") { pdfheight <- as.numeric(env) } 105 | 106 | if (png) { 107 | if (outfile == "dists.pdf") { outfile <- "dists.png" } 108 | if ((pngheight < 200) & labels) { pngheight <- pngheight + 140; } 109 | png(outfile, pngwidth, pngheight) 110 | } else if (svg) { 111 | if (outfile == "dists.pdf") { outfile <- "dists.svg" } 112 | svg(outfile, width=pdfwidth, height=pdfheight) 113 | } else { 114 | pdf(outfile, w=pdfwidth, h=pdfheight) 115 | } 116 | if (!labels) { 117 | mtitle <- ' '; xtitle <- ' '; ytitle <- ' ' 118 | par(bty = "n") 119 | if (numbered) { 120 | par(mai = c(0,0,0,1.5)) 121 | } else { 122 | par(mai = c(0,0,0,0)) 123 | } 124 | } else { 125 | par(mgp = c(2,0.5,0)) 126 | #par(cex = 2) 127 | if (numbered) { 128 | par(mar = c(4,3.5,3,3)) 129 | } else { 130 | par(mar = c(4,3.5,3,2)) 131 | } 132 | } 133 | if (trans) { par(bg = NA) } 134 | if (density == 0) { ytitle <- "Frequency" } else { ytitle <- "Density" } 135 | 136 | # distributions 137 | data <- c() 138 | source("da-libsynth.r") # defines types 0-499 139 | source("da-libreal.r") # defines types 500-599 140 | 141 | if (type == 600) { # data is column 0 from infile 142 | outliers <- "?" 143 | input <- read.table(infile, header=FALSE, skip=1, nrows=N) 144 | data <- input$V1 145 | N <- length(data) 146 | if (random) { data <- randomize(data) } 147 | 148 | } else if (type == 601) { # data is column 1 from infile 149 | outliers <- "?" 
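	# read.table names unlabeled columns V1, V2, ...: "column 1" in the
	# comment above counts from zero, so V2 is the second field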
150 | input <- read.table(infile, header=FALSE, skip=1, nrows=N) 151 | data <- input$V2 152 | N <- length(data) 153 | if (random) { data <- randomize(data) } 154 | 155 | } else if (length(data) == 0) { 156 | printf("ERROR: distribution type %d unknown.\n", type) 157 | quit(save = "no") 158 | } 159 | 160 | # truncate negative 161 | data <- data[data >= 0] 162 | N <- length(data) 163 | 164 | # pre-trimmed statistics 165 | mean <- mean(data) 166 | stddev <- sd(data) 167 | mad <- mad(data) 168 | iqr <- IQR(data) 169 | median <- median(data) 170 | max <- max(data) 171 | maxsigma <- (max - mean) / stddev 172 | 173 | # outlier trimming 174 | if (trim == 1) { 175 | # +- 2 stddev 176 | data <- data[data <= mean + 2 * stddev] 177 | data <- data[data >= mean - 2 * stddev] 178 | N <- length(data) 179 | } else if (trim == 2) { 180 | # like boxplots, keep range IQR +- 1.5 x IQR 181 | data <- data[data <= quantile(data, 0.75) + 1.5 * iqr] 182 | data <- data[data >= quantile(data, 0.25) - 1.5 * iqr] 183 | N <- length(data) 184 | } else if (trim == 3) { 185 | data <- data[data <= maxtrim] 186 | N <- length(data) 187 | } 188 | 189 | # post trimmed 190 | mean <- mean(data) 191 | stddev <- sd(data) 192 | 193 | # plot histogram 194 | if (density == 0) { 195 | hist <- hist(data, 196 | breaks = 100, 197 | col = "gray90", 198 | main = mtitle, 199 | xlab = xtitle, 200 | ylab = ytitle) 201 | if (rug) { 202 | rug(data, lwd=lwidth, col="black", ticksize=0.032) 203 | } 204 | maxden <- max(hist$counts) 205 | } else { 206 | # prepare density plots 207 | den <- density(data, adjust = denadj) 208 | if (weight) { den$y <- den$y * den$x } 209 | maxden <- max(den$y) 210 | if (trim == 3) { 211 | xlim <- c(0, maxtrim) 212 | } else { 213 | if (centermean) { 214 | xlim <- c(mean - (max - mean), max) 215 | xlim <- c(mean - 3.5 * stddev, mean + 3.5 * stddev) 216 | } else { 217 | xlim <- c(min(den$x), max(den$x)) 218 | } 219 | } 220 | 221 | # ylim is scaled by 1.05 so top can be cropped. 
222 | # the lwd=8 plot can exceed 1.05 for sharp points 223 | ylim <- c(0, 1.05 * maxden) 224 | } 225 | 226 | # density plot 227 | if (density == 1) { 228 | set.seed(mean + median + stddev) 229 | col <- "white" 230 | trans <- 240 231 | 232 | # customize color here 233 | 234 | # pink/magenta ++ / green/aqua <-- node.js cost 235 | #col <- rgb( 236 | # 0, 237 | # 80 + sample(seq(1:100), 1), 238 | # 60 + sample(seq(1:65), 1), 239 | # trans, maxColorValue = 255) 240 | 241 | # purple/violet ++ / green/brown <-- mysql cost 242 | #col <- rgb( 243 | # 70 + sample(seq(1:65), 1), 244 | # 90 + sample(seq(1:100), 1), 245 | # 0, 246 | # trans, maxColorValue = 255) 247 | 248 | # orange ++ / blue/turquoise <-- disk cost 249 | #col <- rgb( 250 | # 0, 251 | # 80 + sample(seq(1:150), 1), 252 | # 255, 253 | # trans, maxColorValue = 255) 254 | 255 | # dark blue / yellow ++ 256 | #v1 <- 220 + sample(seq(1:35), 1) 257 | #v2 <- v1 - 100 - sample(seq(1:115), 1) 258 | #col <- rgb(v1, v1, v2, maxColorValue = 255) 259 | 260 | # dark yellow ++ / light blue <-- synth yellow 261 | #v1 <- 255 - sample(seq(1:50), 1) 262 | #v2 <- sample(seq(1:65), 1) 263 | #col <- rgb(v2 + 5, v2 + 30, v1, trans, maxColorValue = 255) 264 | 265 | # magenta / light green 266 | #v1 <- 230 + sample(seq(1:25), 1) 267 | #v2 <- v1 - 60 - sample(seq(1:110), 1) 268 | #col <- rgb(v2, v1, v2, maxColorValue = 255) 269 | 270 | # green trans ++ / magenta <-- node.js 271 | #v1 <- 220 + sample(seq(1:35), 1) 272 | #v2 <- v1 - 90 - sample(seq(1:60), 1) 273 | #col <- rgb(v1, v2, v1, trans, maxColorValue = 255) 274 | 275 | # red trans ++ / aqua <-- disk 276 | #v1 <- 230 + sample(seq(1:25), 1) 277 | #v2 <- sample(seq(1:125), 1) 278 | #col <- rgb(v2, v1, v1, trans, maxColorValue = 255) 279 | 280 | # blue trans ++ / yellow <-- mysql 281 | v1 <- 180 + sample(seq(1:55), 1) 282 | v2 <- v1 - 100 - sample(seq(1:80), 1) 283 | col <- rgb(v1, v1 - 20, v2, trans, maxColorValue = 255) 284 | 285 | # turquoise / pink 286 | #v1 <- 230 + sample(seq(1:25), 1) 287 | #v2 <- v1 - 60 - sample(seq(1:90), 1) 288 | #col <- rgb(v1, v2, v2, maxColorValue = 255) 289 | 290 | plot(den, main = mtitle, xlab = xtitle, ylab = ytitle, 291 | lwd = lwidth, fg = NA, xlim = xlim, ylim = ylim) 292 | if (fill) { 293 | polygon(c(min(den$x), den$x, max(den$x)), 294 | c(0, den$y, 0), col = col) 295 | plot(den, main = mtitle, xlab = xtitle, ylab = ytitle, 296 | lwd = lwidth, fg = NA, xlim = xlim, ylim = ylim, 297 | col = "white") 298 | } 299 | if (rug) { 300 | rug(data, lwd = lwidth, ticksize = 0.046, col = col, 301 | xlim = xlim) 302 | } 303 | 304 | # frequency trail 305 | } else if (density == 2) { 306 | # walk the density values and maintain a state based on 307 | # y height, drawing lines or rugs when the state changes. 
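	# on each threshold crossing, the completed interval is rendered: a
	# density line for spans above the threshold, rug ticks for the data
	# in spans below it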
308 | 	plot(den, main = mtitle, xlab = xtitle, ylab = ytitle,
309 | 	    lwd = 1, col = NA, fg = NA, xlim = xlim, ylim = ylim)
310 | 	state <- 0	# 0 line, 1 rug
311 | 	bx <- den$x[1]
312 | 	by <- den$y[1]
313 | 	minden <- min(den$y)
314 | 	maxx <- max(den$x)
315 | 	threshold = 3 * maxden / pngheight
316 | 
317 | 	for (i in 1:512) {
318 | 		if (i == 512) {
319 | 			# force plot on final point
320 | 			if (state == 0) { den$y[i] = 0 }
321 | 			if (state == 1) { den$y[i] = 1.1 * threshold }
322 | 		}
323 | 
324 | 		if (den$y[i] > threshold) {
325 | 			if (state == 1) {
326 | 				if (rug) {
327 | 					rdata <- data[data >= bx]
328 | 					rdata <- rdata[rdata < den$x[i]]
329 | 					rug(rdata, lwd=lwidth,
330 | 					    ticksize = 0.049,
331 | 					    col="black", xlim=xlim)
332 | 				}
333 | 				state <- 0
334 | 				bx <- den$x[i]
335 | 				by <- den$y[i]
336 | 			}
337 | 
338 | 		} else {
339 | 			if (state == 0) {
340 | 				nn <- 1 + round(512 * (den$x[i] - bx) / maxx)
341 | 				sden <- density(data, adjust = denadj, n = nn,
342 | 				    from = bx, to = den$x[i], cut=0)
343 | 				if (weight) { sden$y <- sden$y * sden$x }
344 | 				if (fill) {
345 | 					polygon(c(bx, sden$x, den$x[i]),
346 | 					    c(0, sden$y, 0), col = "white")
347 | 				}
348 | 				lines(sden, main = " ", lwd = lwidth,
349 | 				    fg = NA, col = "black",
350 | 				    xlim=xlim, ylim=ylim)
351 | 
352 | 				state <- 1
353 | 				bx <- den$x[i]
354 | 				by <- den$y[i]
355 | 			}
356 | 		}
357 | 	}
358 | }
359 | 
360 | # calculate statistics
361 | min <- min(data)
362 | max <- max(data)
363 | mad <- mad(data)
364 | var <- var(data)
365 | percentiles <- quantile(data, c(0.9, 0.99, 0.999, 0.9999, 0.99999, 0.999999))
366 | apercentiles <- quantile(data, seq(0.01, 0.99, 0.01), names = TRUE)
367 | quartiles <- quantile(data, c(0.25, 0.75), names = FALSE)
368 | iqr <- IQR(data)
369 | prange = apercentiles[55] - apercentiles[45]
370 | median <- median(data)
371 | skewness <- skewness(data)
372 | kurtosis <- kurtosis(data)
373 | diptest <- dip(data)
374 | pstddev <- stddev * sqrt((N - 1) / N)
375 | maxsigma <- (max - mean) / stddev
376 | minsigma <- (mean - min) / stddev
377 | madmax <- (max - median) / mad
378 | madmin <- (median - min) / mad
379 | bimodalc <- ((skewness^2) + 1) / kurtosis
380 | bimodalcf <- ((skewness^2) + 1) /
381 |     (kurtosis + 3 * ((N - 1)^2) / ((N - 2) * (N - 3)))
382 | cov <- stddev / mean
383 | 
384 | # calculate madv, macdf, sacdf
385 | madv <- 0
386 | macdf <- 0
387 | sacdf <- 0
388 | stddevconn <- 0
389 | for(i in 1:N) {
390 | 	if (i > 1) {
391 | 		d <- abs(data[i] - data[i - 1])
392 | 		macdf <- macdf + d
393 | 		sacdf <- sacdf + d^2
394 | 	}
395 | 	if (data[i] > (mean - stddev/2) & (data[i] < (mean + stddev/2))) {
396 | 		stddevconn <- stddevconn + 1
397 | 	}
398 | 	madv <- madv + abs(data[i] - mean)
399 | }
400 | stddevcon <- stddevconn / N
401 | macdf <- macdf / (N - 1)
402 | sacdf <- sqrt(sacdf / (N - 1))
403 | madv <- madv / N
404 | 
405 | # calculate mvalue
406 | maxmvalue <- 0
407 | for (a in c(2, 3, 5, 7, 10, 15, 20, 30)) {
408 | 	# try various bandwidths, starting at 2x, and keep highest mvalue
409 | 	by <- 0
410 | 	mvalue <- 0
411 | 	den <- density(data, adjust = denadj * a)
412 | 	if (weight) { den$y <- den$y * den$x }
413 | 	maxd <- max(den$y)
414 | 	for (i in 1:length(den$x)) {
415 | 		mvalue <- mvalue + abs(den$y[i] / maxd - by)
416 | 		by <- den$y[i] / maxd
417 | 	}
418 | 	if (mvalue > maxmvalue) { maxmvalue <- mvalue }
419 | }
420 | mvalue <- maxmvalue
421 | 
422 | # print statistics
423 | printf("\n%-42s %d\n", "N", N)
424 | printf("%-42s %.2f\n", "min", min)
425 | printf("%-42s %.2f\n", "mean", mean)
426 | printf("%-42s %.2f\n", "median", median)
427 | printf("%-42s %.2f\n", "max",
max) 428 | printf("%-42s %.2f\n", "max sigma", maxsigma) 429 | printf("%-42s %.2f\n", "min sigma", minsigma) 430 | printf("%-42s %.2f\n", "mad max", madmax) 431 | printf("%-42s %.2f\n", "mad min", madmin) 432 | printf("%-42s %.2f\n", "sample standard deviation", stddev) 433 | printf("%-42s %.2f\n", "population standard deviation", pstddev) 434 | printf("%-42s %.2f\n", "coefficient of variation", cov) 435 | printf("%-42s %.2f\n", "variance/mean", var / mean) 436 | printf("%-42s %.2f\n", "median absolute deviation", mad) 437 | printf("%-42s %.2f\n", "mean absolute deviation", madv) 438 | printf("%-42s %.2f\n", "mean absolute consecutive difference", macdf) 439 | printf("%-42s %.2f\n", "standard absolute consecutive difference", sacdf) 440 | printf("%-42s %.2f\n", "90th percentile", percentiles[1]) 441 | printf("%-42s %.2f\n", "99th percentile", percentiles[2]) 442 | printf("%-42s %.2f\n", "99.9th percentile", percentiles[3]) 443 | printf("%-42s %.2f\n", "99.99th percentile", percentiles[4]) 444 | printf("%-42s %.2f\n", "99.999th percentile", percentiles[5]) 445 | printf("%-42s %.2f\n", "99.9999th percentile", percentiles[6]) 446 | printf("%-42s %.2f\n", "25% quartile", quartiles[1]) 447 | printf("%-42s %.2f\n", "75% quartile", quartiles[2]) 448 | printf("%-42s %.2f\n", "inter quartile range", iqr) 449 | printf("%-42s %.2f\n", "45%-55% percentile range", prange) 450 | printf("%-42s %.2f\n", "skewness", skewness) 451 | printf("%-42s %.2f\n", "kurtosis", kurtosis) 452 | printf("%-42s %.2f\n", "bimodality coefficient", bimodalc) 453 | printf("%-42s %.2f\n", "bimodality coefficient finite sample", bimodalcf) 454 | printf("%-42s %.3f\n", "dip test statistic", diptest) 455 | printf("%-42s %.3f\n", "mvalue (y difference)", mvalue) 456 | printf("%-42s %.2f\n", "macdf/mean", macdf / mean) 457 | printf("%-42s %.2f\n", "sacdf/mean", sacdf / mean) 458 | printf("%-42s %.2f\n", "madv/stddev", madv / stddev) 459 | 460 | # print table output 461 | printf("\nHEAD type trim outliers N min mean median max minsigma maxsigma madmax iqr prange mad stddev cov skewness kurtosis bimodalcf madv macdf sacdf stddevcon diptest\n"); 462 | printf("DATA %d %d %s %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.3f\n", 463 | type, trim, outliers, N, min, mean, median, max, minsigma, maxsigma, madmax, iqr, prange, mad, stddev, cov, skewness, kurtosis, bimodalcf, madv, macdf, sacdf, stddevcon, diptest); 464 | 465 | if (extra) { 466 | print(shapiro.test(data)) 467 | library(ADGofTest) 468 | print(ad.test(data, pnorm, 0, max(data))) 469 | print(ks.test(data, "pnorm", mean = mean, sd = stddev)) 470 | print(dip(data, full = "all")) 471 | } 472 | 473 | if (numbered) { 474 | # 475 | # Some awful code. Ideally we'd create a text variable with whatever 476 | # we want printed, then mtext() would place it right-aligned at a 477 | # _reasonable_ spacing to the plot. I've never got that to work. 478 | # Instead, I let mtext() place it left-aligned, and achieve right- 479 | # alignment by padding the text variable with spaces. Two spaces 480 | # for each digit, since it is variable width. 
481 | # 482 | 483 | if (num_maxsigma) { 484 | num <- maxsigma 485 | # 2 dec places, up to 99; %5.2f has crooked alignment 486 | if (num < 10) { text <- sprintf(" %.2f", num) } 487 | else { text <- sprintf("%.2f", num) } 488 | } 489 | 490 | if (num_mvalue) { 491 | num <- mvalue 492 | # 2 dec places, up to 99; %5.2f has crooked alignment 493 | if (num < 10) { text <- sprintf(" %.2f", num) } 494 | else { text <- sprintf("%.2f", num) } 495 | } 496 | 497 | if (num_max) { 498 | num <- max / num_factor 499 | text <- ""; x <- round(num) 500 | if (x == 0) { x <- 1 } 501 | while (x < 1000) { 502 | text <- paste(text, " ", sep = "") 503 | x <- x * 10 504 | } 505 | text <- paste(text, sprintf("%d", round(num)), sep = "") 506 | } 507 | if (num_max) { 508 | num <- max / num_factor 509 | text <- ""; x <- round(num) 510 | if (x == 0) { x <- 1 } 511 | while (x < 1000) { 512 | text <- paste(text, " ", sep = "") 513 | x <- x * 10 514 | } 515 | text <- paste(text, sprintf("%d", round(num)), sep = "") 516 | } 517 | 518 | # padj = 2 for pngheight 120; 3.85 for pngheight 220; 1.5 centered 519 | # col = white for filled; black for trail; 520 | mtext(text, side = 4, las = 1, cex = 3, adj = 0.5, padj = 3.85, 521 | col = "white") 522 | } 523 | 524 | # plot statistics 525 | if (statlines) { 526 | abline(v=mean, col="black", lwd=1, lty="dashed") 527 | abline(v=mean + stddev, col="black", lty="dotted") 528 | abline(v=mean - stddev, col="black", lty="dotted") 529 | abline(v=percentiles[2], col="black", lty="1A") 530 | abline(v=mean + 6 * stddev, col="black", lty="4A") 531 | legend("topright", 532 | c("mean", "stddev", "99th pct", expression(6 * sigma)), 533 | lty=c("dashed", "dotted", "1A", "4A"), 534 | lwd=1) 535 | } 536 | 537 | if (centermean) { 538 | lines(x = c(mean, mean), y = c(0, maxden), 539 | col = "white", lwd = 8, lend = 1) 540 | } 541 | 542 | if (plines) { 543 | lines(x = c(percentiles[1], percentiles[1]), y = c(0, maxden / 4), 544 | col = "white", lwd = 4) 545 | lines(x = c(percentiles[2], percentiles[2]), y = c(0, maxden / 4), 546 | col = "white", lwd = 4) 547 | lines(x = c(percentiles[3], percentiles[3]), y = c(0, maxden / 4), 548 | col = "white", lwd = 4) 549 | } 550 | 551 | dev.off() 552 | printf("\n%s written.\n", outfile) 553 | 554 | # create symlinks 555 | if (symlink) { 556 | print("making symlinks...") 557 | inf <- basename(infile) 558 | 559 | # create ordered max pngs 560 | link <- sprintf("max_%016d_%s%d.png", round(max), inf, type) 561 | system(sprintf("ln -s %s %s", outfile, link)) 562 | 563 | # create ordered maxsigma pngs 564 | link <- sprintf("maxsigma_%03d.%06d_%s%d.png", 565 | floor(maxsigma), round(1000000 * (maxsigma %% 1)), inf, type) 566 | system(sprintf("ln -s %s %s", outfile, link)) 567 | 568 | # create ordered bimodalcf pngs 569 | link <- sprintf("bimodalcf_%03d.%06d_%s%d.png", 570 | floor(bimodalcf), round(1000000 * (bimodalcf %% 1)), inf, type) 571 | system(sprintf("ln -s %s %s", outfile, link)) 572 | 573 | # create ordered diptest pngs 574 | link <- sprintf("diptest_%03d.%06d_%s%d.png", 575 | floor(diptest), round(1000000 * (diptest %% 1)), inf, type) 576 | system(sprintf("ln -s %s %s", outfile, link)) 577 | 578 | # create ordered cov pngs 579 | link <- sprintf("cov_%03d.%06d_%s%d%s.png", 580 | floor(cov), round(1000000 * (cov %% 1)), inf, type, trim) 581 | system(sprintf("ln -s %s %s", outfile, link)) 582 | 583 | # create ordered mvalue pngs 584 | link <- sprintf("ydiff_%03d.%06d_%s%d%s.png", 585 | floor(mvalue), round(1000000 * (mvalue %% 1)), inf, type, trim) 586 | 
system(sprintf("ln -s %s %s", outfile, link)) 587 | } 588 | -------------------------------------------------------------------------------- /da/waterfall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/ksh 2 | # 3 | # waterfall.sh Run a series of dist.r plots and create 4 | # composite waterfall plots 5 | # 6 | # requires: ImageMagick (convert), R. 7 | # 8 | # 01-Jun-2013 Brendan Gregg Created this. 9 | 10 | # 11 | # Parameters 12 | # 13 | 14 | # synthetic 15 | dists="0 1 2" 16 | dists="$dists 100 101 102 103 110 111 112 120 130 131" 17 | dists="$dists 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155" 18 | dists="$dists 200 201 202 203 204 205 206 210 211 212 213" 19 | dists="$dists 300 301 302 303 304 305 306 307" 20 | dists="$dists 400 401 402 403" 21 | dists="$dists 500 501 502 503" 22 | 23 | # plot type 24 | # trail: density=2 rug=1 25 | # trailfill: density=1 rug=1 same=1 26 | # dotfill: density=2 rug=1 same=1 27 | # dash: density=2 rug=0 28 | # line: density=1 rug=0 29 | # rugfill: density=1 rug=1 30 | # hist: density=0 rug=0 31 | # histrug: density=0 rug=1 32 | # density types: 0 = hist, 1 = line, 2 = frequency trail 33 | density=1 34 | rug=1 35 | same=1 36 | 37 | # input 38 | runtype=1 # 1 = synth, 2 = random, 3 = infiles 39 | infiles='../dilt03/*' 40 | dist=601 41 | trimlist="2" # 0 = none, 1 = sd, 2 = iqr, 3 = maxtrim 42 | maxtrim=1000000 43 | weight=0 44 | 45 | # execution 46 | outdir=working 47 | maxpng=35 48 | makepng=1 49 | makecomposites=1 50 | parallel=5 51 | stride=1 52 | N=5000 53 | 54 | # layout 55 | yoffset=55 56 | xoffset=0 57 | ypad=10 58 | xpad=40 59 | width=2300 60 | pngwidth=2400 61 | pngheight=220 62 | lwd=2 63 | 64 | # 65 | # Combinations 66 | # 67 | 68 | # modal vertical: 69 | #density=2; rug=1; same=0; trimlist=1; weight=0 70 | #yoffset=50; xoffset=0; pngheight=220; lwd=8 71 | 72 | # 20ms vertical compact: 73 | #density=2; rug=1; same=0; trimlist=2; maxtrim=20000; weight=0 74 | #yoffset=20; xoffset=0; pngheight=220; lwd=8 75 | 76 | # 100ms staggered compact: 77 | #density=2; rug=1; same=0; trimlist=2; maxtrim=100000; weight=0 78 | #yoffset=20; xoffset=10; pngheight=220; lwd=8 79 | 80 | # 100ms staggered compact filled - datacenter outliers: 81 | #density=1; rug=1; same=0; trimlist=2; maxtrim=100000; weight=0 82 | #yoffset=20; xoffset=10; pngheight=220; lwd=8 83 | 84 | # 20ms diagonal: 85 | #density=2; rug=1; same=0; trimlist=2; maxtrim=20000; weight=0 86 | #yoffset=30; xoffset=30; pngheight=220; lwd=8 87 | 88 | # outlier detection 89 | #density=1; rug=1; same=1; trimlist=0; weight=0 90 | #yoffset=55; xoffset=0; pngheight=220; lwd=8; N=50000 91 | 92 | # modal colored 93 | #density=1; rug=1; same=1; trimlist=0; weight=0 94 | #yoffset=55; xoffset=0; pngheight=220; lwd=2; N=10000 # top yoffset=205 95 | #incl. 
white line after polygon 96 | 97 | mkdir -p $outdir 98 | cd $outdir 99 | echo output directory: $outdir 100 | 101 | if (( makepng )); then 102 | rm dist_*png 103 | rm max_*png 104 | rm maxsigma_*png 105 | rm bimodalcf_*png 106 | rm diptest_*png 107 | rm cov_*png 108 | rm ydiff_*png 109 | fi 110 | 111 | # onedistpng 112 | # 113 | # environment: dist trim maxtrim pfile density weight infile maxtrim 114 | # pngwidth pngheight lwd 115 | # 116 | function onedistpng { 117 | echo 'source("../dist.r")' | \ 118 | TYPE=$dist TRIM=$trim OUTFILE=$pfile PNG=1 LABELS=0 N=$N \ 119 | DENSITY=$density LWD=$lwd RUG=$rug FILL=1 TRANS=1 WEIGHT=$weight \ 120 | RANDOM=0 INFILE=$infile MAXTRIM=$maxtrim SYMLINK=1 \ 121 | PNGWIDTH=$pngwidth PNGHEIGHT=$pngheight \ 122 | R --no-save 2>/dev/null | \ 123 | grep DATA | sed 's/[^ ]* //' 124 | 125 | # was +88 126 | convert $pfile -crop ${width}x$((pngheight - pngheight/24 - 1))+50+1 \ 127 | $pfile 128 | 129 | # same color 130 | if (( same )); then convert $pfile -negate $pfile; fi 131 | } 132 | 133 | # make synthetic dist pngs 134 | function makesynth { 135 | for trim in $trimlist; do 136 | i=1; j=1 137 | for dist in $dists; do 138 | if (( i++ > maxpng )); then wait; continue; fi 139 | if (( $trim )); then 140 | pfile=`printf "dist_%03dt.png" $dist` 141 | else 142 | pfile=`printf "dist_%03df.png" $dist` 143 | fi 144 | onedistpng & 145 | if (( j++ >= $parallel )); then j=1; wait; fi 146 | done 147 | wait 148 | done 149 | } 150 | 151 | # make random dist pngs 152 | function makerandom { 153 | for trim in $trimlist; do 154 | i=1; j=1 155 | while (( i < maxpng )); do 156 | if (( $trim )); then 157 | pfile=`printf "dist_%03dt.png" $i` 158 | else 159 | pfile=`printf "dist_%03df.png" $i` 160 | fi 161 | (( dist = 1030 + i )) 162 | onedistpng & 163 | if (( j++ >= $parallel )); then j=1; wait; fi 164 | if (( i++ >= maxpng )); then wait; continue; fi 165 | done 166 | wait 167 | done 168 | } 169 | 170 | # make actual dist pngs 171 | function makereal { 172 | for trim in $trimlist; do 173 | i=1; j=1 174 | for infile in $infiles; do 175 | if (( i++ > maxpng )); then wait; continue; fi 176 | if (( $trim )); then 177 | pfile=dist_${infile##*/}t.png 178 | else 179 | pfile=dist_${infile##*/}f.png 180 | fi 181 | onedistpng & 182 | if (( j++ >= $parallel )); then j=1; wait; fi 183 | done 184 | wait 185 | done 186 | } 187 | 188 | # main 189 | if (( makepng )); then 190 | (( runtype == 1 )) && makesynth 191 | (( runtype == 2 )) && makerandom 192 | (( runtype == 3 )) && makereal 193 | fi 194 | if (( !makecomposites )); then exit; fi 195 | 196 | # makecomposite 197 | # 198 | # environment: ypad xpad yoffset xoffset 199 | # input: name 200 | # 201 | function makecomposite { 202 | name=$1 203 | (( maxy = pngheight + ypad * 2 )) 204 | (( maxx = width + xpad * 2 - xoffset )) 205 | i=1; j=1; y=$ypad; files= 206 | 207 | for f in ${name}_*png; do 208 | if (( j++ >= stride )); then j=1; else continue; fi 209 | if (( i++ > maxpng )); then break; fi 210 | 211 | (( maxy += yoffset )) 212 | (( maxx += xoffset )) 213 | files="$files $f" 214 | done 215 | 216 | (( x = maxx - width - xpad )) 217 | dest=waterfall_${name}.png 218 | im="" 219 | 220 | for f in $files; do 221 | if [ -e $f ]; then 222 | im="$im $f -geometry +$x+$y -composite" 223 | fi 224 | (( y += yoffset )) 225 | (( x -= xoffset )) 226 | done 227 | 228 | echo making composite $dest 229 | convert -size ${maxx}x$maxy canvas:transparent $im $dest 230 | } 231 | 232 | # make composites 233 | for trim in $trimlist; do eval trim$trim=1; done 234 | (( 
trim1 || trim2 )) && makecomposite bimodalcf &
235 | (( trim1 || trim2 )) && makecomposite diptest &
236 | (makecomposite max) &
237 | (makecomposite maxsigma) &
238 | (makecomposite cov) &
239 | (makecomposite ydiff) &
240 | wait
241 | 
242 | function negate {
243 | 	file=$1
244 | 	white=white$2
245 | 	black=black$2
246 | 	gray=gray$2
247 | 	whitefill=whitefill$2
248 | 	blackfill=blackfill$2
249 | 	whitefile=${file%.png}_$white.png
250 | 	blackfile=${file%.png}_$black.png
251 | 	grayfile=${file%.png}_$gray.png
252 | 	whitefillfile=${file%.png}_$whitefill.png
253 | 	blackfillfile=${file%.png}_$blackfill.png
254 | 	if (( same )); then
255 | 		convert $file -background white -alpha remove -alpha off \
256 | 		    $whitefillfile
257 | 		convert $whitefillfile -negate $blackfillfile
258 | 		return
259 | 	fi
260 | 	convert $file -background white -alpha remove -alpha off $whitefile
261 | 	convert $whitefile -negate $blackfile
262 | 	convert $file -background '#909090' -alpha remove -alpha off $grayfile
263 | }
264 | 
265 | function rugnegate {
266 | 	file=$1
267 | 	white=white$2
268 | 	black=black$2
269 | 	whitefill=whitefill$2
270 | 	blackfill=blackfill$2
271 | 	nfile=${file%.png}_negate.png
272 | 	whitefile=${file%.png}_$white.png
273 | 	blackfile=${file%.png}_$black.png
274 | 	whitefillfile=${file%.png}_$whitefill.png
275 | 	blackfillfile=${file%.png}_$blackfill.png
276 | 	if (( same )); then
277 | 		convert $file -background white -alpha remove -alpha off \
278 | 		    $whitefillfile
279 | 		convert $whitefillfile -negate $blackfillfile
280 | 		return
281 | 	fi
282 | 	convert $file -negate $nfile
283 | 	convert $nfile -background white -alpha remove -alpha off $whitefile
284 | 	convert $whitefile -negate $blackfile
285 | }
286 | 
287 | # negations
288 | echo making negations
289 | if (( rug && density < 2 )); then
290 | 	nfunc=rugnegate; name=rugfill
291 | elif (( density == 2 )); then
292 | 	nfunc=negate; name=dot
293 | else
294 | 	nfunc=negate; name=line
295 | fi
296 | $nfunc waterfall_max.png $name &
297 | $nfunc waterfall_maxsigma.png $name &
298 | (( trim1 || trim2 )) && $nfunc waterfall_bimodalcf.png $name &
299 | (( trim1 || trim2 )) && $nfunc waterfall_diptest.png $name &
300 | $nfunc waterfall_cov.png $name &
301 | $nfunc waterfall_ydiff.png $name &
302 | wait
303 | 
--------------------------------------------------------------------------------
/data.txt:
--------------------------------------------------------------------------------
1 | N Result
2 | 1 8.672
3 | 2 17.48
4 | 3 26.9
5 | 4 35.28
6 | 5 41.96
7 | 6 48.22
8 | 7 53.62
9 | 8 57.2
10 | 9 59.26
11 | 10 60.16
12 | 11 61.46
13 | 12 62.4
14 | 13 63.66
15 | 14 64.38
16 | 15 65.36
17 | 16 65.9
18 | 
--------------------------------------------------------------------------------
/frequencytrail.r:
--------------------------------------------------------------------------------
1 | # frequencytrail.r	Example frequency trail implementation.
2 | #
3 | # This implementation takes a density plot with a high resolution, and
4 | # removes lines that are below a minimum threshold, by setting their value
5 | # to NA. This hides the zero probability line, and provides a coarse (but
6 | # probably sufficient) view of distribution outliers.
7 | #
8 | # 08-Jun-2013	Brendan Gregg	Created this.
9 | 
10 | pdf("frequencytrailtest.pdf", w=8, h=4)
11 | 
12 | # plot a data set as a frequency trail
13 | plotfrequencytrail <- function(data) {
14 | 	n <- 2048	# resolution
15 | 	lwd <- 4	# line width
16 | 
17 | 	# threshold. todo: improve this calculation to be more robust.
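	# 1/(sd * N) roughly approximates the density height that a single
	# sample contributes over the distribution's scale; anything lower
	# is treated as zero probability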
18 | thr <- 1 / (sd(data) * length(data)) 19 | 20 | den <- density(data, n=n) 21 | plot(den, col=NA, fg=NA) 22 | 23 | # replace low frequency with NA to avoid plotting 24 | for (i in 1:n) { if (den$y[i] < thr) { den$y[i] = NA } } 25 | 26 | lines(den, lwd=lwd) 27 | } 28 | 29 | # data set is a normal distribution plus outliers 30 | data <- c(rnorm(9900, mean=1000, sd=100), 31 | runif(10, min=2000, max=10000)) 32 | 33 | plotfrequencytrail(data) 34 | -------------------------------------------------------------------------------- /frequencytrailtest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brendangregg/PerfModels/8612f83119869e510e29196c6c49743445ae4559/frequencytrailtest.pdf -------------------------------------------------------------------------------- /linear.g: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | # 3 | # linear.g Linear scalability model using gnuplot. 4 | # 5 | # This applies a linear scalability model to an input data set. It uses 6 | # regression analysis to determine the constants. Two linear functions 7 | # are plotted: Linear(N), which fits the input set; and LinearN1(N), which 8 | # uses N=1 only. 9 | # 10 | # USAGE: ./linear.g 11 | # 12 | # See the "tunables" section for defining the input data file, and the number 13 | # of rows to include as model input. The remainder of rows are drawn as 14 | # "extra" data points. The file has the form: 15 | # 16 | # N Result 17 | # 1 2.1 18 | # 2 4.0 19 | # 3 5.9 20 | # ... 21 | # 22 | # The row order can be rearranged to customize the model input. 23 | # 24 | # Copyright 2012 Brendan Gregg. All rights reserved. 25 | # 26 | # CDDL HEADER START 27 | # 28 | # The contents of this file are subject to the terms of the 29 | # Common Development and Distribution License (the "License"). 30 | # You may not use this file except in compliance with the License. 31 | # 32 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 33 | # or http://www.opensolaris.org/os/licensing. 34 | # See the License for the specific language governing permissions 35 | # and limitations under the License. 36 | # 37 | # When distributing Covered Code, include this CDDL HEADER in each 38 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE. 39 | # If applicable, add the following below this CDDL HEADER, with the 40 | # fields enclosed by brackets "[]" replaced with your own identifying 41 | # information: Portions Copyright [yyyy] [name of copyright owner] 42 | # 43 | # CDDL HEADER END 44 | # 45 | # 03-May-2012 Brendan Gregg Created this. 
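#
# Two models are drawn below: linear(N) = N1 * alpha * N, with alpha fitted
# to the input rows, and linearN1(N) = N1 * N, anchored to the first
# measurement alone.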
46 |
47 | set terminal x11 font "arial,14" # designed for x11 (redraws)
48 | set autoscale
49 |
50 | # tunables
51 | filename = "data.txt" # data file
52 | inputN = 6 # rows to include as model input
53 | scale = 1.5 # scale graph beyond data points
54 | set grid
55 |
56 | set xlabel "CPUs (N)"
57 | set ylabel "Throughput"
58 | set title "Linear Scalability"
59 | set key on right bottom
60 | set pointsize 2
61 |
62 | # read N1, the first value for normalizing the plot (workaround)
63 | plot filename using 1:(N1 = $2, 0/0) every 1:1:1:0:1:0 notitle, '' using 1:($2 / N1) with linespoints
64 |
65 | # Linear, N1 only
66 | linearN1(N) = N1 * N
67 |
68 | # Linear, input set
69 | alpha = 0.9
70 | linear(N) = N1 * alpha * N
71 |
72 | # regression fitting
73 | fit linear(x) filename every ::1::inputN using 1:2 via alpha
74 |
75 | # plot data points
76 | plot filename using 1:2 with points pt 6 lc rgb "#f00000" title "extra measurements",\
77 | filename every ::1::inputN using 1:2 with points pt 6 lc rgb "#000000" title "input for Linear(N)"
78 | set label sprintf("a = %.4f", alpha) at graph 0.5, 0.075 center
79 | set yrange [0:GPVAL_DATA_Y_MAX * scale]
80 | set xrange [0:GPVAL_DATA_X_MAX * scale]
81 |
82 | # plot curves
83 | replot linear(x) with line lc rgb "#000000" title "Linear(N)"
84 | replot linearN1(x) with line lc rgb "#a0a0a0" title "LinearN1(N)"
85 |
86 | pause -1 "Hit return to continue"
87 |
--------------------------------------------------------------------------------
/scale.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brendangregg/PerfModels/8612f83119869e510e29196c6c49743445ae4559/scale.pdf
--------------------------------------------------------------------------------
/scale.r:
--------------------------------------------------------------------------------
1 | # scale.r Amdahl's law and USL scalability using R statistics.
2 | #
3 | # This applies both Amdahl's law to model scalability (maximum speedup) and
4 | # Universal Scalability Law to the input data set. It uses regression
5 | # analysis to determine the constants.
6 | #
7 | # USAGE: R --save < scale.r # generates scale.pdf
8 | #
9 | # See the "Tunables" section for defining the input data file, and the number
10 | # of rows to include as model input. The remainder of rows are drawn as
11 | # "extra" data points. The file has the form:
12 | #
13 | # N Result
14 | # 1 2.1
15 | # 2 4.0
16 | # 3 5.9
17 | # ...
18 | #
19 | # The heading line is important (processed by R).
20 | #
21 | # BASED ON: USLcalc.r by Dr. Neil Gunther.
22 | #
23 | # SEE ALSO: http://www.perfdynamics.com/Manifesto/USLscalability.html
24 | #
25 | # Copyright 2012 Brendan Gregg. All rights reserved.
26 | #
27 | # CDDL HEADER START
28 | #
29 | # The contents of this file are subject to the terms of the
30 | # Common Development and Distribution License (the "License").
31 | # You may not use this file except in compliance with the License.
32 | #
33 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
34 | # or http://www.opensolaris.org/os/licensing.
35 | # See the License for the specific language governing permissions
36 | # and limitations under the License.
37 | #
38 | # When distributing Covered Code, include this CDDL HEADER in each
39 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
40 | # If applicable, add the following below this CDDL HEADER, with the
41 | # fields enclosed by brackets "[]" replaced with your own identifying
42 | # information: Portions Copyright [yyyy] [name of copyright owner]
43 | #
44 | # CDDL HEADER END
45 | #
46 | # 20-Oct-2012 Brendan Gregg Created this.
47 |
48 | # Tunables
49 | filename <- "data.txt" # data file (see top comment for format)
50 | inputN <- 10 # rows to include as model input
51 | padding <- 1.1 # chart padding
52 | pdf("scale.pdf", w=10, h=6) # comment this line out for interactive use
53 |
54 | # Input
55 | input_full <- read.table(filename, header=TRUE)
56 | input_model <- subset(input_full, input_full$N <= inputN)
57 | input_extra <- subset(input_full, input_full$N > inputN)
58 |
59 | # Calculate normalization ratio relative to the 1st datum
60 | input_model$Norm <- input_model$Result/input_model$Result[1]
61 |
62 | # Regression analysis: standard non-linear least squares (NLS) fit
63 | amdahl <- nls(Norm ~ N / (1 + alpha * (N - 1)),
64 | input_model, start=c(alpha=0.1))
65 | usl <- nls(Norm ~ N / (1 + alpha * (N - 1) + beta * N * (N - 1)),
66 | input_model, start=c(alpha=0.1, beta=0.01))
67 |
68 | # Print parameters
69 | print(summary(amdahl))
70 | print(coef(amdahl))
71 | amdahls.coef <- coef(amdahl)
72 | print(summary(usl))
73 | print(coef(usl))
74 | usls.coef <- coef(usl)
75 |
76 | # Chart padding
77 | max_x <- padding * max(input_full$N)
78 | max_y <- padding * max(input_full$Result)
79 |
80 | # Plot model results
81 | plot(x <- c(0:max_x), input_model$Result[1] * x /
82 | (1 + usls.coef['alpha'] * (x - 1) + usls.coef['beta'] * x * (x - 1)),
83 | type="l", lty=2, lwd=1,
84 | xlim=c(0, max_x), ylim=c(0, max_y),
85 | xlab="CPUs (N)", ylab="Throughput X(N)")
86 | points(x <- c(0:max_x),
87 | input_model$Result[1] * x / (1 + amdahls.coef['alpha'] * (x - 1)),
88 | type="l", lty=3, lwd=1)
89 |
90 | # Plot data
91 | points(input_model$N, input_model$Result, pch=1)
92 | points(input_extra$N, input_extra$Result, pch=4)
93 |
94 | title("Scalability Models")
95 | legend("bottomright", c("model input", "extra measurements"), pch=c(1,4))
96 | legend("bottom", c("Amdahl", "USL"), lty=c(3,2))
97 |
--------------------------------------------------------------------------------
/tools/README:
--------------------------------------------------------------------------------
1 | Basic tools.
2 |
--------------------------------------------------------------------------------
/tools/interval.r:
--------------------------------------------------------------------------------
1 | # interval.r R line graph of interval measurements.
2 | #
3 | # USAGE: R --no-save < interval.r
4 | #
5 | # This time I'm putting it on github where I won't lose it.
6 | #
7 | # Input is a single column of measurements, taken at a known interval. The
8 | # number of input elements, interval, and column number can be customized (see
9 | # the N, interval, and data variables).
10 | #
11 | # 17-Jun-2014 Brendan Gregg Created this.
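# Example: with interval = 5, a hypothetical input file might look like
# this (column 1 is ignored; column 2, V2, holds the measurements):
#   1 0.31
#   2 0.36
#   3 0.29
#   ...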
12 |
13 | filename <- "data.txt"
14 | pdf("interval.pdf", w=10, h=6)
15 | N <- 121 # max number of elements
16 | interval <- 5 # seconds between measurements
17 | xlab <- "Time (secs)" # x-axis label
18 | ylab <- "Measurement" # y-axis label
19 | title <- "Plot of data.txt" # plot title
20 |
21 | input <- read.table(filename, header=FALSE, nrows=N)
22 | data <- input$V2 # use 2nd column
23 | xaxis <- seq(0, (length(data) - 1) * interval, interval) # matches rows actually read
24 |
25 | # type: p=points, l=lines, n=none, o=overplotted, b=both; cex=size
26 | plot(xaxis, data, main=title, type="o", cex=0.6, xlab=xlab, ylab=ylab)
27 |
28 | grid(col = "lightgray", lty = "dotted", lwd = par("lwd"), equilogs = TRUE)
29 |
30 |
--------------------------------------------------------------------------------
/tools/scatter.r:
--------------------------------------------------------------------------------
1 | # scatter.r R scatter plot hello world.
2 | #
3 | # USAGE: R --no-save < scatter.r
4 | #
5 | # This time I'm putting it on github where I won't lose it.
6 | #
7 | # Input is two columns, for time (seconds) and latency (ms).
8 | #
9 | # 17-Jun-2014 Brendan Gregg Created this.
10 |
11 | filename <- "scatter.txt"
12 | pdf("scatter.pdf", w=10, h=5)
13 |
14 | # max rows to use
15 | N <- 10000
16 |
17 | data <- read.table(filename, header=FALSE, nrows=N)
18 | N <- nrow(data) # rows actually read (length() would count columns)
19 |
20 | # type: p=points, l=lines, n=none, o=overplotted, b=both
21 | plot(data, cex=0.5, xlab="Time (s)", ylab="Latency (ms)")
22 |
23 | grid(col = "lightgray", lty = "dotted",
24 | lwd = par("lwd"), equilogs = TRUE)
25 |
--------------------------------------------------------------------------------
/usl.g:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env gnuplot
2 | #
3 | # usl.g USL using gnuplot.
4 | #
5 | # This applies the Universal Scalability Law (Dr. Neil J. Gunther) to the input
6 | # data set. It uses regression analysis to determine the constants.
7 | #
8 | # USAGE: ./usl.g
9 | #
10 | # See the "tunables" section for defining the input data file, and the number
11 | # of rows to include as model input (USL insists on a minimum of six). The
12 | # remainder of rows are drawn as "extra" data points. The file has the form:
13 | #
14 | # N Result
15 | # 1 2.1
16 | # 2 4.0
17 | # 3 5.9
18 | # ...
19 | #
20 | # The row order can be rearranged to customize the model input.
21 | #
22 | # SEE ALSO: http://www.perfdynamics.com/Manifesto/USLscalability.html
23 | #
24 | # Copyright 2012 Brendan Gregg. All rights reserved.
25 | #
26 | # CDDL HEADER START
27 | #
28 | # The contents of this file are subject to the terms of the
29 | # Common Development and Distribution License (the "License").
30 | # You may not use this file except in compliance with the License.
31 | #
32 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
33 | # or http://www.opensolaris.org/os/licensing.
34 | # See the License for the specific language governing permissions
35 | # and limitations under the License.
36 | #
37 | # When distributing Covered Code, include this CDDL HEADER in each
38 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
39 | # If applicable, add the following below this CDDL HEADER, with the
40 | # fields enclosed by brackets "[]" replaced with your own identifying
41 | # information: Portions Copyright [yyyy] [name of copyright owner]
42 | #
43 | # CDDL HEADER END
44 | #
45 | # 03-May-2012 Brendan Gregg Created this.
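# Background (standard USL definitions, for reference): relative capacity is
# C(N) = N / (1 + alpha*(N - 1) + beta*N*(N - 1)), where alpha models
# contention (serialization) and beta models coherency (crosstalk) costs.
# With beta > 0, throughput eventually peaks and then retrogrades as N grows.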
46 |
47 | set terminal x11 font "arial,14" # designed for x11 (redraws)
48 | set autoscale
49 |
50 | # tunables
51 | filename = "data.txt" # data file
52 | inputN = 6 # rows to include as model input
53 | scale = 1.5 # scale graph beyond data points
54 | set grid
55 |
56 | set xlabel "CPUs (N)"
57 | set ylabel "Throughput"
58 | set title "USL Scalability"
59 | set key on right bottom
60 | set pointsize 2
61 |
62 | # read N1, the first value for normalizing the plot (workaround)
63 | plot filename using 1:(N1 = $2, 0/0) every 1:1:1:0:1:0 notitle, '' using 1:($2 / N1) with linespoints
64 |
65 | # USL
66 | alpha = 0.01
67 | beta = 0.001
68 | usl(N) = N1 * N/(1 + alpha * (N - 1) + beta * N * (N - 1))
69 |
70 | # regression analysis (non-linear least squares fitting)
71 | fit usl(x) filename every ::1::inputN using 1:2 via alpha, beta
72 |
73 | # plot data points
74 | plot filename using 1:2 with points pt 6 lc rgb "#f00000" title "extra measurements",\
75 | filename every ::1::inputN using 1:2 with points pt 6 lc rgb "#000000" title "input for USL"
76 | set label sprintf("a = %.4f\nb = %.4f", alpha, beta) at graph 0.5, 0.075 center
77 | set yrange [0:GPVAL_DATA_Y_MAX * scale]
78 | set xrange [0:GPVAL_DATA_X_MAX * scale]
79 |
80 | # plot curves
81 | replot usl(x) with line lc rgb "#000000" title "USL(N)"
82 |
83 | pause -1 "Hit return to continue"
84 |
--------------------------------------------------------------------------------
/util-md1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brendangregg/PerfModels/8612f83119869e510e29196c6c49743445ae4559/util-md1.pdf
--------------------------------------------------------------------------------
/util-md1.r:
--------------------------------------------------------------------------------
1 | # util-md1.r Queueing Theory M/D/1 mean response time vs utilization
2 | #
3 | # USAGE: R --save < util-md1.r # generates util-md1.pdf
4 | #
5 | # See the "Tunables" section for defining the mean service time.
6 | #
7 | # Copyright 2012 Brendan Gregg. All rights reserved.
8 | #
9 | # CDDL HEADER START
10 | #
11 | # The contents of this file are subject to the terms of the
12 | # Common Development and Distribution License (the "License").
13 | # You may not use this file except in compliance with the License.
14 | #
15 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
16 | # or http://www.opensolaris.org/os/licensing.
17 | # See the License for the specific language governing permissions
18 | # and limitations under the License.
19 | #
20 | # When distributing Covered Code, include this CDDL HEADER in each
21 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
22 | # If applicable, add the following below this CDDL HEADER, with the
23 | # fields enclosed by brackets "[]" replaced with your own identifying
24 | # information: Portions Copyright [yyyy] [name of copyright owner]
25 | #
26 | # CDDL HEADER END
27 | #
28 | # 20-Oct-2012 Brendan Gregg Created this.
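# For reference, the standard M/D/1 result behind the plot below: with
# utilization rho and constant service time S, the mean queue wait is
# Wq = rho * S / (2 * (1 - rho)), so the mean response time is
# R = S + Wq = S * (2 - rho) / (2 * (1 - rho)); the code uses rho = x/100.
# e.g., at 90% utilization with S = 1 ms: R = 1 * (2 - 0.9) / (2 * 0.1) = 5.5 ms.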
29 |
30 | # Tunables
31 | svc_ms <- 1 # average disk I/O service time (ms)
32 | pdf("util-md1.pdf", w=10, h=6) # comment this line out for interactive use
33 | util_min <- 0
34 | util_max <- 100
35 | ms_min <- 0
36 | ms_max <- 10
37 |
38 | # Plot mean response time vs utilization (M/D/1)
39 | plot(x <- c(util_min:util_max), svc_ms * (2 - x/100) / (2 * (1 - x/100)),
40 | type="l", lty=1, lwd=1,
41 | xlim=c(util_min, util_max), ylim=c(ms_min, ms_max),
42 | xlab="Utilization %", ylab="Mean Response Time (ms)")
43 |
44 | # Grids
45 | abline(v=(seq(util_min, util_max, (util_max - util_min) / 10)),
46 | col="lightgray", lty="dotted")
47 | abline(h=(seq(ms_min, ms_max, (ms_max - ms_min) / 10)),
48 | col="lightgray", lty="dotted")
49 |
50 | title("Single Service Queue, Constant Service Times (M/D/1)")
51 |
--------------------------------------------------------------------------------
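Example workflow (a sketch based on the USAGE lines above; adjust each
tool's tunables first):

    $ vi data.txt              # N and Result columns, as in the sample above
    $ ./usl.g                  # interactive gnuplot USL fit (X11)
    $ R --save < scale.r       # writes scale.pdf with Amdahl and USL fits
    $ R --save < util-md1.r    # writes util-md1.pdf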