├── README
├── amdahl.g
├── da
│   ├── README
│   ├── da-common.r
│   ├── da-libreal.r
│   ├── da-libsynth.r
│   ├── dist.r
│   └── waterfall.sh
├── data.txt
├── frequencytrail.r
├── frequencytrailtest.pdf
├── linear.g
├── scale.pdf
├── scale.r
├── tools
│   ├── README
│   ├── interval.r
│   └── scatter.r
├── usl.g
├── util-md1.pdf
└── util-md1.r

/README:
--------------------------------------------------------------------------------
1 | Performance Scalability Models
2 | 
3 | This is some software to aid performance scalability analysis. It includes:
4 | 
5 | linear.g      Linear scalability model
6 | amdahl.g      Amdahl's law scalability
7 | usl.g         Universal Scalability Law
8 | scale.r       Scalability Models (Amdahl, USL)
9 | util-md1.r    Queueing Theory M/D/1 mean response time vs util
10 | data.txt      Sample input file for scalability modeling
11 | da/           Distribution Analysis
12 | 
13 | These are more projects than stand-alone tools; expect to customize them
14 | for each system you are modeling, and they assume you already understand
15 | performance scalability analysis.
16 | 
--------------------------------------------------------------------------------
/amdahl.g:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env gnuplot
2 | #
3 | # amdahl.g	Amdahl's law scalability using gnuplot.
4 | #
5 | # This fits Amdahl's law, which models scalability (maximum speedup), to the
6 | # input data set. It uses regression analysis to determine the constants.
7 | #
8 | # USAGE: ./amdahl.g
9 | #
10 | # See the "tunables" section for defining the input data file, and the number
11 | # of rows to include as model input. The remainder of rows are drawn as
12 | # "extra" data points. The file has the form:
13 | #
14 | # N Result
15 | # 1 2.1
16 | # 2 4.0
17 | # 3 5.9
18 | # ...
19 | #
20 | # The row order can be rearranged to customize the model input.
21 | #
22 | # Copyright 2012 Brendan Gregg. All rights reserved.
23 | #
24 | # CDDL HEADER START
25 | #
26 | # The contents of this file are subject to the terms of the
27 | # Common Development and Distribution License (the "License").
28 | # You may not use this file except in compliance with the License.
29 | #
30 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
31 | # or http://www.opensolaris.org/os/licensing.
32 | # See the License for the specific language governing permissions
33 | # and limitations under the License.
34 | #
35 | # When distributing Covered Code, include this CDDL HEADER in each
36 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
37 | # If applicable, add the following below this CDDL HEADER, with the
38 | # fields enclosed by brackets "[]" replaced with your own identifying
39 | # information: Portions Copyright [yyyy] [name of copyright owner]
40 | #
41 | # CDDL HEADER END
42 | #
43 | # 03-May-2012	Brendan Gregg	Created this.
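#
# The model fitted below is Amdahl's law: speedup(N) = N / (1 + alpha * (N - 1)),
# where alpha is the serial (non-parallelizable) fraction of the workload, so
# speedup approaches a ceiling of 1/alpha as N grows. amdahl(N) scales this by
# N1, the first measured result, to convert speedup into throughput.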
44 | 
45 | set terminal x11 font "arial,14"	# designed for x11 (redraws)
46 | set autoscale
47 | 
48 | # tunables
49 | filename = "data.txt"	# data file
50 | inputN = 12		# rows to include as model input
51 | scale = 1.5		# scale graph beyond data points
52 | set grid
53 | 
54 | set xlabel "CPUs (N)"
55 | set ylabel "Throughput"
56 | set title "Amdahl Scalability"
57 | set key on right bottom
58 | set pointsize 2
59 | 
60 | # read N1, the first value for normalizing the plot (workaround)
61 | plot filename using 1:(N1 = $2, 0/0) every 1:1:1:0:1:0 notitle, '' using 1:($2 / N1) with linespoints
62 | 
63 | # Amdahl
64 | alpha = 0.01
65 | amdahl(N) = N1 * N/(1 + alpha * (N - 1))
66 | 
67 | # regression analysis (non-linear least squares fitting)
68 | fit amdahl(x) filename every ::1::inputN using 1:2 via alpha
69 | 
70 | # plot data points
71 | plot filename using 1:2 with points pt 6 lc rgb "#f00000" title "extra measurements",\
72 |     filename every ::1::inputN using 1:2 with points pt 6 lc rgb "#000000" title "input for Amdahl"
73 | set label sprintf("a = %.4f", alpha) at graph 0.5, 0.075 center
74 | set yrange [0:GPVAL_DATA_Y_MAX * scale]
75 | set xrange [0:GPVAL_DATA_X_MAX * scale]
76 | 
77 | # plot curves
78 | replot amdahl(x) with line lc rgb "#000000" title "Amdahl(N)"
79 | 
80 | pause -1 "Hit return to continue"
81 | 
--------------------------------------------------------------------------------
/da/README:
--------------------------------------------------------------------------------
1 | Distribution Analysis
2 | 
3 | Work in progress...
4 | 
--------------------------------------------------------------------------------
/da/da-common.r:
--------------------------------------------------------------------------------
1 | # da-common.r	Common functions for dist.r.
2 | #
3 | # 01-Jun-2013	Brendan Gregg	Created this.
4 | 
5 | # printf
6 | printf <- function(...) cat(sprintf(...))
7 | 
8 | # randomize ordering (a random permutation, equivalent to sample(data))
9 | randomize <- function(data) {
10 | 	data0 <- data
11 | 	data <- c()
12 | 	for(i in 1:length(data0)) {
13 | 		ii <- sample(1:length(data0), 1)
14 | 		data[i] <- data0[ii]
15 | 		data0 <- data0[c(-ii)]
16 | 	}
17 | 	return(data)
18 | }
19 | 
--------------------------------------------------------------------------------
/da/da-libreal.r:
--------------------------------------------------------------------------------
1 | # da-libreal.r	Some Real Latency Distributions
2 | #
3 | # This is a library for dist.r. It requires the data files listed
4 | # below (see read.table).
5 | #
6 | # Input:
7 | #	N	number of target elements (may return a little less)
8 | #	type	a distribution type ID (see list below)
9 | # Output:
10 | #	data	data set
11 | #
12 | # 01-Jun-2013	Brendan Gregg	Created this.
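#
# The out.iosnoop_* files read below are assumed to be iosnoop capture
# output; the DELTA column (per-I/O time, microseconds) provides the
# latency data set.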
13 | 14 | # type description 15 | # 500 faithful bimodal 16 | # 501 real disk I/O latency bimodal far 17 | # 502 real disk I/O latency bimodal far outliers 18 | 19 | if (type == 500) { # faithful bimodal 20 | outliers <- "N" 21 | attach(faithful); N <- length(eruptions); data <- eruptions 22 | 23 | } else if (type == 501) { # random disk I/O 24 | outliers <- "N" 25 | input <- read.table("out.iosnoop_randread01", header=FALSE, skip=1, 26 | nrows=N, col.names=c("STIME","TIME","DELTA","DTIME","UID","PID", 27 | "D","BLOCK","SIZE","COMM","PATHNAME")) 28 | attach(input); input <- input[DELTA < 10000, ] 29 | data <- input$DELTA 30 | N <- length(data) 31 | if (random) { data <- randomize(data) } 32 | 33 | } else if (type == 502) { # random disk I/O outliers 34 | outliers <- "Y" 35 | input <- read.table("out.iosnoop_randread01", header=FALSE, skip=1, 36 | nrows=N, col.names=c("STIME","TIME","DELTA","DTIME","UID","PID", 37 | "D","BLOCK","SIZE","COMM","PATHNAME")) 38 | attach(input); 39 | data <- input$DELTA 40 | if (random) { data <- randomize(data) } 41 | 42 | } else if (type == 503) { # random sync disk I/O outliers 43 | outliers <- "Y" 44 | input <- read.table("out.iosnoop_marssync01", header=FALSE, skip=1, 45 | nrows=N, col.names=c("STIME","TIME","DELTA","DTIME","UID","PID", 46 | "D","BLOCK","SIZE","COMM","PATHNAME")) 47 | attach(input); 48 | data <- input$DELTA 49 | if (random) { data <- randomize(data) } 50 | } 51 | -------------------------------------------------------------------------------- /da/da-libsynth.r: -------------------------------------------------------------------------------- 1 | # da-libsynth.r Synthetic Latency Distributions 2 | # 3 | # This is a library for dist.r. 4 | # 5 | # This defines various synthetic distributions for modeling I/O latency. 6 | # The distributions are composed of values that are typically between 0 and 7 | # 10000, with a mean around 1000. This is loosely based on storage device I/O 8 | # latency, in units of microseconds. You can adjust these as desired. 9 | # 10 | # Input: 11 | # N number of target elements (may return a little less) 12 | # type a distribution type ID (see list below) 13 | # Output: 14 | # data data set 15 | # 16 | # 01-Jun-2013 Brendan Gregg Created this. 
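#
# Each branch below sets two variables consumed by dist.r: data (the
# synthetic sample) and outliers ("Y", "N", or "?" when ambiguous). A
# stand-alone run (assuming VGAM is installed, and da-common.r for
# randomize()) might look like:
#
#	N <- 5000; type <- 203; source("da-common.r"); source("da-libsynth.r")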
17 | 18 | library(VGAM) # rpareto 19 | 20 | # type description 21 | # 0 uniform narrow 22 | # 1 uniform wide 23 | # 2 uniform outliers 24 | # 100 unimodal normal narrow 25 | # 101 unimodal normal medium 26 | # 102 unimodal normal wide 27 | # 103 unimodal normal with tail 28 | # 110 unimodal normal narrow band reject 29 | # 111 unimodal normal spike 30 | # 112 unimodal normal fenced 31 | # 113 unimodal normal quantized 32 | # 120 unimodal poisson 33 | # 121 unimodal poisson outliers 34 | # 130 unimodal pareto narrow 35 | # 131 unimodal pareto wide 36 | # 140 unimodal normal outliers 1% medium 37 | # 141 unimodal normal outliers 1% far 38 | # 142 unimodal normal outliers 1% very far 39 | # 143 unimodal normal outliers 2% 40 | # 144 unimodal normal outliers 4% 41 | # 145 unimodal normal outliers 2% clustered 42 | # 146 unimodal normal outliers 4% close 1 43 | # 147 unimodal normal outliers 4% close 2 44 | # 148 unimodal normal outliers 4% close 3 45 | # 149 unimodal normal outliers 4% close 4 46 | # 150 unimodal normal outliers 4% close 5 47 | # 151 unimodal normal outliers 4% close 6 48 | # 152 unimodal normal outliers 4% close 7 49 | # 153 unimodal normal outliers 0.5% 50 | # 154 unimodal normal outliers 0.2% 51 | # 155 unimodal normal outliers 0.1% 52 | # 200 bimodal normal very close 53 | # 201 bimodal normal close 54 | # 202 bimodal normal medium 55 | # 203 bimodal normal far 56 | # 204 bimodal normal outliers 1% 57 | # 205 bimodal normal outliers 2% 58 | # 206 bimodal normal outliers 4% 59 | # 210 bimodal normal major minor 60 | # 211 bimodal normal minor major 61 | # 212 bimodal normal major minor outliers 62 | # 213 bimodal normal minor major outliers 63 | # 214 bimodal far normal far outliers 1% (blog) 64 | # 215 bimodal very far normal far outliers 1% (blog) 65 | # 216 bimodal very far major minor outliers 1% (blog) 66 | # 300 trimodal normal close 67 | # 301 trimodal normal medium 68 | # 302 trimodal normal far 69 | # 303 trimodal normal outliers 70 | # 304 trimodal normal major medium minor 71 | # 305 trimodal normal minor major minor 72 | # 306 trimodal normal minor major medium 73 | # 307 trimodal normal major minor medium 74 | # 400 quadmodal normal close 75 | # 401 quadmodal normal medium 76 | # 402 quadmodal normal far 77 | # 403 quadmodal normal outliers 78 | # 1000+ unimodal normal outliers random 79 | 80 | # definitions 81 | set.seed(type) 82 | if (type == 0) { # uniform narrow 83 | outliers <- "N" 84 | data <- runif(N, min=500, max=1500) 85 | 86 | } else if (type == 1) { # uniform wide 87 | outliers <- "N" 88 | data <- runif(N, min=0, max=3000) 89 | 90 | } else if (type == 2) { # uniform outliers 91 | outliers <- "Y" 92 | data <- c(runif(N * 0.99, min=500, max=1500), 93 | runif(N * 0.01, min=1500, max=10000)) 94 | 95 | } else if (type == 100) { # unimodal normal narrow 96 | outliers <- "N" 97 | data <- rnorm(N, mean=1000, sd=100) 98 | 99 | } else if (type == 101) { # unimodal normal medium 100 | outliers <- "N" 101 | data <- rnorm(N, mean=1000, sd=200) 102 | 103 | } else if (type == 102) { # unimodal normal wide 104 | outliers <- "N" 105 | data <- rnorm(N, mean=1000, sd=300) 106 | 107 | } else if (type == 103) { # unimodal normal with tail 108 | outliers <- "N" 109 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 110 | runif(N * 0.04, min=1000, max=2250)) 111 | data <- randomize(data) 112 | 113 | } else if (type == 104) { # unimodal normal wide 114 | outliers <- "N" 115 | data <- rnorm(N, mean=1120, sd=700) 116 | 117 | } else if (type == 110) { # unimodal band reject 118 | 
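	# band reject: values falling between 770 and 800 are dropped,
	# cutting a narrow notch in the mode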
outliers <- "N"
119 | 	data0 <- rnorm(N, mean=1000, sd=200)
120 | 	ii <- 1
121 | 	for(i in 1:N) {
122 | 		if (data0[i] < 770 || data0[i] > 800) {
123 | 			data[ii] <- data0[i]
124 | 			ii <- ii + 1
125 | 		}
126 | 	}
127 | 	N <- length(data)
128 | 
129 | } else if (type == 111) {	# unimodal normal spike
130 | 	outliers <- "N"
131 | 	data <- c(rnorm(N * 0.98, mean=1000, sd=200),
132 | 		rnorm(N * 0.02, mean=750, sd=1))
133 | 	data <- randomize(data)
134 | 
135 | } else if (type == 112) {	# unimodal normal fence
136 | 	outliers <- "N"
137 | 	N <- N * 2
138 | 	data0 <- rnorm(N, mean=1000, sd=200)
139 | 	ii <- 1
140 | 	for(i in 1:N) {
141 | 		if ((data0[i] %% 64) < 32) {
142 | 			data[ii] <- data0[i]
143 | 			ii <- ii + 1
144 | 		}
145 | 		if (ii > 5000) { break }	# cap (assumes the default N of 5000)
146 | 	}
147 | 	N <- length(data)
148 | 
149 | } else if (type == 113) {	# unimodal normal quantized
150 | 	outliers <- "N"
151 | 	data0 <- rnorm(N, mean=1000, sd=200)
152 | 	for(i in 1:N) {
153 | 		data[i] <- floor(data0[i] / 64) * 64
154 | 	}
155 | 
156 | } else if (type == 120) {	# unimodal poisson
157 | 	outliers <- "N"
158 | 	data <- rpois(N, lambda=1000)
159 | 
160 | } else if (type == 121) {	# unimodal poisson outliers
161 | 	outliers <- "Y"
162 | 	data <- c(rpois(N * 0.99, lambda=1000),
163 | 		runif(N * 0.01, min=1000, max=5000))
164 | 
165 | } else if (type == 130) {	# unimodal pareto narrow
166 | 	outliers <- "N"
167 | 	data <- rpareto(N, 1000, 3)
168 | 
169 | } else if (type == 131) {	# unimodal pareto wide
170 | 	outliers <- "N"
171 | 	data <- rpareto(N, 1000, 10)
172 | 
173 | } else if (type == 140) {	# unimodal normal outliers 1% medium
174 | 	outliers <- "Y"
175 | 	data <- c(rnorm(N * 0.99, mean=1000, sd=200),
176 | 		runif(N * 0.01, min=1000, max=5000))
177 | 	data <- randomize(data)
178 | 
179 | } else if (type == 141) {	# unimodal normal outliers 1% far
180 | 	outliers <- "Y"
181 | 	data <- c(rnorm(N * 0.99, mean=1000, sd=200),
182 | 		runif(N * 0.01, min=1000, max=10000))
183 | 	data <- randomize(data)
184 | 
185 | } else if (type == 142) {	# unimodal normal outliers 1% very far
186 | 	outliers <- "Y"
187 | 	data <- c(rnorm(N * 0.99, mean=1000, sd=200),
188 | 		runif(N * 0.01, min=1000, max=50000))
189 | 	data <- randomize(data)
190 | 
191 | } else if (type == 143) {	# unimodal normal outliers 2%
192 | 	outliers <- "Y"
193 | 	data <- c(rnorm(N * 0.98, mean=1000, sd=200),
194 | 		runif(N * 0.02, min=1000, max=5000))
195 | 	data <- randomize(data)
196 | 
197 | } else if (type == 144) {	# unimodal normal outliers 4%
198 | 	outliers <- "Y"
199 | 	data <- c(rnorm(N * 0.96, mean=1000, sd=200),
200 | 		runif(N * 0.04, min=1000, max=5000))
201 | 	data <- randomize(data)
202 | 
203 | } else if (type == 145) {	# unimodal normal outliers 2% clustered
204 | 	outliers <- "?"
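	# the 2% form a tight secondary cluster at 3000 (sd=35), arguably a
	# second mode rather than outliers, hence the "?"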
205 | data <- c(rnorm(N * 0.98, mean=1000, sd=200), 206 | rnorm(N * 0.02, mean=3000, sd=35)) 207 | data <- randomize(data) 208 | 209 | } else if (type == 146) { # unimodal normal outliers 4% close 1 210 | outliers <- "Y" 211 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 212 | runif(N * 0.04, min=1000, max=2700)) 213 | data <- randomize(data) 214 | 215 | } else if (type == 147) { # unimodal normal outliers 4% close 2 216 | outliers <- "Y" 217 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 218 | runif(N * 0.04, min=1000, max=2900)) 219 | data <- randomize(data) 220 | 221 | } else if (type == 148) { # unimodal normal outliers 4% close 3 222 | outliers <- "Y" 223 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 224 | runif(N * 0.04, min=1000, max=3100)) 225 | data <- randomize(data) 226 | 227 | } else if (type == 149) { # unimodal normal outliers 4% close 4 228 | outliers <- "Y" 229 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 230 | runif(N * 0.04, min=1000, max=3300)) 231 | data <- randomize(data) 232 | 233 | } else if (type == 150) { # unimodal normal outliers 4% close 5 234 | outliers <- "Y" 235 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 236 | runif(N * 0.04, min=1000, max=3500)) 237 | data <- randomize(data) 238 | 239 | } else if (type == 151) { # unimodal normal outliers 4% close 6 240 | outliers <- "Y" 241 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 242 | runif(N * 0.04, min=1000, max=3700)) 243 | data <- randomize(data) 244 | 245 | } else if (type == 152) { # unimodal normal outliers 4% close 7 246 | outliers <- "Y" 247 | data <- c(rnorm(N * 0.96, mean=1000, sd=200), 248 | runif(N * 0.04, min=1000, max=3900)) 249 | data <- randomize(data) 250 | 251 | } else if (type == 153) { # unimodal normal outliers 0.5% 252 | outliers <- "Y" 253 | data <- c(rnorm(N * 0.995, mean=1000, sd=200), 254 | runif(N * 0.005, min=1000, max=5000)) 255 | data <- randomize(data) 256 | 257 | } else if (type == 154) { # unimodal normal outliers 0.2% 258 | outliers <- "Y" 259 | data <- c(rnorm(N * 0.998, mean=1000, sd=200), 260 | runif(N * 0.002, min=1000, max=5000)) 261 | data <- randomize(data) 262 | 263 | } else if (type == 155) { # unimodal normal outliers 0.1% 264 | outliers <- "Y" 265 | data <- c(rnorm(N * 0.999, mean=1000, sd=200), 266 | runif(N * 0.001, min=1000, max=5000)) 267 | data <- randomize(data) 268 | 269 | } else if (type == 200) { # bimodal normal very close 270 | outliers <- "N" 271 | data <- c(rnorm(N / 2, mean=850, sd=110), 272 | rnorm(N / 2, mean=1150, sd=110)) 273 | data <- randomize(data) 274 | 275 | } else if (type == 201) { # bimodal normal close 276 | outliers <- "N" 277 | data <- c(rnorm(N / 2, mean=825, sd=110), 278 | rnorm(N / 2, mean=1175, sd=110)) 279 | data <- randomize(data) 280 | 281 | } else if (type == 202) { # bimodal normal medium 282 | outliers <- "N" 283 | data <- c(rnorm(N / 2, mean=750, sd=110), 284 | rnorm(N / 2, mean=1250, sd=110)) 285 | data <- randomize(data) 286 | 287 | } else if (type == 203) { # bimodal normal far 288 | outliers <- "N" 289 | data <- c(rnorm(N / 2, mean=600, sd=110), 290 | rnorm(N / 2, mean=1400, sd=110)) 291 | data <- randomize(data) 292 | 293 | } else if (type == 204) { # bimodal normal outliers 1% 294 | outliers <- "Y" 295 | data <- c(rnorm(N * 0.495, mean=750, sd=110), 296 | rnorm(N * 0.495, mean=1250, sd=110), 297 | runif(N * 0.01, min=1000, max=5000)) 298 | data <- randomize(data) 299 | 300 | } else if (type == 205) { # bimodal normal outliers 2% 301 | outliers <- "Y" 302 | data <- c(rnorm(N * 0.49, mean=750, sd=110), 303 | 
		rnorm(N * 0.49, mean=1250, sd=110),
304 | 		runif(N * 0.02, min=1000, max=5000))
305 | 	data <- randomize(data)
306 | 
307 | } else if (type == 206) {	# bimodal normal outliers 4%
308 | 	outliers <- "Y"
309 | 	data <- c(rnorm(N * 0.48, mean=750, sd=110),
310 | 		rnorm(N * 0.48, mean=1250, sd=110),
311 | 		runif(N * 0.04, min=1000, max=5000))
312 | 	data <- randomize(data)
313 | 
314 | } else if (type == 210) {	# bimodal normal major minor
315 | 	outliers <- "N"
316 | 	data <- c(rnorm(N * 0.7, mean=750, sd=110),
317 | 		rnorm(N * 0.3, mean=1250, sd=110))
318 | 	data <- randomize(data)
319 | 
320 | } else if (type == 211) {	# bimodal normal minor major
321 | 	outliers <- "N"
322 | 	data <- c(rnorm(N * 0.3, mean=750, sd=110),
323 | 		rnorm(N * 0.7, mean=1250, sd=110))
324 | 	data <- randomize(data)
325 | 
326 | } else if (type == 212) {	# bimodal normal major minor outliers
327 | 	outliers <- "Y"
328 | 	data <- c(rnorm(N * 0.695, mean=750, sd=110),
329 | 		rnorm(N * 0.295, mean=1250, sd=110),
330 | 		runif(N * 0.01, min=1000, max=5000))
331 | 	N <- length(data)
332 | 	data <- randomize(data)
333 | 
334 | } else if (type == 213) {	# bimodal normal minor major outliers
335 | 	outliers <- "Y"
336 | 	data <- c(rnorm(N * 0.295, mean=750, sd=110),
337 | 		rnorm(N * 0.695, mean=1250, sd=110),
338 | 		runif(N * 0.01, min=1000, max=5000))
339 | 	N <- length(data)
340 | 	data <- randomize(data)
341 | 
342 | } else if (type == 214) {	# bimodal far normal far outliers 1%
343 | 	outliers <- "Y"
344 | 	data <- c(rnorm(N * 0.499, mean=500, sd=150),
345 | 		rnorm(N * 0.499, mean=2000, sd=300),
346 | 		runif(N * 0.002, min=1000, max=180000))
347 | 	data <- randomize(data)
348 | 
349 | } else if (type == 215) {	# bimodal very far normal far outliers 1%
350 | 	outliers <- "Y"
351 | 	data <- c(rnorm(N * 0.499, mean=500, sd=100),
352 | 		rnorm(N * 0.499, mean=4000, sd=500),
353 | 		runif(N * 0.002, min=1000, max=180000))
354 | 	data <- randomize(data)
355 | 
356 | } else if (type == 216) {	# bimodal very far major minor outliers 1%
357 | 	outliers <- "Y"
358 | 	data <- c(rnorm(N * 0.667, mean=500, sd=100),
359 | 		rnorm(N * 0.333, mean=4000, sd=100),
360 | 		runif(N * 0.002, min=1000, max=180000))
361 | 	data <- randomize(data)
362 | 
363 | } else if (type == 300) {	# trimodal normal close
364 | 	outliers <- "N"
365 | 	data <- c(rnorm(N * 0.333, mean=750, sd=90),
366 | 		rnorm(N * 0.334, mean=1000, sd=90),
367 | 		rnorm(N * 0.333, mean=1250, sd=90))
368 | 	N <- length(data)
369 | 	data <- randomize(data)
370 | 
371 | } else if (type == 301) {	# trimodal normal medium
372 | 	outliers <- "N"
373 | 	data <- c(rnorm(N * 0.333, mean=500, sd=100),
374 | 		rnorm(N * 0.334, mean=1000, sd=100),
375 | 		rnorm(N * 0.333, mean=1500, sd=100))
376 | 	data <- randomize(data)
377 | 
378 | } else if (type == 302) {	# trimodal normal far
379 | 	outliers <- "N"
380 | 	data <- c(rnorm(N * 0.333, mean=500, sd=65),
381 | 		rnorm(N * 0.334, mean=1000, sd=65),
382 | 		rnorm(N * 0.333, mean=1500, sd=65))
383 | 	data <- randomize(data)
384 | 
385 | } else if (type == 303) {	# trimodal normal outliers
386 | 	outliers <- "Y"
387 | 	data <- c(rnorm(N * 0.333, mean=500, sd=100),
388 | 		rnorm(N * 0.334, mean=1000, sd=100),
389 | 		rnorm(N * 0.333, mean=1500, sd=100),
390 | 		runif(N * 0.01, min=1000, max=5000))
391 | 	data <- randomize(data)
392 | 
393 | } else if (type == 304) {	# trimodal normal major medium minor
394 | 	outliers <- "N"
395 | 	data <- c(rnorm(N * 0.50, mean=500, sd=100),
396 | 		rnorm(N * 0.33, mean=1000, sd=100),
397 | 		rnorm(N * 0.17, mean=1500, sd=100))
398 | 	data <- randomize(data)
399 | 
400 | } else if (type == 305) {	# trimodal
normal minor major minor 401 | outliers <- "N" 402 | data <- c(rnorm(N * 0.25, mean=500, sd=100), 403 | rnorm(N * 0.50, mean=1000, sd=100), 404 | rnorm(N * 0.25, mean=1500, sd=100)) 405 | data <- randomize(data) 406 | 407 | } else if (type == 306) { # trimodal normal minor major medium 408 | outliers <- "N" 409 | data <- c(rnorm(N * 0.17, mean=500, sd=100), 410 | rnorm(N * 0.50, mean=1000, sd=100), 411 | rnorm(N * 0.33, mean=1500, sd=100)) 412 | data <- randomize(data) 413 | 414 | } else if (type == 307) { # trimodal normal major minor medium 415 | outliers <- "N" 416 | data <- c(rnorm(N * 0.50, mean=500, sd=100), 417 | rnorm(N * 0.17, mean=1000, sd=100), 418 | rnorm(N * 0.33, mean=1500, sd=100)) 419 | data <- randomize(data) 420 | 421 | } else if (type == 400) { # quad normal close 422 | outliers <- "N" 423 | data <- c(rnorm(N * 0.25, mean=700, sd=75), 424 | rnorm(N * 0.25, mean=900, sd=75), 425 | rnorm(N * 0.25, mean=1100, sd=75), 426 | rnorm(N * 0.25, mean=1300, sd=75)) 427 | data <- randomize(data) 428 | 429 | } else if (type == 401) { # quad normal medium 430 | outliers <- "N" 431 | data <- c(rnorm(N * 0.25, mean=700, sd=50), 432 | rnorm(N * 0.25, mean=900, sd=50), 433 | rnorm(N * 0.25, mean=1100, sd=50), 434 | rnorm(N * 0.25, mean=1300, sd=50)) 435 | data <- randomize(data) 436 | 437 | } else if (type == 402) { # quad normal far 438 | outliers <- "N" 439 | data <- c(rnorm(N * 0.25, mean=400, sd=60), 440 | rnorm(N * 0.25, mean=800, sd=60), 441 | rnorm(N * 0.25, mean=1200, sd=60), 442 | rnorm(N * 0.25, mean=1600, sd=60)) 443 | data <- randomize(data) 444 | 445 | } else if (type == 403) { # quad normal outliers 446 | outliers <- "Y" 447 | data <- c(rnorm(N * 0.25, mean=700, sd=50), 448 | rnorm(N * 0.25, mean=900, sd=50), 449 | rnorm(N * 0.25, mean=1100, sd=50), 450 | rnorm(N * 0.24, mean=1300, sd=50), 451 | runif(N * 0.01, min=1000, max=5000)) 452 | data <- randomize(data) 453 | 454 | } else if (type >= 1000) { # relative to type num 455 | set.seed(type) 456 | outliers <- "?" 457 | d_mean <- runif(1, 1000, 5000) 458 | d_sd <- runif(1, 10, 2000) 459 | o_ratio <- sample(1:10)[1] 460 | o_max <- d_mean + runif(1, 0, d_sd * 5) + runif(1, 0, 10)^5 461 | data <- c(rnorm(N * (1 - o_ratio/1000), mean=d_mean, sd=d_sd), 462 | runif(N * o_ratio/1000, min=d_mean, max=o_max)) 463 | N <- length(data) 464 | data <- randomize(data) 465 | } 466 | -------------------------------------------------------------------------------- /da/dist.r: -------------------------------------------------------------------------------- 1 | # dist.r Distribution Analysis 2 | # 3 | # This analyzes data set distributions, both synthetic and actual. It is 4 | # especially intended for latency distributions, such as disk I/O latency, 5 | # to aid in computer performance analysis. 6 | # 7 | # This uses the libraries da-common.r, da-libsynth.r, da-libreal.r. 8 | # 9 | # Environment variables can be set to control behavior and output: see the 10 | # environment section below. These are set by parent shell scripts which 11 | # execute a series of dist.r runs to generate composite images. 12 | # 13 | # I doubt this is a good example of R scripting. This includes considerable 14 | # extra complexity for the environment and process it is used in, which won't 15 | # be apparent from this script alone. 16 | # 17 | # Copyright 2013 Brendan Gregg. All rights reserved. 18 | # 19 | # CDDL HEADER START 20 | # 21 | # The contents of this file are subject to the terms of the 22 | # Common Development and Distribution License (the "License"). 
23 | # You may not use this file except in compliance with the License. 24 | # 25 | # You can obtain a copy of the license at docs/cddl1.txt or 26 | # http://opensource.org/licenses/CDDL-1.0. 27 | # See the License for the specific language governing permissions 28 | # and limitations under the License. 29 | # 30 | # When distributing Covered Code, include this CDDL HEADER in each 31 | # file and include the License file at docs/cddl1.txt. 32 | # If applicable, add the following below this CDDL HEADER, with the 33 | # fields enclosed by brackets "[]" replaced with your own identifying 34 | # information: Portions Copyright [yyyy] [name of copyright owner] 35 | # 36 | # CDDL HEADER END 37 | # 38 | # 01-Jun-2013 Brendan Gregg Created this. 39 | 40 | library(e1071) # skewness, kurtosis 41 | library(diptest) # diptest 42 | source("da-common.r") 43 | 44 | # input 45 | type <- 100 # distribution type: see da-libsynth.r and below 46 | N <- 5000 # target elements 47 | trim <- 0 # trim data set: 0 none, 1 sd, 2 iqr, 3 maxtrim 48 | maxtrim <- 0 # max value for use with trim 3 49 | random <- 1 # randomize data ordering 50 | png <- 0 # png instead of pdf 51 | svg <- 0 # svg instead of pdf 52 | pngheight <- 400 # default png height 53 | pngwidth <- 600 # default png width 54 | pdfheight <- 4.5 # default pdf/svg height 55 | pdfwidth <- 9 # default pdf/svg width 56 | density <- 0 # draw density plot instead of histogram 57 | denadj <- 0.4 # density adjust parameter 58 | labels <- 1 # draw chart labels (default on) 59 | lwidth <- 8 # density line width 60 | trans <- 0 # transparent background 61 | rug <- 0 # do rug plot 62 | outfile <- "dists.pdf" # output file 63 | infile <- "" # input file for dist types 600+ 64 | extra <- 0 # extra tests 65 | weight <- 0 # density weight 66 | statlines <- 0 # plot lines for mean, stddev 67 | plines <- 0 # plot lines for 90th, 99th, 99.9th percentiles 68 | symlink <- 0 # create encoded symlinks 69 | fill <- 0 # polygon fill 70 | numbered <- 0 # add value to right of plot 71 | num_mvalue <- 1 # that value is mvalue 72 | num_maxsigma <- 0 # that value is maxsigma 73 | num_max <- 0 # that value is max 74 | num_factor <- 1000000 # factor for max value 75 | centermean <- 0 # center mean in plot 76 | 77 | # labels 78 | mtitle <- "Latency Distribution" 79 | xtitle <- "Disk I/O latency (us)" 80 | 81 | # environment 82 | if ((env <- Sys.getenv("N")) != "") { N <- as.numeric(env) } 83 | if ((env <- Sys.getenv("TYPE")) != "") { type <- as.numeric(env) } 84 | if ((env <- Sys.getenv("TRIM")) != "") { trim <- as.numeric(env) } 85 | if ((env <- Sys.getenv("MAXTRIM")) != "") { maxtrim <- as.numeric(env) } 86 | if ((env <- Sys.getenv("PNG")) != "") { png <- as.numeric(env) } 87 | if ((env <- Sys.getenv("SVG")) != "") { svg <- as.numeric(env) } 88 | if ((env <- Sys.getenv("LABELS")) != "") { labels <- as.numeric(env) } 89 | if ((env <- Sys.getenv("DENSITY")) != "") { density <- as.numeric(env) } 90 | if ((env <- Sys.getenv("LWD")) != "") { lwidth <- as.numeric(env) } 91 | if ((env <- Sys.getenv("TRANS")) != "") { trans <- as.numeric(env) } 92 | if ((env <- Sys.getenv("RUG")) != "") { rug <- as.numeric(env) } 93 | if ((env <- Sys.getenv("FILL")) != "") { fill<- as.numeric(env) } 94 | if ((env <- Sys.getenv("OUTFILE")) != "") { outfile <- env } 95 | if ((env <- Sys.getenv("INFILE")) != "") { infile <- env } 96 | if ((env <- Sys.getenv("RANDOM")) != "") { random <- as.numeric(env) } 97 | if ((env <- Sys.getenv("EXTRA")) != "") { extra <- as.numeric(env) } 98 | if ((env <- 
Sys.getenv("SYMLINK")) != "") { symlink <- as.numeric(env) } 99 | if ((env <- Sys.getenv("STATLINES")) != "") { statlines <- as.numeric(env) } 100 | if ((env <- Sys.getenv("WEIGHT")) != "") { weight <- as.numeric(env) } 101 | if ((env <- Sys.getenv("PNGWIDTH")) != "") { pngwidth <- as.numeric(env) } 102 | if ((env <- Sys.getenv("PNGHEIGHT")) != "") { pngheight <- as.numeric(env) } 103 | if ((env <- Sys.getenv("PDFWIDTH")) != "") { pdfwidth <- as.numeric(env) } 104 | if ((env <- Sys.getenv("PDFHEIGHT")) != "") { pdfheight <- as.numeric(env) } 105 | 106 | if (png) { 107 | if (outfile == "dists.pdf") { outfile <- "dists.png" } 108 | if ((pngheight < 200) & labels) { pngheight <- pngheight + 140; } 109 | png(outfile, pngwidth, pngheight) 110 | } else if (svg) { 111 | if (outfile == "dists.pdf") { outfile <- "dists.svg" } 112 | svg(outfile, width=pdfwidth, height=pdfheight) 113 | } else { 114 | pdf(outfile, w=pdfwidth, h=pdfheight) 115 | } 116 | if (!labels) { 117 | mtitle <- ' '; xtitle <- ' '; ytitle <- ' ' 118 | par(bty = "n") 119 | if (numbered) { 120 | par(mai = c(0,0,0,1.5)) 121 | } else { 122 | par(mai = c(0,0,0,0)) 123 | } 124 | } else { 125 | par(mgp = c(2,0.5,0)) 126 | #par(cex = 2) 127 | if (numbered) { 128 | par(mar = c(4,3.5,3,3)) 129 | } else { 130 | par(mar = c(4,3.5,3,2)) 131 | } 132 | } 133 | if (trans) { par(bg = NA) } 134 | if (density == 0) { ytitle <- "Frequency" } else { ytitle <- "Density" } 135 | 136 | # distributions 137 | data <- c() 138 | source("da-libsynth.r") # defines types 0-499 139 | source("da-libreal.r") # defines types 500-599 140 | 141 | if (type == 600) { # data is column 0 from infile 142 | outliers <- "?" 143 | input <- read.table(infile, header=FALSE, skip=1, nrows=N) 144 | data <- input$V1 145 | N <- length(data) 146 | if (random) { data <- randomize(data) } 147 | 148 | } else if (type == 601) { # data is column 1 from infile 149 | outliers <- "?" 
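	# read.table names unlabeled columns V1, V2, ...: "column 1" in the
	# comment above counts from zero, so V2 is the second field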
150 | input <- read.table(infile, header=FALSE, skip=1, nrows=N) 151 | data <- input$V2 152 | N <- length(data) 153 | if (random) { data <- randomize(data) } 154 | 155 | } else if (length(data) == 0) { 156 | printf("ERROR: distribution type %d unknown.\n", type) 157 | quit(save = "no") 158 | } 159 | 160 | # truncate negative 161 | data <- data[data >= 0] 162 | N <- length(data) 163 | 164 | # pre-trimmed statistics 165 | mean <- mean(data) 166 | stddev <- sd(data) 167 | mad <- mad(data) 168 | iqr <- IQR(data) 169 | median <- median(data) 170 | max <- max(data) 171 | maxsigma <- (max - mean) / stddev 172 | 173 | # outlier trimming 174 | if (trim == 1) { 175 | # +- 2 stddev 176 | data <- data[data <= mean + 2 * stddev] 177 | data <- data[data >= mean - 2 * stddev] 178 | N <- length(data) 179 | } else if (trim == 2) { 180 | # like boxplots, keep range IQR +- 1.5 x IQR 181 | data <- data[data <= quantile(data, 0.75) + 1.5 * iqr] 182 | data <- data[data >= quantile(data, 0.25) - 1.5 * iqr] 183 | N <- length(data) 184 | } else if (trim == 3) { 185 | data <- data[data <= maxtrim] 186 | N <- length(data) 187 | } 188 | 189 | # post trimmed 190 | mean <- mean(data) 191 | stddev <- sd(data) 192 | 193 | # plot histogram 194 | if (density == 0) { 195 | hist <- hist(data, 196 | breaks = 100, 197 | col = "gray90", 198 | main = mtitle, 199 | xlab = xtitle, 200 | ylab = ytitle) 201 | if (rug) { 202 | rug(data, lwd=lwidth, col="black", ticksize=0.032) 203 | } 204 | maxden <- max(hist$counts) 205 | } else { 206 | # prepare density plots 207 | den <- density(data, adjust = denadj) 208 | if (weight) { den$y <- den$y * den$x } 209 | maxden <- max(den$y) 210 | if (trim == 3) { 211 | xlim <- c(0, maxtrim) 212 | } else { 213 | if (centermean) { 214 | xlim <- c(mean - (max - mean), max) 215 | xlim <- c(mean - 3.5 * stddev, mean + 3.5 * stddev) 216 | } else { 217 | xlim <- c(min(den$x), max(den$x)) 218 | } 219 | } 220 | 221 | # ylim is scaled by 1.05 so top can be cropped. 
222 | # the lwd=8 plot can exceed 1.05 for sharp points 223 | ylim <- c(0, 1.05 * maxden) 224 | } 225 | 226 | # density plot 227 | if (density == 1) { 228 | set.seed(mean + median + stddev) 229 | col <- "white" 230 | trans <- 240 231 | 232 | # customize color here 233 | 234 | # pink/magenta ++ / green/aqua <-- node.js cost 235 | #col <- rgb( 236 | # 0, 237 | # 80 + sample(seq(1:100), 1), 238 | # 60 + sample(seq(1:65), 1), 239 | # trans, maxColorValue = 255) 240 | 241 | # purple/violet ++ / green/brown <-- mysql cost 242 | #col <- rgb( 243 | # 70 + sample(seq(1:65), 1), 244 | # 90 + sample(seq(1:100), 1), 245 | # 0, 246 | # trans, maxColorValue = 255) 247 | 248 | # orange ++ / blue/turquoise <-- disk cost 249 | #col <- rgb( 250 | # 0, 251 | # 80 + sample(seq(1:150), 1), 252 | # 255, 253 | # trans, maxColorValue = 255) 254 | 255 | # dark blue / yellow ++ 256 | #v1 <- 220 + sample(seq(1:35), 1) 257 | #v2 <- v1 - 100 - sample(seq(1:115), 1) 258 | #col <- rgb(v1, v1, v2, maxColorValue = 255) 259 | 260 | # dark yellow ++ / light blue <-- synth yellow 261 | #v1 <- 255 - sample(seq(1:50), 1) 262 | #v2 <- sample(seq(1:65), 1) 263 | #col <- rgb(v2 + 5, v2 + 30, v1, trans, maxColorValue = 255) 264 | 265 | # magenta / light green 266 | #v1 <- 230 + sample(seq(1:25), 1) 267 | #v2 <- v1 - 60 - sample(seq(1:110), 1) 268 | #col <- rgb(v2, v1, v2, maxColorValue = 255) 269 | 270 | # green trans ++ / magenta <-- node.js 271 | #v1 <- 220 + sample(seq(1:35), 1) 272 | #v2 <- v1 - 90 - sample(seq(1:60), 1) 273 | #col <- rgb(v1, v2, v1, trans, maxColorValue = 255) 274 | 275 | # red trans ++ / aqua <-- disk 276 | #v1 <- 230 + sample(seq(1:25), 1) 277 | #v2 <- sample(seq(1:125), 1) 278 | #col <- rgb(v2, v1, v1, trans, maxColorValue = 255) 279 | 280 | # blue trans ++ / yellow <-- mysql 281 | v1 <- 180 + sample(seq(1:55), 1) 282 | v2 <- v1 - 100 - sample(seq(1:80), 1) 283 | col <- rgb(v1, v1 - 20, v2, trans, maxColorValue = 255) 284 | 285 | # turquoise / pink 286 | #v1 <- 230 + sample(seq(1:25), 1) 287 | #v2 <- v1 - 60 - sample(seq(1:90), 1) 288 | #col <- rgb(v1, v2, v2, maxColorValue = 255) 289 | 290 | plot(den, main = mtitle, xlab = xtitle, ylab = ytitle, 291 | lwd = lwidth, fg = NA, xlim = xlim, ylim = ylim) 292 | if (fill) { 293 | polygon(c(min(den$x), den$x, max(den$x)), 294 | c(0, den$y, 0), col = col) 295 | plot(den, main = mtitle, xlab = xtitle, ylab = ytitle, 296 | lwd = lwidth, fg = NA, xlim = xlim, ylim = ylim, 297 | col = "white") 298 | } 299 | if (rug) { 300 | rug(data, lwd = lwidth, ticksize = 0.046, col = col, 301 | xlim = xlim) 302 | } 303 | 304 | # frequency trail 305 | } else if (density == 2) { 306 | # walk the density values and maintain a state based on 307 | # y height, drawing lines or rugs when the state changes. 
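	# on each threshold crossing, the completed interval is rendered: a
	# density line for spans above the threshold, rug ticks for the data
	# in spans below it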
308 | 	plot(den, main = mtitle, xlab = xtitle, ylab = ytitle,
309 | 	    lwd = 1, col = NA, fg = NA, xlim = xlim, ylim = ylim)
310 | 	state <- 0	# 0 line, 1 rug
311 | 	bx <- den$x[1]
312 | 	by <- den$y[1]
313 | 	minden <- min(den$y)
314 | 	maxx <- max(den$x)
315 | 	threshold = 3 * maxden / pngheight
316 | 
317 | 	for (i in 1:512) {
318 | 		if (i == 512) {
319 | 			# force plot on final point
320 | 			if (state == 0) { den$y[i] = 0 }
321 | 			if (state == 1) { den$y[i] = 1.1 * threshold }
322 | 		}
323 | 
324 | 		if (den$y[i] > threshold) {
325 | 			if (state == 1) {
326 | 				if (rug) {
327 | 					rdata <- data[data >= bx]
328 | 					rdata <- rdata[rdata < den$x[i]]
329 | 					rug(rdata, lwd=lwidth,
330 | 					    ticksize = 0.049,
331 | 					    col="black", xlim=xlim)
332 | 				}
333 | 				state <- 0
334 | 				bx <- den$x[i]
335 | 				by <- den$y[i]
336 | 			}
337 | 
338 | 		} else {
339 | 			if (state == 0) {
340 | 				nn <- 1 + round(512 * (den$x[i] - bx) / maxx)
341 | 				sden <- density(data, adjust = denadj, n = nn,
342 | 				    from = bx, to = den$x[i], cut=0)
343 | 				if (weight) { sden$y <- sden$y * sden$x }
344 | 				if (fill) {
345 | 					polygon(c(bx, sden$x, den$x[i]),
346 | 					    c(0, sden$y, 0), col = "white")
347 | 				}
348 | 				lines(sden, main = " ", lwd = lwidth,
349 | 				    fg = NA, col = "black",
350 | 				    xlim=xlim, ylim=ylim)
351 | 
352 | 				state <- 1
353 | 				bx <- den$x[i]
354 | 				by <- den$y[i]
355 | 			}
356 | 		}
357 | 	}
358 | }
359 | 
360 | # calculate statistics
361 | min <- min(data)
362 | max <- max(data)
363 | mad <- mad(data)
364 | var <- var(data)
365 | percentiles <- quantile(data, c(0.9, 0.99, 0.999, 0.9999, 0.99999, 0.999999))
366 | apercentiles <- quantile(data, seq(0.01, 0.99, 0.01), names = TRUE)
367 | quartiles <- quantile(data, c(0.25, 0.75), names = FALSE)
368 | iqr <- IQR(data)
369 | prange = apercentiles[55] - apercentiles[45]
370 | median <- median(data)
371 | skewness <- skewness(data)
372 | kurtosis <- kurtosis(data)
373 | diptest <- dip(data)
374 | pstddev <- stddev * sqrt((N - 1) / N)
375 | maxsigma <- (max - mean) / stddev
376 | minsigma <- (mean - min) / stddev
377 | madmax <- (max - median) / mad
378 | madmin <- (median - min) / mad
379 | bimodalc <- ((skewness^2) + 1) / kurtosis
380 | bimodalcf <- ((skewness^2) + 1) /
381 |     (kurtosis + 3 * ((N - 1)^2) / ((N - 2) * (N - 3)))
382 | cov <- stddev / mean
383 | 
384 | # calculate madv, macdf, sacdf
385 | madv <- 0
386 | macdf <- 0
387 | sacdf <- 0
388 | stddevconn <- 0
389 | for(i in 1:N) {
390 | 	if (i > 1) {
391 | 		d <- abs(data[i] - data[i - 1])
392 | 		macdf <- macdf + d
393 | 		sacdf <- sacdf + d^2
394 | 	}
395 | 	if (data[i] > (mean - stddev/2) & (data[i] < (mean + stddev/2))) {
396 | 		stddevconn <- stddevconn + 1
397 | 	}
398 | 	madv <- madv + abs(data[i] - mean)
399 | }
400 | stddevcon <- stddevconn / N
401 | macdf <- macdf / (N - 1)
402 | sacdf <- sqrt(sacdf / (N - 1))
403 | madv <- madv / N
404 | 
405 | # calculate mvalue
406 | maxmvalue <- 0
407 | for (a in c(2, 3, 5, 7, 10, 15, 20, 30)) {
408 | 	# try various bandwidths, starting at 2x, and keep highest mvalue
409 | 	by <- 0
410 | 	mvalue <- 0
411 | 	den <- density(data, adjust = denadj * a)
412 | 	if (weight) { den$y <- den$y * den$x }
413 | 	maxd <- max(den$y)
414 | 	for (i in 1:length(den$x)) {
415 | 		mvalue <- mvalue + abs(den$y[i] / maxd - by)
416 | 		by <- den$y[i] / maxd
417 | 	}
418 | 	if (mvalue > maxmvalue) { maxmvalue <- mvalue }
419 | }
420 | mvalue <- maxmvalue
421 | 
422 | # print statistics
423 | printf("\n%-42s %d\n", "N", N)
424 | printf("%-42s %.2f\n", "min", min)
425 | printf("%-42s %.2f\n", "mean", mean)
426 | printf("%-42s %.2f\n", "median", median)
427 | printf("%-42s %.2f\n", "max",
max) 428 | printf("%-42s %.2f\n", "max sigma", maxsigma) 429 | printf("%-42s %.2f\n", "min sigma", minsigma) 430 | printf("%-42s %.2f\n", "mad max", madmax) 431 | printf("%-42s %.2f\n", "mad min", madmin) 432 | printf("%-42s %.2f\n", "sample standard deviation", stddev) 433 | printf("%-42s %.2f\n", "population standard deviation", pstddev) 434 | printf("%-42s %.2f\n", "coefficient of variation", cov) 435 | printf("%-42s %.2f\n", "variance/mean", var / mean) 436 | printf("%-42s %.2f\n", "median absolute deviation", mad) 437 | printf("%-42s %.2f\n", "mean absolute deviation", madv) 438 | printf("%-42s %.2f\n", "mean absolute consecutive difference", macdf) 439 | printf("%-42s %.2f\n", "standard absolute consecutive difference", sacdf) 440 | printf("%-42s %.2f\n", "90th percentile", percentiles[1]) 441 | printf("%-42s %.2f\n", "99th percentile", percentiles[2]) 442 | printf("%-42s %.2f\n", "99.9th percentile", percentiles[3]) 443 | printf("%-42s %.2f\n", "99.99th percentile", percentiles[4]) 444 | printf("%-42s %.2f\n", "99.999th percentile", percentiles[5]) 445 | printf("%-42s %.2f\n", "99.9999th percentile", percentiles[6]) 446 | printf("%-42s %.2f\n", "25% quartile", quartiles[1]) 447 | printf("%-42s %.2f\n", "75% quartile", quartiles[2]) 448 | printf("%-42s %.2f\n", "inter quartile range", iqr) 449 | printf("%-42s %.2f\n", "45%-55% percentile range", prange) 450 | printf("%-42s %.2f\n", "skewness", skewness) 451 | printf("%-42s %.2f\n", "kurtosis", kurtosis) 452 | printf("%-42s %.2f\n", "bimodality coefficient", bimodalc) 453 | printf("%-42s %.2f\n", "bimodality coefficient finite sample", bimodalcf) 454 | printf("%-42s %.3f\n", "dip test statistic", diptest) 455 | printf("%-42s %.3f\n", "mvalue (y difference)", mvalue) 456 | printf("%-42s %.2f\n", "macdf/mean", macdf / mean) 457 | printf("%-42s %.2f\n", "sacdf/mean", sacdf / mean) 458 | printf("%-42s %.2f\n", "madv/stddev", madv / stddev) 459 | 460 | # print table output 461 | printf("\nHEAD type trim outliers N min mean median max minsigma maxsigma madmax iqr prange mad stddev cov skewness kurtosis bimodalcf madv macdf sacdf stddevcon diptest\n"); 462 | printf("DATA %d %d %s %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.3f\n", 463 | type, trim, outliers, N, min, mean, median, max, minsigma, maxsigma, madmax, iqr, prange, mad, stddev, cov, skewness, kurtosis, bimodalcf, madv, macdf, sacdf, stddevcon, diptest); 464 | 465 | if (extra) { 466 | print(shapiro.test(data)) 467 | library(ADGofTest) 468 | print(ad.test(data, pnorm, 0, max(data))) 469 | print(ks.test(data, "pnorm", mean = mean, sd = stddev)) 470 | print(dip(data, full = "all")) 471 | } 472 | 473 | if (numbered) { 474 | # 475 | # Some awful code. Ideally we'd create a text variable with whatever 476 | # we want printed, then mtext() would place it right-aligned at a 477 | # _reasonable_ spacing to the plot. I've never got that to work. 478 | # Instead, I let mtext() place it left-aligned, and achieve right- 479 | # alignment by padding the text variable with spaces. Two spaces 480 | # for each digit, since it is variable width. 
481 | # 482 | 483 | if (num_maxsigma) { 484 | num <- maxsigma 485 | # 2 dec places, up to 99; %5.2f has crooked alignment 486 | if (num < 10) { text <- sprintf(" %.2f", num) } 487 | else { text <- sprintf("%.2f", num) } 488 | } 489 | 490 | if (num_mvalue) { 491 | num <- mvalue 492 | # 2 dec places, up to 99; %5.2f has crooked alignment 493 | if (num < 10) { text <- sprintf(" %.2f", num) } 494 | else { text <- sprintf("%.2f", num) } 495 | } 496 | 497 | if (num_max) { 498 | num <- max / num_factor 499 | text <- ""; x <- round(num) 500 | if (x == 0) { x <- 1 } 501 | while (x < 1000) { 502 | text <- paste(text, " ", sep = "") 503 | x <- x * 10 504 | } 505 | text <- paste(text, sprintf("%d", round(num)), sep = "") 506 | } 507 | if (num_max) { 508 | num <- max / num_factor 509 | text <- ""; x <- round(num) 510 | if (x == 0) { x <- 1 } 511 | while (x < 1000) { 512 | text <- paste(text, " ", sep = "") 513 | x <- x * 10 514 | } 515 | text <- paste(text, sprintf("%d", round(num)), sep = "") 516 | } 517 | 518 | # padj = 2 for pngheight 120; 3.85 for pngheight 220; 1.5 centered 519 | # col = white for filled; black for trail; 520 | mtext(text, side = 4, las = 1, cex = 3, adj = 0.5, padj = 3.85, 521 | col = "white") 522 | } 523 | 524 | # plot statistics 525 | if (statlines) { 526 | abline(v=mean, col="black", lwd=1, lty="dashed") 527 | abline(v=mean + stddev, col="black", lty="dotted") 528 | abline(v=mean - stddev, col="black", lty="dotted") 529 | abline(v=percentiles[2], col="black", lty="1A") 530 | abline(v=mean + 6 * stddev, col="black", lty="4A") 531 | legend("topright", 532 | c("mean", "stddev", "99th pct", expression(6 * sigma)), 533 | lty=c("dashed", "dotted", "1A", "4A"), 534 | lwd=1) 535 | } 536 | 537 | if (centermean) { 538 | lines(x = c(mean, mean), y = c(0, maxden), 539 | col = "white", lwd = 8, lend = 1) 540 | } 541 | 542 | if (plines) { 543 | lines(x = c(percentiles[1], percentiles[1]), y = c(0, maxden / 4), 544 | col = "white", lwd = 4) 545 | lines(x = c(percentiles[2], percentiles[2]), y = c(0, maxden / 4), 546 | col = "white", lwd = 4) 547 | lines(x = c(percentiles[3], percentiles[3]), y = c(0, maxden / 4), 548 | col = "white", lwd = 4) 549 | } 550 | 551 | dev.off() 552 | printf("\n%s written.\n", outfile) 553 | 554 | # create symlinks 555 | if (symlink) { 556 | print("making symlinks...") 557 | inf <- basename(infile) 558 | 559 | # create ordered max pngs 560 | link <- sprintf("max_%016d_%s%d.png", round(max), inf, type) 561 | system(sprintf("ln -s %s %s", outfile, link)) 562 | 563 | # create ordered maxsigma pngs 564 | link <- sprintf("maxsigma_%03d.%06d_%s%d.png", 565 | floor(maxsigma), round(1000000 * (maxsigma %% 1)), inf, type) 566 | system(sprintf("ln -s %s %s", outfile, link)) 567 | 568 | # create ordered bimodalcf pngs 569 | link <- sprintf("bimodalcf_%03d.%06d_%s%d.png", 570 | floor(bimodalcf), round(1000000 * (bimodalcf %% 1)), inf, type) 571 | system(sprintf("ln -s %s %s", outfile, link)) 572 | 573 | # create ordered diptest pngs 574 | link <- sprintf("diptest_%03d.%06d_%s%d.png", 575 | floor(diptest), round(1000000 * (diptest %% 1)), inf, type) 576 | system(sprintf("ln -s %s %s", outfile, link)) 577 | 578 | # create ordered cov pngs 579 | link <- sprintf("cov_%03d.%06d_%s%d%s.png", 580 | floor(cov), round(1000000 * (cov %% 1)), inf, type, trim) 581 | system(sprintf("ln -s %s %s", outfile, link)) 582 | 583 | # create ordered mvalue pngs 584 | link <- sprintf("ydiff_%03d.%06d_%s%d%s.png", 585 | floor(mvalue), round(1000000 * (mvalue %% 1)), inf, type, trim) 586 | 
system(sprintf("ln -s %s %s", outfile, link)) 587 | } 588 | -------------------------------------------------------------------------------- /da/waterfall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/ksh 2 | # 3 | # waterfall.sh Run a series of dist.r plots and create 4 | # composite waterfall plots 5 | # 6 | # requires: ImageMagick (convert), R. 7 | # 8 | # 01-Jun-2013 Brendan Gregg Created this. 9 | 10 | # 11 | # Parameters 12 | # 13 | 14 | # synthetic 15 | dists="0 1 2" 16 | dists="$dists 100 101 102 103 110 111 112 120 130 131" 17 | dists="$dists 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155" 18 | dists="$dists 200 201 202 203 204 205 206 210 211 212 213" 19 | dists="$dists 300 301 302 303 304 305 306 307" 20 | dists="$dists 400 401 402 403" 21 | dists="$dists 500 501 502 503" 22 | 23 | # plot type 24 | # trail: density=2 rug=1 25 | # trailfill: density=1 rug=1 same=1 26 | # dotfill: density=2 rug=1 same=1 27 | # dash: density=2 rug=0 28 | # line: density=1 rug=0 29 | # rugfill: density=1 rug=1 30 | # hist: density=0 rug=0 31 | # histrug: density=0 rug=1 32 | # density types: 0 = hist, 1 = line, 2 = frequency trail 33 | density=1 34 | rug=1 35 | same=1 36 | 37 | # input 38 | runtype=1 # 1 = synth, 2 = random, 3 = infiles 39 | infiles='../dilt03/*' 40 | dist=601 41 | trimlist="2" # 0 = none, 1 = sd, 2 = iqr, 3 = maxtrim 42 | maxtrim=1000000 43 | weight=0 44 | 45 | # execution 46 | outdir=working 47 | maxpng=35 48 | makepng=1 49 | makecomposites=1 50 | parallel=5 51 | stride=1 52 | N=5000 53 | 54 | # layout 55 | yoffset=55 56 | xoffset=0 57 | ypad=10 58 | xpad=40 59 | width=2300 60 | pngwidth=2400 61 | pngheight=220 62 | lwd=2 63 | 64 | # 65 | # Combinations 66 | # 67 | 68 | # modal vertical: 69 | #density=2; rug=1; same=0; trimlist=1; weight=0 70 | #yoffset=50; xoffset=0; pngheight=220; lwd=8 71 | 72 | # 20ms vertical compact: 73 | #density=2; rug=1; same=0; trimlist=2; maxtrim=20000; weight=0 74 | #yoffset=20; xoffset=0; pngheight=220; lwd=8 75 | 76 | # 100ms staggered compact: 77 | #density=2; rug=1; same=0; trimlist=2; maxtrim=100000; weight=0 78 | #yoffset=20; xoffset=10; pngheight=220; lwd=8 79 | 80 | # 100ms staggered compact filled - datacenter outliers: 81 | #density=1; rug=1; same=0; trimlist=2; maxtrim=100000; weight=0 82 | #yoffset=20; xoffset=10; pngheight=220; lwd=8 83 | 84 | # 20ms diagonal: 85 | #density=2; rug=1; same=0; trimlist=2; maxtrim=20000; weight=0 86 | #yoffset=30; xoffset=30; pngheight=220; lwd=8 87 | 88 | # outlier detection 89 | #density=1; rug=1; same=1; trimlist=0; weight=0 90 | #yoffset=55; xoffset=0; pngheight=220; lwd=8; N=50000 91 | 92 | # modal colored 93 | #density=1; rug=1; same=1; trimlist=0; weight=0 94 | #yoffset=55; xoffset=0; pngheight=220; lwd=2; N=10000 # top yoffset=205 95 | #incl. 
white line after polygon 96 | 97 | mkdir -p $outdir 98 | cd $outdir 99 | echo output directory: $outdir 100 | 101 | if (( makepng )); then 102 | rm dist_*png 103 | rm max_*png 104 | rm maxsigma_*png 105 | rm bimodalcf_*png 106 | rm diptest_*png 107 | rm cov_*png 108 | rm ydiff_*png 109 | fi 110 | 111 | # onedistpng 112 | # 113 | # environment: dist trim maxtrim pfile density weight infile maxtrim 114 | # pngwidth pngheight lwd 115 | # 116 | function onedistpng { 117 | echo 'source("../dist.r")' | \ 118 | TYPE=$dist TRIM=$trim OUTFILE=$pfile PNG=1 LABELS=0 N=$N \ 119 | DENSITY=$density LWD=$lwd RUG=$rug FILL=1 TRANS=1 WEIGHT=$weight \ 120 | RANDOM=0 INFILE=$infile MAXTRIM=$maxtrim SYMLINK=1 \ 121 | PNGWIDTH=$pngwidth PNGHEIGHT=$pngheight \ 122 | R --no-save 2>/dev/null | \ 123 | grep DATA | sed 's/[^ ]* //' 124 | 125 | # was +88 126 | convert $pfile -crop ${width}x$((pngheight - pngheight/24 - 1))+50+1 \ 127 | $pfile 128 | 129 | # same color 130 | if (( same )); then convert $pfile -negate $pfile; fi 131 | } 132 | 133 | # make synthetic dist pngs 134 | function makesynth { 135 | for trim in $trimlist; do 136 | i=1; j=1 137 | for dist in $dists; do 138 | if (( i++ > maxpng )); then wait; continue; fi 139 | if (( $trim )); then 140 | pfile=`printf "dist_%03dt.png" $dist` 141 | else 142 | pfile=`printf "dist_%03df.png" $dist` 143 | fi 144 | onedistpng & 145 | if (( j++ >= $parallel )); then j=1; wait; fi 146 | done 147 | wait 148 | done 149 | } 150 | 151 | # make random dist pngs 152 | function makerandom { 153 | for trim in $trimlist; do 154 | i=1; j=1 155 | while (( i < maxpng )); do 156 | if (( $trim )); then 157 | pfile=`printf "dist_%03dt.png" $i` 158 | else 159 | pfile=`printf "dist_%03df.png" $i` 160 | fi 161 | (( dist = 1030 + i )) 162 | onedistpng & 163 | if (( j++ >= $parallel )); then j=1; wait; fi 164 | if (( i++ >= maxpng )); then wait; continue; fi 165 | done 166 | wait 167 | done 168 | } 169 | 170 | # make actual dist pngs 171 | function makereal { 172 | for trim in $trimlist; do 173 | i=1; j=1 174 | for infile in $infiles; do 175 | if (( i++ > maxpng )); then wait; continue; fi 176 | if (( $trim )); then 177 | pfile=dist_${infile##*/}t.png 178 | else 179 | pfile=dist_${infile##*/}f.png 180 | fi 181 | onedistpng & 182 | if (( j++ >= $parallel )); then j=1; wait; fi 183 | done 184 | wait 185 | done 186 | } 187 | 188 | # main 189 | if (( makepng )); then 190 | (( runtype == 1 )) && makesynth 191 | (( runtype == 2 )) && makerandom 192 | (( runtype == 3 )) && makereal 193 | fi 194 | if (( !makecomposites )); then exit; fi 195 | 196 | # makecomposite 197 | # 198 | # environment: ypad xpad yoffset xoffset 199 | # input: name 200 | # 201 | function makecomposite { 202 | name=$1 203 | (( maxy = pngheight + ypad * 2 )) 204 | (( maxx = width + xpad * 2 - xoffset )) 205 | i=1; j=1; y=$ypad; files= 206 | 207 | for f in ${name}_*png; do 208 | if (( j++ >= stride )); then j=1; else continue; fi 209 | if (( i++ > maxpng )); then break; fi 210 | 211 | (( maxy += yoffset )) 212 | (( maxx += xoffset )) 213 | files="$files $f" 214 | done 215 | 216 | (( x = maxx - width - xpad )) 217 | dest=waterfall_${name}.png 218 | im="" 219 | 220 | for f in $files; do 221 | if [ -e $f ]; then 222 | im="$im $f -geometry +$x+$y -composite" 223 | fi 224 | (( y += yoffset )) 225 | (( x -= xoffset )) 226 | done 227 | 228 | echo making composite $dest 229 | convert -size ${maxx}x$maxy canvas:transparent $im $dest 230 | } 231 | 232 | # make composites 233 | for trim in $trimlist; do eval trim$trim=1; done 234 | (( 
trim1 || trim2 )) && makecomposite bimodalcf &
235 | (( trim1 || trim2 )) && makecomposite diptest &
236 | (makecomposite max) &
237 | (makecomposite maxsigma) &
238 | (makecomposite cov) &
239 | (makecomposite ydiff) &
240 | wait
241 | 
242 | function negate {
243 | 	file=$1
244 | 	white=white$2
245 | 	black=black$2
246 | 	gray=gray$2
247 | 	whitefill=whitefill$2
248 | 	blackfill=blackfill$2
249 | 	whitefile=${file%.png}_$white.png
250 | 	blackfile=${file%.png}_$black.png
251 | 	grayfile=${file%.png}_$gray.png
252 | 	whitefillfile=${file%.png}_$whitefill.png
253 | 	blackfillfile=${file%.png}_$blackfill.png
254 | 	if (( same )); then
255 | 		convert $file -background white -alpha remove -alpha off \
256 | 		    $whitefillfile
257 | 		convert $whitefillfile -negate $blackfillfile
258 | 		return
259 | 	fi
260 | 	convert $file -background white -alpha remove -alpha off $whitefile
261 | 	convert $whitefile -negate $blackfile
262 | 	convert $file -background '#909090' -alpha remove -alpha off $grayfile
263 | }
264 | 
265 | function rugnegate {
266 | 	file=$1
267 | 	white=white$2
268 | 	black=black$2
269 | 	whitefill=whitefill$2
270 | 	blackfill=blackfill$2
271 | 	nfile=${file%.png}_negate.png
272 | 	whitefile=${file%.png}_$white.png
273 | 	blackfile=${file%.png}_$black.png
274 | 	whitefillfile=${file%.png}_$whitefill.png
275 | 	blackfillfile=${file%.png}_$blackfill.png
276 | 	if (( same )); then
277 | 		convert $file -background white -alpha remove -alpha off \
278 | 		    $whitefillfile
279 | 		convert $whitefillfile -negate $blackfillfile
280 | 		return
281 | 	fi
282 | 	convert $file -negate $nfile
283 | 	convert $nfile -background white -alpha remove -alpha off $whitefile
284 | 	convert $whitefile -negate $blackfile
285 | }
286 | 
287 | # negations
288 | echo making negations
289 | if (( rug && density < 2 )); then
290 | 	nfunc=rugnegate; name=rugfill
291 | elif (( density == 2 )); then
292 | 	nfunc=negate; name=dot
293 | else
294 | 	nfunc=negate; name=line
295 | fi
296 | $nfunc waterfall_max.png $name &
297 | $nfunc waterfall_maxsigma.png $name &
298 | (( trim1 || trim2 )) && $nfunc waterfall_bimodalcf.png $name &
299 | (( trim1 || trim2 )) && $nfunc waterfall_diptest.png $name &
300 | $nfunc waterfall_cov.png $name &
301 | $nfunc waterfall_ydiff.png $name &
302 | wait
303 | 
--------------------------------------------------------------------------------
/data.txt:
--------------------------------------------------------------------------------
1 | N Result
2 | 1 8.672
3 | 2 17.48
4 | 3 26.9
5 | 4 35.28
6 | 5 41.96
7 | 6 48.22
8 | 7 53.62
9 | 8 57.2
10 | 9 59.26
11 | 10 60.16
12 | 11 61.46
13 | 12 62.4
14 | 13 63.66
15 | 14 64.38
16 | 15 65.36
17 | 16 65.9
18 | 
--------------------------------------------------------------------------------
/frequencytrail.r:
--------------------------------------------------------------------------------
1 | # frequencytrail.r	Example frequency trail implementation.
2 | #
3 | # This implementation takes a density plot with a high resolution, and
4 | # removes lines that are below a minimum threshold, by setting their value
5 | # to NA. This hides the zero probability line, and provides a coarse (but
6 | # probably sufficient) view of distribution outliers.
7 | #
8 | # 08-Jun-2013	Brendan Gregg	Created this.
9 | 
10 | pdf("frequencytrailtest.pdf", w=8, h=4)
11 | 
12 | # plot a data set as a frequency trail
13 | plotfrequencytrail <- function(data) {
14 | 	n <- 2048	# resolution
15 | 	lwd <- 4	# line width
16 | 
17 | 	# threshold. todo: improve this calculation to be more robust.
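	# 1/(sd * N) roughly approximates the density height that a single
	# sample contributes over the distribution's scale; anything lower
	# is treated as zero probability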
18 | thr <- 1 / (sd(data) * length(data)) 19 | 20 | den <- density(data, n=n) 21 | plot(den, col=NA, fg=NA) 22 | 23 | # replace low frequency with NA to avoid plotting 24 | for (i in 1:n) { if (den$y[i] < thr) { den$y[i] = NA } } 25 | 26 | lines(den, lwd=lwd) 27 | } 28 | 29 | # data set is a normal distribution plus outliers 30 | data <- c(rnorm(9900, mean=1000, sd=100), 31 | runif(10, min=2000, max=10000)) 32 | 33 | plotfrequencytrail(data) 34 | -------------------------------------------------------------------------------- /frequencytrailtest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brendangregg/PerfModels/8612f83119869e510e29196c6c49743445ae4559/frequencytrailtest.pdf -------------------------------------------------------------------------------- /linear.g: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | # 3 | # linear.g Linear scalability model using gnuplot. 4 | # 5 | # This applies a linear scalability model to an input data set. It uses 6 | # regression analysis to determine the constants. Two linear functions 7 | # are plotted: Linear(N), which fits the input set; and LinearN1(N), which 8 | # uses N=1 only. 9 | # 10 | # USAGE: ./linear.g 11 | # 12 | # See the "tunables" section for defining the input data file, and the number 13 | # of rows to include as model input. The remainder of rows are drawn as 14 | # "extra" data points. The file has the form: 15 | # 16 | # N Result 17 | # 1 2.1 18 | # 2 4.0 19 | # 3 5.9 20 | # ... 21 | # 22 | # The row order can be rearranged to customize the model input. 23 | # 24 | # Copyright 2012 Brendan Gregg. All rights reserved. 25 | # 26 | # CDDL HEADER START 27 | # 28 | # The contents of this file are subject to the terms of the 29 | # Common Development and Distribution License (the "License"). 30 | # You may not use this file except in compliance with the License. 31 | # 32 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 33 | # or http://www.opensolaris.org/os/licensing. 34 | # See the License for the specific language governing permissions 35 | # and limitations under the License. 36 | # 37 | # When distributing Covered Code, include this CDDL HEADER in each 38 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE. 39 | # If applicable, add the following below this CDDL HEADER, with the 40 | # fields enclosed by brackets "[]" replaced with your own identifying 41 | # information: Portions Copyright [yyyy] [name of copyright owner] 42 | # 43 | # CDDL HEADER END 44 | # 45 | # 03-May-2012 Brendan Gregg Created this. 
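#
# Two models are drawn below: linear(N) = N1 * alpha * N, with alpha fitted
# to the input rows, and linearN1(N) = N1 * N, anchored to the first
# measurement alone.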
46 |
47 | set terminal x11 font "arial,14" # designed for x11 (redraws)
48 | set autoscale
49 |
50 | # tunables
51 | filename = "data.txt" # data file
52 | inputN = 6 # rows to include as model input
53 | scale = 1.5 # scale graph beyond data points
54 | set grid
55 |
56 | set xlabel "CPUs (N)"
57 | set ylabel "Throughput"
58 | set title "Linear Scalability"
59 | set key on right bottom
60 | set pointsize 2
61 |
62 | # read N1, the first value for normalizing the plot (workaround)
63 | plot filename using 1:(N1 = $2, 0/0) every 1:1:1:0:1:0 notitle, '' using 1:($2 / N1) with linespoints
64 |
65 | # Linear, N1 only
66 | linearN1(N) = N1 * N
67 |
68 | # Linear, input set
69 | alpha = 0.9
70 | linear(N) = N1 * alpha * N
71 |
72 | # regression fitting
73 | fit linear(x) filename every ::1::inputN using 1:2 via alpha
74 |
75 | # plot data points
76 | plot filename using 1:2 with points pt 6 lc rgb "#f00000" title "extra measurements",\
77 | filename every ::1::inputN using 1:2 with points pt 6 lc rgb "#000000" title "input for Linear(N)"
78 | set label sprintf("a = %.4f", alpha) at graph 0.5, 0.075 center
79 | set yrange [0:GPVAL_DATA_Y_MAX * scale]
80 | set xrange [0:GPVAL_DATA_X_MAX * scale]
81 |
82 | # plot curves
83 | replot linear(x) with line lc rgb "#000000" title "Linear(N)"
84 | replot linearN1(x) with line lc rgb "#a0a0a0" title "LinearN1(N)"
85 |
86 | pause -1 "Hit return to continue"
87 |
--------------------------------------------------------------------------------
/scale.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brendangregg/PerfModels/8612f83119869e510e29196c6c49743445ae4559/scale.pdf
--------------------------------------------------------------------------------
/scale.r:
--------------------------------------------------------------------------------
1 | # scale.r Amdahl's law and USL scalability using R statistics.
2 | #
3 | # This applies both Amdahl's law to model scalability (maximum speedup) and
4 | # Universal Scalability Law to the input data set. It uses regression
5 | # analysis to determine the constants.
6 | #
7 | # USAGE: R --save < scale.r # generates scale.pdf
8 | #
9 | # See the "Tunables" section for defining the input data file, and the number
10 | # of rows to include as model input. The remainder of rows are drawn as
11 | # "extra" data points. The file has the form:
12 | #
13 | # N Result
14 | # 1 2.1
15 | # 2 4.0
16 | # 3 5.9
17 | # ...
18 | #
19 | # The heading line is important (processed by R).
20 | #
21 | # BASED ON: USLcalc.r by Dr. Neil Gunther.
22 | #
23 | # SEE ALSO: http://www.perfdynamics.com/Manifesto/USLscalability.html
24 | #
25 | # Copyright 2012 Brendan Gregg. All rights reserved.
26 | #
27 | # CDDL HEADER START
28 | #
29 | # The contents of this file are subject to the terms of the
30 | # Common Development and Distribution License (the "License").
31 | # You may not use this file except in compliance with the License.
32 | #
33 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
34 | # or http://www.opensolaris.org/os/licensing.
35 | # See the License for the specific language governing permissions
36 | # and limitations under the License.
37 | #
38 | # When distributing Covered Code, include this CDDL HEADER in each
39 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
40 | # If applicable, add the following below this CDDL HEADER, with the
41 | # fields enclosed by brackets "[]" replaced with your own identifying
42 | # information: Portions Copyright [yyyy] [name of copyright owner]
43 | #
44 | # CDDL HEADER END
45 | #
46 | # 20-Oct-2012 Brendan Gregg Created this.
47 |
48 | # Tunables
49 | filename <- "data.txt" # data file (see top comment for format)
50 | inputN <- 10 # rows to include as model input
51 | padding <- 1.1 # chart padding
52 | pdf("scale.pdf", w=10, h=6) # comment this line out for interactive use
53 |
54 | # Input
55 | input_full <- read.table(filename, header=TRUE)
56 | input_model <- subset(input_full, input_full$N <= inputN)
57 | input_extra <- subset(input_full, input_full$N > inputN)
58 |
59 | # Calculate normalization ratio relative to the 1st datum
60 | input_model$Norm <- input_model$Result/input_model$Result[1]
61 |
62 | # Regression analysis: standard non-linear least squares (NLS) fit
63 | amdahl <- nls(Norm ~ N / (1 + alpha * (N - 1)),
64 | input_model, start=c(alpha=0.1))
65 | usl <- nls(Norm ~ N / (1 + alpha * (N - 1) + beta * N * (N - 1)),
66 | input_model, start=c(alpha=0.1, beta=0.01))
67 |
68 | # Print parameters
69 | print(summary(amdahl))
70 | print(coef(amdahl))
71 | amdahls.coef <- coef(amdahl)
72 | print(summary(usl))
73 | print(coef(usl))
74 | usls.coef <- coef(usl)
75 |
76 | # Chart padding
77 | max_x <- padding * max(input_full$N)
78 | max_y <- padding * max(input_full$Result)
79 |
80 | # Plot model results
81 | plot(x <- c(0:max_x), input_model$Result[1] * x /
82 | (1 + usls.coef['alpha'] * (x - 1) + usls.coef['beta'] * x * (x - 1)),
83 | type="l", lty=2, lwd=1,
84 | xlim=c(0, max_x), ylim=c(0, max_y),
85 | xlab="CPUs (N)", ylab="Throughput X(N)")
86 | points(x <- c(0:max_x),
87 | input_model$Result[1] * x / (1 + amdahls.coef['alpha'] * (x - 1)),
88 | type="l", lty=3, lwd=1)
89 |
90 | # Plot data
91 | points(input_model$N, input_model$Result, pch=1)
92 | points(input_extra$N, input_extra$Result, pch=4)
93 |
94 | title("Scalability Models")
95 | legend("bottomright", c("model input", "extra measurements"), pch=c(1,4))
96 | legend("bottom", c("Amdahl", "USL"), lty=c(3,2))
97 |
--------------------------------------------------------------------------------
/tools/README:
--------------------------------------------------------------------------------
1 | Basic tools.
2 |
--------------------------------------------------------------------------------
/tools/interval.r:
--------------------------------------------------------------------------------
1 | # interval.r R line graph of interval measurements.
2 | #
3 | # USAGE: R --no-save < interval.r
4 | #
5 | # This time I'm putting it on github where I won't lose it.
6 | #
7 | # Input is a single column of measurements, taken at a known interval. The
8 | # number of input elements, interval, and column number can be customized (see
9 | # the N, interval, and data variables).
10 | #
11 | # 17-Jun-2014 Brendan Gregg Created this.
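# Example: with interval = 5, a hypothetical input file might look like
# this (column 1 is ignored; column 2, V2, holds the measurements):
#   1 0.31
#   2 0.36
#   3 0.29
#   ...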
12 |
13 | filename <- "data.txt"
14 | pdf("interval.pdf", w=10, h=6)
15 | N <- 121 # max number of elements
16 | interval <- 5 # seconds between measurements
17 | xlab <- "Time (secs)" # x-axis label
18 | ylab <- "Measurement" # y-axis label
19 | title <- "Plot of data.txt" # plot title
20 |
21 | input <- read.table(filename, header=FALSE, nrows=N)
22 | data <- input$V2 # use 2nd column
23 | xaxis <- seq(0, (length(data) - 1) * interval, interval) # matches rows actually read
24 |
25 | # type: p=points, l=lines, n=none, o=overplotted, b=both; cex=size
26 | plot(xaxis, data, main=title, type="o", cex=0.6, xlab=xlab, ylab=ylab)
27 |
28 | grid(col = "lightgray", lty = "dotted", lwd = par("lwd"), equilogs = TRUE)
29 |
30 |
--------------------------------------------------------------------------------
/tools/scatter.r:
--------------------------------------------------------------------------------
1 | # scatter.r R scatter plot hello world.
2 | #
3 | # USAGE: R --no-save < scatter.r
4 | #
5 | # This time I'm putting it on github where I won't lose it.
6 | #
7 | # Input is two columns, for time (seconds) and latency (ms).
8 | #
9 | # 17-Jun-2014 Brendan Gregg Created this.
10 |
11 | filename <- "scatter.txt"
12 | pdf("scatter.pdf", w=10, h=5)
13 |
14 | # max rows to use
15 | N <- 10000
16 |
17 | data <- read.table(filename, header=FALSE, nrows=N)
18 | N <- nrow(data) # rows actually read (length() would count columns)
19 |
20 | # type: p=points, l=lines, n=none, o=overplotted, b=both
21 | plot(data, cex=0.5, xlab="Time (s)", ylab="Latency (ms)")
22 |
23 | grid(col = "lightgray", lty = "dotted",
24 | lwd = par("lwd"), equilogs = TRUE)
25 |
--------------------------------------------------------------------------------
/usl.g:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env gnuplot
2 | #
3 | # usl.g USL using gnuplot.
4 | #
5 | # This applies the Universal Scalability Law (Dr. Neil J. Gunther) to the input
6 | # data set. It uses regression analysis to determine the constants.
7 | #
8 | # USAGE: ./usl.g
9 | #
10 | # See the "tunables" section for defining the input data file, and the number
11 | # of rows to include as model input (USL insists on a minimum of six). The
12 | # remainder of rows are drawn as "extra" data points. The file has the form:
13 | #
14 | # N Result
15 | # 1 2.1
16 | # 2 4.0
17 | # 3 5.9
18 | # ...
19 | #
20 | # The row order can be rearranged to customize the model input.
21 | #
22 | # SEE ALSO: http://www.perfdynamics.com/Manifesto/USLscalability.html
23 | #
24 | # Copyright 2012 Brendan Gregg. All rights reserved.
25 | #
26 | # CDDL HEADER START
27 | #
28 | # The contents of this file are subject to the terms of the
29 | # Common Development and Distribution License (the "License").
30 | # You may not use this file except in compliance with the License.
31 | #
32 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
33 | # or http://www.opensolaris.org/os/licensing.
34 | # See the License for the specific language governing permissions
35 | # and limitations under the License.
36 | #
37 | # When distributing Covered Code, include this CDDL HEADER in each
38 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
39 | # If applicable, add the following below this CDDL HEADER, with the
40 | # fields enclosed by brackets "[]" replaced with your own identifying
41 | # information: Portions Copyright [yyyy] [name of copyright owner]
42 | #
43 | # CDDL HEADER END
44 | #
45 | # 03-May-2012 Brendan Gregg Created this.
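# Background (standard USL definitions, for reference): relative capacity is
# C(N) = N / (1 + alpha*(N - 1) + beta*N*(N - 1)), where alpha models
# contention (serialization) and beta models coherency (crosstalk) costs.
# With beta > 0, throughput eventually peaks and then retrogrades as N grows.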
46 |
47 | set terminal x11 font "arial,14" # designed for x11 (redraws)
48 | set autoscale
49 |
50 | # tunables
51 | filename = "data.txt" # data file
52 | inputN = 6 # rows to include as model input
53 | scale = 1.5 # scale graph beyond data points
54 | set grid
55 |
56 | set xlabel "CPUs (N)"
57 | set ylabel "Throughput"
58 | set title "USL Scalability"
59 | set key on right bottom
60 | set pointsize 2
61 |
62 | # read N1, the first value for normalizing the plot (workaround)
63 | plot filename using 1:(N1 = $2, 0/0) every 1:1:1:0:1:0 notitle, '' using 1:($2 / N1) with linespoints
64 |
65 | # USL
66 | alpha = 0.01
67 | beta = 0.001
68 | usl(N) = N1 * N/(1 + alpha * (N - 1) + beta * N * (N - 1))
69 |
70 | # regression analysis (non-linear least squares fitting)
71 | fit usl(x) filename every ::1::inputN using 1:2 via alpha, beta
72 |
73 | # plot data points
74 | plot filename using 1:2 with points pt 6 lc rgb "#f00000" title "extra measurements",\
75 | filename every ::1::inputN using 1:2 with points pt 6 lc rgb "#000000" title "input for USL"
76 | set label sprintf("a = %.4f\nb = %.4f", alpha, beta) at graph 0.5, 0.075 center
77 | set yrange [0:GPVAL_DATA_Y_MAX * scale]
78 | set xrange [0:GPVAL_DATA_X_MAX * scale]
79 |
80 | # plot curves
81 | replot usl(x) with line lc rgb "#000000" title "USL(N)"
82 |
83 | pause -1 "Hit return to continue"
84 |
--------------------------------------------------------------------------------
/util-md1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/brendangregg/PerfModels/8612f83119869e510e29196c6c49743445ae4559/util-md1.pdf
--------------------------------------------------------------------------------
/util-md1.r:
--------------------------------------------------------------------------------
1 | # util-md1.r Queueing Theory M/D/1 mean response time vs utilization
2 | #
3 | # USAGE: R --save < util-md1.r # generates util-md1.pdf
4 | #
5 | # See the "Tunables" section for defining the mean service time.
6 | #
7 | # Copyright 2012 Brendan Gregg. All rights reserved.
8 | #
9 | # CDDL HEADER START
10 | #
11 | # The contents of this file are subject to the terms of the
12 | # Common Development and Distribution License (the "License").
13 | # You may not use this file except in compliance with the License.
14 | #
15 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
16 | # or http://www.opensolaris.org/os/licensing.
17 | # See the License for the specific language governing permissions
18 | # and limitations under the License.
19 | #
20 | # When distributing Covered Code, include this CDDL HEADER in each
21 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
22 | # If applicable, add the following below this CDDL HEADER, with the
23 | # fields enclosed by brackets "[]" replaced with your own identifying
24 | # information: Portions Copyright [yyyy] [name of copyright owner]
25 | #
26 | # CDDL HEADER END
27 | #
28 | # 20-Oct-2012 Brendan Gregg Created this.
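# For reference, the standard M/D/1 result behind the plot below: with
# utilization rho and constant service time S, the mean queue wait is
# Wq = rho * S / (2 * (1 - rho)), so the mean response time is
# R = S + Wq = S * (2 - rho) / (2 * (1 - rho)); the code uses rho = x/100.
# e.g., at 90% utilization with S = 1 ms: R = 1 * (2 - 0.9) / (2 * 0.1) = 5.5 ms.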
29 |
30 | # Tunables
31 | svc_ms <- 1 # average disk I/O service time (ms)
32 | pdf("util-md1.pdf", w=10, h=6) # comment this line out for interactive use
33 | util_min <- 0
34 | util_max <- 100
35 | ms_min <- 0
36 | ms_max <- 10
37 |
38 | # Plot mean response time vs utilization (M/D/1)
39 | plot(x <- c(util_min:util_max), svc_ms * (2 - x/100) / (2 * (1 - x/100)),
40 | type="l", lty=1, lwd=1,
41 | xlim=c(util_min, util_max), ylim=c(ms_min, ms_max),
42 | xlab="Utilization %", ylab="Mean Response Time (ms)")
43 |
44 | # Grids
45 | abline(v=(seq(util_min, util_max, (util_max - util_min) / 10)),
46 | col="lightgray", lty="dotted")
47 | abline(h=(seq(ms_min, ms_max, (ms_max - ms_min) / 10)),
48 | col="lightgray", lty="dotted")
49 |
50 | title("Single Service Queue, Constant Service Times (M/D/1)")
51 |
--------------------------------------------------------------------------------
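Example workflow (a sketch based on the USAGE lines above; adjust each
tool's tunables first):

    $ vi data.txt              # N and Result columns, as in the sample above
    $ ./usl.g                  # interactive gnuplot USL fit (X11)
    $ R --save < scale.r       # writes scale.pdf with Amdahl and USL fits
    $ R --save < util-md1.r    # writes util-md1.pdf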