└── workloadSuite ├── Additional_Workloads_Placeholder.tsv ├── CHANGELOG ├── FB-2009_samples_24_times_1hr_0.tsv ├── FB-2009_samples_24_times_1hr_0_first50jobs.tsv ├── FB-2009_samples_24_times_1hr_1.tsv ├── FB-2010_samples_24_times_1hr_0.tsv ├── FB-2010_samples_24_times_1hr_withInputPaths_0.tsv ├── GenerateReplayScript.java ├── HDFSWrite.java ├── LICENSE ├── README ├── WorkGen.java ├── WorkloadSynthesis.pl ├── parse-hadoop-jobhistory.pl ├── randomwriter_conf.xsl ├── scriptsTest ├── inputPath-job-0.txt ├── inputPath-job-1.txt ├── inputPath-job-10.txt ├── inputPath-job-11.txt ├── inputPath-job-12.txt ├── inputPath-job-13.txt ├── inputPath-job-14.txt ├── inputPath-job-15.txt ├── inputPath-job-16.txt ├── inputPath-job-17.txt ├── inputPath-job-18.txt ├── inputPath-job-19.txt ├── inputPath-job-2.txt ├── inputPath-job-20.txt ├── inputPath-job-21.txt ├── inputPath-job-22.txt ├── inputPath-job-23.txt ├── inputPath-job-24.txt ├── inputPath-job-25.txt ├── inputPath-job-26.txt ├── inputPath-job-27.txt ├── inputPath-job-28.txt ├── inputPath-job-29.txt ├── inputPath-job-3.txt ├── inputPath-job-30.txt ├── inputPath-job-31.txt ├── inputPath-job-32.txt ├── inputPath-job-33.txt ├── inputPath-job-34.txt ├── inputPath-job-35.txt ├── inputPath-job-36.txt ├── inputPath-job-37.txt ├── inputPath-job-38.txt ├── inputPath-job-39.txt ├── inputPath-job-4.txt ├── inputPath-job-40.txt ├── inputPath-job-41.txt ├── inputPath-job-42.txt ├── inputPath-job-43.txt ├── inputPath-job-44.txt ├── inputPath-job-45.txt ├── inputPath-job-46.txt ├── inputPath-job-47.txt ├── inputPath-job-48.txt ├── inputPath-job-49.txt ├── inputPath-job-5.txt ├── inputPath-job-6.txt ├── inputPath-job-7.txt ├── inputPath-job-8.txt ├── inputPath-job-9.txt ├── run-job-0.sh ├── run-job-1.sh ├── run-job-10.sh ├── run-job-11.sh ├── run-job-12.sh ├── run-job-13.sh ├── run-job-14.sh ├── run-job-15.sh ├── run-job-16.sh ├── run-job-17.sh ├── run-job-18.sh ├── run-job-19.sh ├── run-job-2.sh ├── run-job-20.sh ├── run-job-21.sh ├── run-job-22.sh ├── run-job-23.sh ├── run-job-24.sh ├── run-job-25.sh ├── run-job-26.sh ├── run-job-27.sh ├── run-job-28.sh ├── run-job-29.sh ├── run-job-3.sh ├── run-job-30.sh ├── run-job-31.sh ├── run-job-32.sh ├── run-job-33.sh ├── run-job-34.sh ├── run-job-35.sh ├── run-job-36.sh ├── run-job-37.sh ├── run-job-38.sh ├── run-job-39.sh ├── run-job-4.sh ├── run-job-40.sh ├── run-job-41.sh ├── run-job-42.sh ├── run-job-43.sh ├── run-job-44.sh ├── run-job-45.sh ├── run-job-46.sh ├── run-job-47.sh ├── run-job-48.sh ├── run-job-49.sh ├── run-job-5.sh ├── run-job-6.sh ├── run-job-7.sh ├── run-job-8.sh ├── run-job-9.sh └── run-jobs-all.sh └── workGenKeyValue_conf.xsl /workloadSuite/Additional_Workloads_Placeholder.tsv: -------------------------------------------------------------------------------- 1 | job0 9 9 1762 0 14347 inputPath1 outputPath1 2 | job1 18 9 970 609 697 inputPath2 outputPath2 3 | job2 20 2 53088744165 1510005522 5759777 inputPath3 outputPath3 4 | job3 22 2 61873748853 1387369304 5840114 inputPath4 outputPath4 5 | job4 25 3 404 22 191 inputPath5 outputPath5 6 | job5 32 7 1483413 0 17296097682 inputPath6 outputPath6 7 | job6 32 0 404 22 191 inputPath7 outputPath7 8 | job7 46 14 25526958 300 24106570 inputPath8 outputPath8 9 | job8 49 3 6913085573 1380194628 2464 inputPath9 outputPath9 10 | job9 56 7 123701 0 1480875 inputPath10 outputPath10 11 | -------------------------------------------------------------------------------- /workloadSuite/CHANGELOG: 
-------------------------------------------------------------------------------- 1 | 2012-01-27 2 | 3 | Update to Version 1.4. 4 | 5 | FB-2010_samples_24_times_1hr_0.tsv 6 | Added Facebook 2010 workload. 7 | 8 | parse-hadoop-jobhistory.pl 9 | Added Hadoop job history log parsing tools. 10 | 11 | Fully migrated repository and documentation to git hub. 12 | Improved documentation, consolidated git hub wiki as sole 13 | documentation source. 14 | See https://github.com/SWIMProjectUCB/SWIM/wiki. 15 | 16 | README 17 | Changed to reflect the above. 18 | 19 | ---- 20 | 21 | 2011-08-16 22 | 23 | Update to Version 1.3. 24 | 25 | WorkGen.java 26 | Changed floats to doubles. 27 | 28 | GenerateReplayScript.java 29 | Added parameters for 30 | [prefix to workload output in HDFS] 31 | [workload stdout stderr output dir] 32 | [hadoop command] 33 | [path to WorkGen.jar] 34 | [path to workGenKeyValue_conf.xsl] 35 | 36 | scriptsTest/* 37 | Re-generated using the latest GenerateReplayScript.java. 38 | 39 | README 40 | Changed to reflect the above. 41 | 42 | CHANGELOG 43 | Added this file. 44 | 45 | -------------------------------------------------------------------------------- /workloadSuite/FB-2009_samples_24_times_1hr_0_first50jobs.tsv: -------------------------------------------------------------------------------- 1 | job0 49 49 740773 2339561 627471 2 | job1 101 52 736346 1700537 432269 3 | job2 122 21 267631 594312 233549 4 | job3 197 75 233422 42 37 5 | job4 208 11 3623279 0 9838062 6 | job5 349 141 233422 324968 10974 7 | job6 373 24 2497901 0 551304 8 | job7 379 6 233422 256459 8835 9 | job8 463 84 2193083 2660213 560397 10 | job9 487 24 1010097 2369060 968271 11 | job10 497 10 19555 0 41471 12 | job11 609 112 968144 41 35 13 | job12 666 57 968144 2326546 14369 14 | job13 698 32 968144 5072 2411 15 | job14 724 26 16526 0 34635 16 | job15 930 206 62032 110249 49133 17 | job16 1112 182 49006 41 37 18 | job17 1128 16 10274791099 13024975762 3600817163 19 | job18 1180 52 49006 57406 11871 20 | job19 1185 5 4776464612 2744661563 1084547 21 | job20 1187 2 4228 8456 127 22 | job21 1214 27 49006 50728 12181 23 | job22 1234 20 0 0 127 24 | job23 1262 28 23798 0 49034 25 | job24 1285 23 0 0 127 26 | job25 1420 135 23042116 44795750 71684305 27 | job26 1453 33 1083912 4761106 804562 28 | job27 1496 43 804435 7474262 804562 29 | job28 1525 29 160296 314626 136937 30 | job29 1665 140 136810 40 36 31 | job30 1669 4 136810 310392 19414 32 | job31 1700 31 1043799029 3220465724 2645916 33 | job32 1725 25 136810 817 699 34 | job33 1746 21 19859 0 41738 35 | job34 1856 110 11310 0 18722926301 36 | job35 1974 118 78 0 4162032 37 | job36 1975 1 106 0 271001141 38 | job37 1975 0 753 0 9178203796 39 | job38 1976 1 53416 0 85899343844 40 | job39 1977 1 9416 0 8826003635 41 | job40 1981 4 395 0 25803030649 42 | job41 2181 200 126 0 0 43 | job42 2228 47 359 0 2170182507 44 | job43 2281 53 69563889 70214203 20904794 45 | job44 2323 42 250 0 344553539 46 | job45 2647 324 584545 1263421 541810 47 | job46 2684 37 1212963 4926 32 48 | job47 2752 68 541683 41 36 49 | job48 2793 41 541683 1212479 11407 50 | job49 2826 33 541683 25536 3859 51 | -------------------------------------------------------------------------------- /workloadSuite/GenerateReplayScript.java: -------------------------------------------------------------------------------- 1 | import java.io.BufferedReader; 2 | import java.io.FileReader; 3 | import java.io.FileWriter; 4 | import java.io.File; 5 | import java.io.InputStreamReader; 6 | import java.util.HashMap; 7 
| import java.util.ArrayList;
8 | import java.util.Arrays;
9 | import java.util.Date;
10 | import java.text.SimpleDateFormat;
11 | 
12 | public class GenerateReplayScript {
13 | 
14 |   /*
15 |    * Workload file format constants for field indices
16 |    */
17 |   static final int INTER_JOB_SLEEP_TIME = 2;
18 |   static final int INPUT_DATA_SIZE = 3;
19 |   static final int SHUFFLE_DATA_SIZE = 4;
20 |   static final int OUTPUT_DATA_SIZE = 5;
21 | 
22 |   /*
23 |    *
24 |    * Parses a tab separated file into an ArrayList<ArrayList<String>>
25 |    *
26 |    */
27 |   public static long parseFileArrayList(String path,
28 |                                         ArrayList<ArrayList<String>> data
29 |                                         ) throws Exception {
30 | 
31 |     long maxInput = 0;
32 | 
33 |     BufferedReader input = new BufferedReader(new FileReader(path));
34 |     String s;
35 |     String[] array;
36 |     int rowIndex = 0;
37 |     int columnIndex = 0;
38 |     while (true) {
39 |       if (!input.ready()) break;
40 |       s = input.readLine();
41 |       array = s.split("\t");
42 |       try {
43 |         columnIndex = 0;
44 |         while (columnIndex < array.length) {
45 |           if (columnIndex == 0) {
46 |             data.add(rowIndex, new ArrayList<String>());
47 |           }
48 |           String value = array[columnIndex];
49 |           data.get(rowIndex).add(value);
50 | 
51 |           if (Long.parseLong(array[INPUT_DATA_SIZE]) > maxInput) {
52 |             maxInput = Long.parseLong(array[INPUT_DATA_SIZE]);
53 |           }
54 | 
55 |           columnIndex++;
56 |         }
57 |         rowIndex++;
58 |       } catch (Exception e) {
59 |         // silently skip rows with missing or non-numeric fields
60 |       }
61 |     }
62 | 
63 |     return maxInput;
64 | 
65 |   }
66 | 
67 |   /*
68 |    *
69 |    * Prints the necessary shell scripts
70 |    *
71 |    */
72 |   public static void printOutput(ArrayList<ArrayList<String>> workloadData,
73 |                                  int clusterSizeRaw,
74 |                                  int clusterSizeWorkload,
75 |                                  int inputPartitionSize,
76 |                                  int inputPartitionCount,
77 |                                  String scriptDirPath,
78 |                                  String hdfsInputDir,
79 |                                  String hdfsOutputPrefix,
80 |                                  long totalDataPerReduce,
81 |                                  String workloadOutputDir,
82 |                                  String hadoopCommand,
83 |                                  String pathToWorkGenJar,
84 |                                  String pathToWorkGenConf) throws Exception {
85 | 
86 | 
87 |     if (workloadData.size() > 0) {
88 | 
89 |       long maxInput = 0;
90 |       String toWrite = "";
91 | 
92 |       FileWriter runAllJobs = new FileWriter(scriptDirPath + "/run-jobs-all.sh");
93 | 
94 |       toWrite = "#!/bin/bash\n";
95 |       runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
96 |       toWrite = "rm -r " + workloadOutputDir + "\n";
97 |       runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
98 |       toWrite = "mkdir " + workloadOutputDir + "\n";
99 |       runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
100 | 
101 |       System.out.println();
102 |       System.out.println(workloadData.size() + " jobs in the workload.");
103 |       System.out.println("Generating scripts ... please wait ...");
104 |       System.out.println();
105 | 
106 |       int written = 0;
107 | 
108 |       for (int i=0; i<workloadData.size(); i++) {
109 | 
110 |         // Per-job values from the trace. Data sizes are scaled by the ratio of
111 |         // replay cluster size to original cluster size, mirroring the
112 |         // maxInputNeeded check in main().
113 |         long sleep = Long.parseLong(workloadData.get(i).get(INTER_JOB_SLEEP_TIME));
114 |         long input = Long.parseLong(workloadData.get(i).get(INPUT_DATA_SIZE))
115 |                      * clusterSizeWorkload / clusterSizeRaw;
116 |         long shuffle = Long.parseLong(workloadData.get(i).get(SHUFFLE_DATA_SIZE))
117 |                      * clusterSizeWorkload / clusterSizeRaw;
118 |         long output = Long.parseLong(workloadData.get(i).get(OUTPUT_DATA_SIZE))
119 |                      * clusterSizeWorkload / clusterSizeRaw;
120 | 
121 | 
122 | 
123 | 
124 |         if (input > maxInput) maxInput = input;
125 |         if (input < maxSeqFile(67108864)) input = maxSeqFile(67108864); // 64 MB minimum size
126 | 
127 |         if (shuffle < 1024 ) shuffle = 1024 ;
128 |         if (output < 1024 ) output = 1024 ;
129 | 
130 |         ArrayList<Integer> inputPartitionSamples = new ArrayList<Integer>();
131 |         long inputCopy = input;
132 |         java.util.Random rng = new java.util.Random();
133 |         int tryPartitionSample = rng.nextInt(inputPartitionCount);
134 |         while (inputCopy > 0) {
135 |           boolean alreadySampled = true;
136 |           while (alreadySampled) {
137 |             if (inputPartitionSamples.size()>=inputPartitionCount) {
138 |               System.err.println();
139 |               System.err.println("ERROR!");
140 |               System.err.println("Not enough partitions for input size of " + input + " bytes.");
141 |               System.err.println("Happened on job number " + i + ".");
142 |               System.err.println("Input partition size is " + inputPartitionSize + " bytes.");
143 |               System.err.println("Number of partitions is " + inputPartitionCount + ".");
144 |               System.err.println("Total data size is " + (((long) inputPartitionSize) * ((long) inputPartitionCount)) + " bytes < " + input + " bytes.");
145 |               System.err.println("Need to generate a larger input data set.");
146 |               System.err.println();
147 |               throw new Exception("Input data set not large enough. Need to generate a larger data set.");
148 |               // if exception thrown here, input set not large enough - generate bigger input set
149 |             }
150 |             alreadySampled = false;
151 |           }
152 |           inputPartitionSamples.add(new Integer(tryPartitionSample));
153 |           tryPartitionSample = (tryPartitionSample + 1) % inputPartitionCount;
154 |           inputCopy -= inputPartitionSize;
155 |         }
156 | 
157 |         FileWriter inputPathFile = new FileWriter(scriptDirPath + "/inputPath-job-" + i + ".txt");
158 |         String inputPath = "";
159 |         for (int j=0; j<inputPartitionSamples.size(); j++) {
160 |           // Comma-separate the sampled partitions; WorkGen reads this single line
161 |           // and hands it to FileInputFormat.setInputPaths(). The part-NNNNN naming
162 |           // assumes Hadoop's standard output file names for the generated input data.
163 |           if (j > 0) inputPath += ",";
164 |           inputPath += hdfsInputDir + "/part-"
165 |                        + String.format("%05d", inputPartitionSamples.get(j).intValue());
166 |         }
167 |         inputPathFile.write(inputPath.toCharArray(), 0, inputPath.length());
168 |         inputPathFile.close();
169 | 
170 |         // One output location per job under the HDFS output prefix (suffix format assumed).
171 |         String outputPath = hdfsOutputPrefix + "-job-" + i;
172 | 
173 |         // Ratios handed to WorkGen: shuffle/input on the map side, output/shuffle on the reduce side.
174 |         double SIRatio = ((double) shuffle) / ((double) input);
175 |         double OSRatio = ((double) output) / ((double) shuffle);
176 | 
177 |         long numReduces = -1;
178 |         if (totalDataPerReduce > 0) {
179 |           numReduces = Math.round((shuffle + output) / ((double) totalDataPerReduce));
180 |           if (numReduces < 1) numReduces = 1;
181 |           if (numReduces > clusterSizeWorkload) numReduces = clusterSizeWorkload / 5;
182 |           toWrite =
183 |             "" + hadoopCommand + " jar " + pathToWorkGenJar + " org.apache.hadoop.examples.WorkGen -conf " + pathToWorkGenConf + " " +
184 |             "-r " + numReduces + " " + inputPath + " " + outputPath + " " + SIRatio + " " + OSRatio +
185 |             " >> " + workloadOutputDir + "/job-" + i + ".txt 2>> " + workloadOutputDir + "/job-" + i + ".txt \n";
186 |         } else {
187 |           toWrite =
188 |             "" + hadoopCommand + " jar " + pathToWorkGenJar + " org.apache.hadoop.examples.WorkGen -conf " + pathToWorkGenConf + " " +
189 |             inputPath + " " + outputPath + " " + SIRatio + " " + OSRatio +
190 |             " >> " + workloadOutputDir + "/job-" + i + ".txt 2>> " + workloadOutputDir + "/job-" + i + ".txt \n";
191 |         }
192 | 
193 |         FileWriter runFile = new FileWriter(scriptDirPath + "/run-job-" + i + ".sh");
194 |         runFile.write(toWrite.toCharArray(), 0, toWrite.length());
195 |         toWrite = "" + hadoopCommand + " dfs -rmr " + outputPath + "\n";
196 |         runFile.write(toWrite.toCharArray(), 0, toWrite.length());
197 |         toWrite = "# inputSize " + input + "\n";
198 |         runFile.write(toWrite.toCharArray(), 0, toWrite.length());
199 | 
200 |         runFile.close();
201 | 
202 |         // works for linux type systems only
203 |         Runtime.getRuntime().exec("chmod +x " + scriptDirPath + "/run-job-" + i + ".sh");
204 | 
205 |         toWrite = "./run-job-" + i + ".sh &\n";
206 |         runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
207 | 
208 | 
209 |         toWrite = "sleep " + sleep + "\n";
210 | 
runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 211 | written++; 212 | 213 | 214 | } 215 | 216 | System.out.println(written + " jobs written ... done."); 217 | System.out.println(); 218 | 219 | toWrite = "# max input " + maxInput + "\n"; 220 | runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 221 | toWrite = "# inputPartitionSize " + inputPartitionSize + "\n"; 222 | runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 223 | toWrite = "# inputPartitionCount " + inputPartitionCount + "\n"; 224 | runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 225 | 226 | runAllJobs.close(); 227 | 228 | // works for linux type systems only 229 | Runtime.getRuntime().exec("chmod +x " + scriptDirPath + "/run-jobs-all.sh"); 230 | 231 | } 232 | 233 | } 234 | 235 | /* 236 | * 237 | * Computes the size of a SequenceFile with the given number 238 | * of records. We assume the following 96 byte header: 239 | * 240 | 4 bytes (magic header prefix) 241 | ... key class name: 35 bytes for "org.apache.hadoop.io.BytesWritable" (34 characters + one-byte length) 242 | ... value class name: 35 bytes for "org.apache.hadoop.io.BytesWritable" 243 | 1 byte boolean (is each record value compressed?) 244 | 1 byte boolean (is the file block compressed?) 245 | bytes for metadata: in our case, there is no metadata, and we get 4 bytes of zeros 246 | 16 bytes of sync 247 | * 248 | * The SequenceFile writer places a periodic marker after writing a 249 | * minimum of 2000 bytes; the marker also falls at a record boundary. 250 | * Therefore, unless the serialized record size is a factor of 2000, more 251 | * than 2000 bytes will be written between markers. In the code below, we 252 | * refer to this distance as the "markerSpacing". 253 | * 254 | * The SequenceFile writer can be found in: 255 | * hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java 256 | * 257 | * There are informative constants at the top of the SequenceFile class, 258 | * and the heart of the writer is the append() method of the Writer class. 259 | * 260 | */ 261 | 262 | static final int SeqFileHeaderSize = 96; 263 | static final int SeqFileRecordSizeUsable = 100; // max_key + max_value 264 | static final int SeqFileRecordSizeSerialized = 116; // usable + 4 ints 265 | static final int SeqFileMarkerSize = 20; 266 | static final double SeqFileMarkerMinSpacing = 2000.0; 267 | 268 | private static int seqFileSize(int numRecords) { 269 | int totalSize = SeqFileHeaderSize; 270 | 271 | int recordTotal = numRecords * SeqFileRecordSizeSerialized; 272 | totalSize += recordTotal; 273 | 274 | int numRecordsBetweenMarkers = (int) Math.ceil(SeqFileMarkerMinSpacing / (SeqFileRecordSizeSerialized * 1.0)); 275 | int markerSpacing = numRecordsBetweenMarkers * SeqFileRecordSizeSerialized; 276 | int numMarkers = (int) Math.floor((totalSize * 1.0) / (markerSpacing * 1.0)); 277 | 278 | totalSize += numMarkers * SeqFileMarkerSize; 279 | 280 | return totalSize; 281 | } 282 | 283 | /* 284 | * 285 | * Computes the amount of data a SequenceFile would hold in 286 | * an HDFS block of the given size. First, we estimate the number 287 | * of records which will fit by inverting seqFileSize(), then we 288 | * decrease until we fit within the block. 
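 * 
 * (Worked example from the constants above: a serialized record is 116 bytes,
 * so a marker falls every ceil(2000 / 116) = 18 records, i.e. every 2088
 * bytes; a 64 MB block of 67108864 bytes then holds 573035 records, and
 * maxSeqFile(67108864) = 573035 * 100 = 57303500 usable bytes.)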
289 | * 290 | * To compute the inverse, we start with a simplified form of the equation 291 | * computed by seqFileSize(), using X for the number of records: 292 | * 293 | * totalSize = 294 | * header + X * serialized 295 | * + markerSize * (header + X * serialized) / markerSpacing 296 | * 297 | * using some algebra: 298 | * 299 | * (totalSize - header) * markerSpacing 300 | * 301 | * = X * serialized * markerSpacing + markerSize * (header + X * serialized) 302 | * 303 | * 304 | * (totalSize - header) * markerSpacing - markerSize * header 305 | * 306 | * = X * serialized * markerSpacing + markerSize * X * serialized 307 | * 308 | * = (markerSpacing + markerSize) * X * serialized 309 | * 310 | * We now have a Right-Hand Side which looks easy to deal with! 311 | * 312 | * Focusing on the Left-Hand Side, we'd like to avoid multiplying 313 | * (totalSize - header) * markerSpacing as it may be a very large number. 314 | * We re-write as follows: 315 | * 316 | * (totalSize - header) * markerSpacing - markerSize * header = 317 | * (totalSize - header - markerSize * header / markerSpacing) * markerSpacing 318 | * 319 | */ 320 | 321 | public static int maxSeqFile(int blockSize) { 322 | 323 | // First, compute some values we will need. Same as in seqFileSize() 324 | int numRecordsBetweenMarkers = (int) Math.ceil(SeqFileMarkerMinSpacing / (SeqFileRecordSizeSerialized * 1.0)); 325 | double markerSpacing = numRecordsBetweenMarkers * SeqFileRecordSizeSerialized * 1.0; 326 | 327 | // Calculate the Left-Hand Side we wrote in the comment above 328 | double est = blockSize - SeqFileHeaderSize - (SeqFileMarkerSize * SeqFileHeaderSize * 1.0) / markerSpacing; 329 | est *= markerSpacing; 330 | 331 | // Now, divide the constants from the Right-Hand Side we found above 332 | est /= (markerSpacing + SeqFileMarkerSize * 1.0); 333 | est /= (SeqFileRecordSizeSerialized * 1.0); 334 | 335 | // Can't have a fractional number of records! 336 | int numRecords = (int) Math.ceil(est); 337 | 338 | // Check if we over-estimated 339 | while (seqFileSize(numRecords) > blockSize) { 340 | numRecords--; 341 | } 342 | 343 | return (numRecords * SeqFileRecordSizeUsable); 344 | } 345 | 346 | /* 347 | * 348 | * Read in command line arguments etc. 
349 |    *
350 |    */
351 |   public static void main(String args[]) throws Exception {
352 | 
353 |     if (args.length < 13) {
354 | 
355 |       System.out.println();
356 |       System.out.println("Insufficient arguments.");
357 |       System.out.println();
358 |       System.out.println("Usage: ");
359 |       System.out.println();
360 |       System.out.println("java GenerateReplayScript");
361 |       System.out.println("  [path to file with workload info]");
362 |       System.out.println("  [number of machines in the original production cluster]");
363 |       System.out.println("  [number of machines in the cluster on which the workload will be run]");
364 |       System.out.println("  [HDFS block size]");
365 |       System.out.println("  [number of input partitions]");
366 |       System.out.println("  [output directory for the scripts]");
367 |       System.out.println("  [HDFS directory for the input data]");
368 |       System.out.println("  [prefix to workload output in HDFS]");
369 |       System.out.println("  [amount of data per reduce task in bytes]");
370 |       System.out.println("  [directory for the workload output files]");
371 |       System.out.println("  [hadoop command on your system]");
372 |       System.out.println("  [path to WorkGen.jar]");
373 |       System.out.println("  [path to workGenKeyValue_conf.xsl]");
374 |       System.out.println();
375 | 
376 |     } else {
377 | 
378 |       // variables
379 | 
380 |       ArrayList<ArrayList<String>> workloadData = new ArrayList<ArrayList<String>>();
381 | 
382 |       // read command line arguments
383 | 
384 |       String fileWorkloadPath = args[0];
385 | 
386 |       int clusterSizeRaw = Integer.parseInt(args[1]);
387 |       int clusterSizeWorkload = Integer.parseInt(args[2]);
388 |       int hdfsBlockSize = Integer.parseInt(args[3]);
389 |       int inputPartitionCount = Integer.parseInt(args[4]);
390 |       String scriptDirPath = args[5];
391 |       String hdfsInputDir = args[6];
392 |       String hdfsOutputPrefix = args[7];
393 |       long totalDataPerReduce = Long.parseLong(args[8]);
394 |       String workloadOutputDir = args[9];
395 |       String hadoopCommand = args[10];
396 |       String pathToWorkGenJar = args[11];
397 |       String pathToWorkGenConf = args[12];
398 | 
399 |       // parse data
400 | 
401 |       long maxInput = parseFileArrayList(fileWorkloadPath, workloadData);
402 | 
403 |       // check if maxInput fits within input data size to be generated
404 | 
405 |       long maxInputNeeded = maxInput * clusterSizeWorkload / clusterSizeRaw;
406 | 
407 |       int inputPartitionSize = maxSeqFile(hdfsBlockSize);
408 |       long totalInput = ((long) inputPartitionSize) * ((long) inputPartitionCount);
409 | 
410 |       if (maxInputNeeded > totalInput) {
411 | 
412 |         System.err.println();
413 |         System.err.println("ERROR!");
414 |         System.err.println("Not enough partitions for max needed input size of " + maxInputNeeded + " bytes.");
415 |         System.err.println("HDFS block size is " + hdfsBlockSize + " bytes.");
416 |         System.err.println("Input partition size is " + inputPartitionSize + " bytes.");
417 |         System.err.println("Number of partitions is " + inputPartitionCount + ".");
418 |         System.err.println("Total actual input data size is " + totalInput + " bytes < " + maxInputNeeded + " bytes.");
419 |         System.err.println("Need to generate a larger input data set.");
420 |         System.err.println();
421 | 
422 |         throw new Exception("Input data set not large enough.
Need to generate a larger data set."); 422 | } else { 423 | 424 | System.err.println(); 425 | System.err.println("Max needed input size " + maxInputNeeded + " bytes."); 426 | System.err.println("Actual input size is " + totalInput + " bytes >= " + maxInputNeeded + " bytes."); 427 | System.err.println("All is good."); 428 | System.err.println(); 429 | } 430 | 431 | // make scriptDirPath directory if it doesn't exist 432 | 433 | File d = new File(scriptDirPath); 434 | if (d.exists()) { 435 | if (d.isDirectory()) { 436 | System.err.println("Warning! About to overwrite existing scripts in: " + scriptDirPath); 437 | System.err.print("Ok to continue? [y/n] "); 438 | BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); 439 | String s = in.readLine(); 440 | if (s == null || s.length() < 1 || s.toLowerCase().charAt(0) != 'y') { 441 | throw new Exception("Declined overwrite of existing directory"); 442 | } 443 | } else { 444 | throw new Exception(scriptDirPath + " is a file."); 445 | } 446 | } else { 447 | d.mkdirs(); 448 | } 449 | 450 | // print shell scripts 451 | 452 | printOutput(workloadData, clusterSizeRaw, clusterSizeWorkload, 453 | inputPartitionSize, inputPartitionCount, scriptDirPath, hdfsInputDir, hdfsOutputPrefix, 454 | totalDataPerReduce, workloadOutputDir, hadoopCommand, pathToWorkGenJar, pathToWorkGenConf); 455 | 456 | 457 | System.out.println("Parameter values for randomwriter_conf.xsl:"); 458 | System.out.println("test.randomwrite.total_bytes: " + totalInput); 459 | System.out.println("test.randomwrite.bytes_per_map: " + inputPartitionSize); 460 | } 461 | 462 | 463 | } 464 | } 465 | 466 | -------------------------------------------------------------------------------- /workloadSuite/HDFSWrite.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 |  */
18 | 
19 | package org.apache.hadoop.examples;
20 | 
21 | import java.io.IOException;
22 | import java.util.Date;
23 | import java.util.Random;
24 | 
25 | import org.apache.hadoop.conf.Configuration;
26 | import org.apache.hadoop.conf.Configured;
27 | import org.apache.hadoop.fs.Path;
28 | import org.apache.hadoop.io.BytesWritable;
29 | import org.apache.hadoop.io.Text;
30 | import org.apache.hadoop.io.Writable;
31 | import org.apache.hadoop.io.WritableComparable;
32 | import org.apache.hadoop.mapred.ClusterStatus;
33 | import org.apache.hadoop.mapred.FileOutputFormat;
34 | import org.apache.hadoop.mapred.FileSplit;
35 | import org.apache.hadoop.mapred.InputFormat;
36 | import org.apache.hadoop.mapred.InputSplit;
37 | import org.apache.hadoop.mapred.JobClient;
38 | import org.apache.hadoop.mapred.JobConf;
39 | import org.apache.hadoop.mapred.MapReduceBase;
40 | import org.apache.hadoop.mapred.Mapper;
41 | import org.apache.hadoop.mapred.OutputCollector;
42 | import org.apache.hadoop.mapred.RecordReader;
43 | import org.apache.hadoop.mapred.Reporter;
44 | import org.apache.hadoop.mapred.SequenceFileOutputFormat;
45 | import org.apache.hadoop.mapred.lib.IdentityReducer;
46 | import org.apache.hadoop.util.GenericOptionsParser;
47 | import org.apache.hadoop.util.Tool;
48 | import org.apache.hadoop.util.ToolRunner;
49 | 
50 | /**
51 |  * This program uses map/reduce to just run a distributed job where there is
52 |  * no interaction between the tasks and each task writes a large unsorted
53 |  * random binary sequence file of BytesWritable.
54 |  * In order for this program to generate data for terasort with 10-byte keys
55 |  * and 90-byte values, have the following config:
56 |  * 
57 |  * <?xml version="1.0"?>
58 |  * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
59 |  * <configuration>
60 |  *   <property>
61 |  *     <name>test.randomwrite.min_key</name>
62 |  *     <value>10</value>
63 |  *   </property>
64 |  *   <property>
65 |  *     <name>test.randomwrite.max_key</name>
66 |  *     <value>10</value>
67 |  *   </property>
68 |  *   <property>
69 |  *     <name>test.randomwrite.min_value</name>
70 |  *     <value>90</value>
71 |  *   </property>
72 |  *   <property>
73 |  *     <name>test.randomwrite.max_value</name>
74 |  *     <value>90</value>
75 |  *   </property>
76 |  *   <property>
77 |  *     <name>test.randomwrite.total_bytes</name>
78 |  *     <value>1099511627776</value>
79 |  *   </property>
80 |  * </configuration>
81 |  * 
82 |  * Equivalently, {@link RandomWriter} also supports all the above options
83 |  * and ones supported by {@link GenericOptionsParser} via the command-line.
84 |  */
85 | public class HDFSWrite extends Configured implements Tool {
86 | 
87 |   /**
88 |    * User counters
89 |    */
90 |   static enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
91 | 
92 |   /**
93 |    * A custom input format that creates virtual inputs of a single string
94 |    * for each map.
95 |    */
96 |   static class RandomInputFormat implements InputFormat<Text, Text> {
97 | 
98 |     /** Accept all job confs */
99 |     public void validateInput(JobConf job) throws IOException {
100 |     }
101 | 
102 |     /**
103 |      * Generate the requested number of file splits, with the filename
104 |      * set to the filename of the output file.
105 |      */
106 |     public InputSplit[] getSplits(JobConf job,
107 |                                   int numSplits) throws IOException {
108 |       InputSplit[] result = new InputSplit[numSplits];
109 |       Path outDir = FileOutputFormat.getOutputPath(job);
110 |       for(int i=0; i < result.length; ++i) {
111 |         result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
112 |                                   (String[])null);
113 |       }
114 |       return result;
115 |     }
116 | 
117 |     /**
118 |      * Return a single record (filename, "") where the filename is taken from
119 |      * the file split.
120 |      */
121 |     static class RandomRecordReader implements RecordReader<Text, Text> {
122 |       Path name;
123 |       public RandomRecordReader(Path p) {
124 |         name = p;
125 |       }
126 |       public boolean next(Text key, Text value) {
127 |         if (name != null) {
128 |           key.set(name.getName());
129 |           name = null;
130 |           return true;
131 |         }
132 |         return false;
133 |       }
134 |       public Text createKey() {
135 |         return new Text();
136 |       }
137 |       public Text createValue() {
138 |         return new Text();
139 |       }
140 |       public long getPos() {
141 |         return 0;
142 |       }
143 |       public void close() {}
144 |       public float getProgress() {
145 |         return 0.0f;
146 |       }
147 |     }
148 | 
149 |     public RecordReader<Text, Text> getRecordReader(InputSplit split,
150 |                                                     JobConf job,
151 |                                                     Reporter reporter) throws IOException {
152 |       return new RandomRecordReader(((FileSplit) split).getPath());
153 |     }
154 |   }
155 | 
156 |   static class Map extends MapReduceBase
157 |     implements Mapper<WritableComparable, Writable,
158 |                       BytesWritable, BytesWritable> {
159 | 
160 |     private long numBytesToWrite;
161 |     private int minKeySize;
162 |     private int keySizeRange;
163 |     private int minValueSize;
164 |     private int valueSizeRange;
165 |     private Random random = new Random();
166 |     private BytesWritable randomKey = new BytesWritable();
167 |     private BytesWritable randomValue = new BytesWritable();
168 | 
169 |     private void randomizeBytes(byte[] data, int offset, int length) {
170 |       for(int i=offset + length - 1; i >= offset; --i) {
171 |         data[i] = (byte) random.nextInt(256);
172 |       }
173 |     }
174 | 
175 |     /**
176 |      * Given an output filename, write a bunch of random records to it.
177 |      */
178 |     public void map(WritableComparable key,
179 |                     Writable value,
180 |                     OutputCollector<BytesWritable, BytesWritable> output,
181 |                     Reporter reporter) throws IOException {
182 |       int itemCount = 0;
183 |       while (numBytesToWrite > 0) {
184 |         int keyLength = minKeySize +
185 |           (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
186 |         randomKey.setSize(keyLength);
187 |         randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
188 |         int valueLength = minValueSize +
189 |           (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
190 |         randomValue.setSize(valueLength);
191 |         randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
192 |         output.collect(randomKey, randomValue);
193 |         numBytesToWrite -= keyLength + valueLength;
194 |         reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
195 |         reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
196 |         if (++itemCount % 200 == 0) {
197 |           reporter.setStatus("wrote record " + itemCount + ". " +
198 |                              numBytesToWrite + " bytes left.");
199 |         }
200 |       }
201 |       reporter.setStatus("done with " + itemCount + " records.");
202 |     }
203 | 
204 |     /**
205 |      * Save the values out of the configuration that we need to write
206 |      * the data.
207 |      */
208 |     @Override
209 |     public void configure(JobConf job) {
210 |       numBytesToWrite = job.getLong("test.randomwrite.bytes_per_map",
211 |                                     1*1024*1024*1024);
212 |       minKeySize = job.getInt("test.randomwrite.min_key", 10);
213 |       keySizeRange =
214 |         job.getInt("test.randomwrite.max_key", 1000) - minKeySize;
215 |       minValueSize = job.getInt("test.randomwrite.min_value", 0);
216 |       valueSizeRange =
217 |         job.getInt("test.randomwrite.max_value", 20000) - minValueSize;
218 |     }
219 | 
220 |   }
221 | 
222 |   /**
223 |    * This is the main routine for launching a distributed random write job.
224 |    * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
225 |    * The reduce doesn't do anything.
226 |    * 
227 |    * @throws IOException 
228 |    */
229 |   public int run(String[] args) throws Exception {
230 |     if (args.length == 0) {
231 |       System.out.println("Usage: writer <out-dir>");
232 |       ToolRunner.printGenericCommandUsage(System.out);
233 |       return -1;
234 |     }
235 | 
236 |     Path outDir = new Path(args[0]);
237 |     JobConf job = new JobConf(getConf());
238 | 
239 |     job.setJarByClass(HDFSWrite.class);
240 |     job.setJobName("hdfsWrite");
241 |     FileOutputFormat.setOutputPath(job, outDir);
242 | 
243 |     job.setOutputKeyClass(BytesWritable.class);
244 |     job.setOutputValueClass(BytesWritable.class);
245 | 
246 |     job.setInputFormat(RandomInputFormat.class);
247 |     job.setMapperClass(Map.class);
248 |     job.setReducerClass(IdentityReducer.class);
249 |     job.setOutputFormat(SequenceFileOutputFormat.class);
250 | 
251 |     JobClient client = new JobClient(job);
252 |     ClusterStatus cluster = client.getClusterStatus();
253 |     int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
254 |     long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
255 |                                              1*1024*1024*1024);
256 |     if (numBytesToWritePerMap == 0) {
257 |       System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
258 |       return -2;
259 |     }
260 |     long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes",
261 |          numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
262 |     /*
263 |     int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
264 |     if (numMaps == 0 && totalBytesToWrite > 0) {
265 |       numMaps = 1;
266 |       job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
267 |     }
268 |     */
269 | 
270 |     int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
271 |     job.setNumMapTasks(numMaps);
272 | 
273 |     System.out.println("client.getClusterStatus().getMaxMapTasks() gives " + cluster.getMaxMapTasks());
274 |     System.out.println("client.getClusterStatus().getMaxReduceTasks() gives " + cluster.getMaxReduceTasks());
275 | 
276 |     System.out.println("Running on " +
277 |                        cluster.getTaskTrackers() + " nodes with " +
278 |                        numMaps + " maps, \n" +
279 |                        "writing " + totalBytesToWrite + " bytes with " +
280 |                        numBytesToWritePerMap + " bytes per map.");
281 | 
282 |     // reducer NONE
283 |     job.setNumReduceTasks(0);
284 | 
285 |     Date startTime = new Date();
286 |     System.out.println("Job started: " + startTime);
287 |     JobClient.runJob(job);
288 |     Date endTime = new Date();
289 |     System.out.println("Job ended: " + endTime);
290 |     System.out.println("The job took " +
291 |                        (endTime.getTime() - startTime.getTime()) /1000 +
292 |                        " seconds.");
293 | 
294 |     return 0;
295 |   }
296 | 
297 |   public static void main(String[] args) throws Exception {
298 |     int res = ToolRunner.run(new Configuration(), new HDFSWrite(), args);
299 |     System.exit(res);
300 |   }
301 | 
302 | }
303 | 
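A usage sketch, not taken from this repository's documentation (which lives on
the GitHub wiki): once compiled into a jar, HDFSWrite runs through the standard
hadoop launcher, with randomwriter_conf.xsl supplying the test.randomwrite.*
properties whose values GenerateReplayScript prints. The jar name and the HDFS
output directory below are illustrative placeholders, not paths from this repo.

    # hypothetical invocation; HDFSWrite.jar and workGenInput are placeholders
    hadoop jar HDFSWrite.jar org.apache.hadoop.examples.HDFSWrite \
        -conf randomwriter_conf.xsl workGenInput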
--------------------------------------------------------------------------------
/workloadSuite/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 | Copyright (c) 2011, Regents of the University of California.
3 | All rights reserved.
4 | 
5 | HDFSWrite.java and WorkGen.java are "Derivative Works" of Apache
6 | Hadoop version 0.20.2, and are governed by the Apache License 2.0,
7 | found at http://www.apache.org/licenses/LICENSE-2.0.
8 | 
9 | The remainder of this project is governed by the "New BSD License"
10 | below.
11 | 
12 | Redistribution and use in source and binary forms, with or without
13 | modification, are permitted provided that the following conditions
14 | are met:
15 | 
16 | * Redistributions of source code must retain the above copyright notice,
17 | this list of conditions and the following disclaimer.
18 | * Redistributions in binary form must reproduce the above copyright
19 | notice, this list of conditions and the following disclaimer in the
20 | documentation and/or other materials provided with the distribution.
21 | * Neither the name of the University of California, Berkeley
22 | nor the names of its contributors may be used to endorse or promote
23 | products derived from this software without specific prior written
24 | permission.
25 | 
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
30 | COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
33 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
34 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37 | OF THE POSSIBILITY OF SUCH DAMAGE.
38 | 
39 | 
--------------------------------------------------------------------------------
/workloadSuite/README:
--------------------------------------------------------------------------------
1 | 
2 | To reduce confusion, we have consolidated the SWIM GitHub
3 | wiki as the sole documentation source.
4 | 
5 | For full documentation, see https://github.com/SWIMProjectUCB/SWIM/wiki.
6 | 
--------------------------------------------------------------------------------
/workloadSuite/WorkGen.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Licensed to the Apache Software Foundation (ASF) under one
3 |  * or more contributor license agreements. See the NOTICE file
4 |  * distributed with this work for additional information
5 |  * regarding copyright ownership. The ASF licenses this file
6 |  * to you under the Apache License, Version 2.0 (the
7 |  * "License"); you may not use this file except in compliance
8 |  * with the License. You may obtain a copy of the License at
9 |  * 
10 |  *     http://www.apache.org/licenses/LICENSE-2.0
11 |  * 
12 |  * Unless required by applicable law or agreed to in writing, software
13 |  * distributed under the License is distributed on an "AS IS" BASIS,
14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |  * See the License for the specific language governing permissions and
16 |  * limitations under the License.
17 |  */
18 | 
19 | package org.apache.hadoop.examples;
20 | 
21 | import java.io.IOException;
22 | import java.util.*;
23 | 
24 | import org.apache.hadoop.conf.Configuration;
25 | import org.apache.hadoop.conf.Configured;
26 | import org.apache.hadoop.fs.Path;
27 | import org.apache.hadoop.io.BytesWritable;
28 | import org.apache.hadoop.io.Writable;
29 | import org.apache.hadoop.io.WritableComparable;
30 | import org.apache.hadoop.mapred.*;
31 | import org.apache.hadoop.mapred.lib.IdentityMapper;
32 | import org.apache.hadoop.mapred.lib.IdentityReducer;
33 | import org.apache.hadoop.util.Tool;
34 | import org.apache.hadoop.util.ToolRunner;
35 | import java.io.BufferedReader;
36 | import java.io.FileReader;
37 | 
38 | 
39 | /**
40 |  * Synthetic workload job: regenerates shuffle and output data as random bytes,
41 |  * sized by the shuffleInputRatio and outputShuffleRatio job parameters.
42 |  */
43 | public class WorkGen extends Configured implements Tool {
44 | 
45 |   static int printUsage() {
46 |     System.out.println("sort [-m <maps>] [-r <reduces> has no effect] " +
47 |                        "[-inFormat <input format class>] " +
48 |                        "[-outFormat <output format class>] " +
49 |                        "[-outKey <output key class>] " +
50 |                        "[-outValue <output value class>] " +
51 |                        " <inputPathFile> <output> <shuffleInputRatio> <outputShuffleRatio>");
52 |     ToolRunner.printGenericCommandUsage(System.out);
53 |     return -1;
54 |   }
55 | 
56 |   /**
57 |    * User counters
58 |    */
59 |   static enum Counters { MAP_RECORDS_WRITTEN, MAP_BYTES_WRITTEN, RED_RECORDS_WRITTEN, RED_BYTES_WRITTEN };
60 | 
61 | 
62 | 
63 |   /**
64 |    * For each input record, emits on average shuffleInputRatio random records;
65 |    * the fractional part of the ratio is emitted probabilistically.
66 |    */
67 |   static class RatioMapper extends MapReduceBase implements Mapper<WritableComparable, Writable, BytesWritable, BytesWritable> {
68 | 
69 |     private double shuffleInputRatio = 1.0d;
70 | 
71 |     private int minKeySize;
72 |     private int keySizeRange;
73 |     private int minValueSize;
74 |     private int valueSizeRange;
75 |     private Random random = new Random();
76 |     private BytesWritable randomKey;
77 |     private BytesWritable randomValue;
78 | 
79 |     private void randomizeBytes(byte[] data, int offset, int length) {
80 |       for(int i=offset + length - 1; i >= offset; --i) {
81 |         data[i] = (byte) random.nextInt(256);
82 |       }
83 |     }
84 | 
85 |     /** Input key/val pair is swallowed up, no action is taken */
86 |     public void map(WritableComparable key, Writable val, OutputCollector<BytesWritable, BytesWritable> output, Reporter reporter) throws IOException {
87 | 
88 |       double shuffleInputRatioTemp = shuffleInputRatio;
89 | 
90 |       // output floor(shuffleInputRatio) number of intermediate pairs
91 |       while (shuffleInputRatioTemp >= 0.0d) {
92 |         int keyLength = minKeySize + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
93 |         randomKey = new BytesWritable();
94 |         randomKey.setSize(keyLength);
95 |         randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
96 |         int valueLength = minValueSize + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
97 |         randomValue = new BytesWritable();
98 |         randomValue.setSize(valueLength);
99 |         randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
100 |         if (shuffleInputRatioTemp >= 1.0d || (random.nextDouble() < shuffleInputRatioTemp)) {
101 |           output.collect(randomKey, randomValue);
102 |           reporter.incrCounter(Counters.MAP_BYTES_WRITTEN, keyLength + valueLength);
103 |           reporter.incrCounter(Counters.MAP_RECORDS_WRITTEN, 1);
104 |         }
105 |         shuffleInputRatioTemp -= 1.0d;
106 |       } // end while
107 | 
108 |     } // end map()
109 | 
110 |     @Override
111 |     public void configure(JobConf job) {
112 |       shuffleInputRatio = Double.parseDouble(job.getRaw("workGen.ratios.shuffleInputRatio"));
113 |       minKeySize = job.getInt("workGen.randomwrite.min_key", 10);
114 |       keySizeRange = job.getInt("workGen.randomwrite.max_key", 1000) - minKeySize;
115 |       minValueSize = job.getInt("workGen.randomwrite.min_value", 0);
116 |       valueSizeRange = job.getInt("workGen.randomwrite.max_value", 20000) - minValueSize;
117 |     }
118 | 
119 |   } // end static class RatioMapper
120 | 
121 |   /**
122 |    * For each shuffled record, emits on average outputShuffleRatio random
123 |    * records; the fractional part of the ratio is emitted probabilistically.
124 |    */
125 |   static class RatioReducer extends MapReduceBase implements Reducer<WritableComparable, Writable, BytesWritable, BytesWritable> {
126 | 
127 |     private double outputShuffleRatio = 1.0d;
128 | 
129 |     private int minKeySize;
130 |     private int keySizeRange;
131 |     private int minValueSize;
132 |     private int valueSizeRange;
133 |     private Random random = new Random();
134 |     private BytesWritable randomKey;
135 |     private BytesWritable randomValue;
136 | 
137 |     private void randomizeBytes(byte[] data, int offset, int length) {
138 |       for(int i=offset + length - 1; i >= offset; --i) {
139 |         data[i] = (byte) random.nextInt(256);
140 |       }
141 |     }
142 | 
143 |     public void reduce(WritableComparable key, Iterator<Writable> values,
144 |                        OutputCollector<BytesWritable, BytesWritable> output,
145 |                        Reporter reporter)
146 |       throws IOException {
147 | 
148 |       while (values.hasNext()) {
149 |         Writable value = values.next();
150 | 
151 |         double outputShuffleRatioTemp = outputShuffleRatio;
152 | 
153 |         // output floor(outputShuffleRatio) number of intermediate pairs
154 |         while (outputShuffleRatioTemp >= 0.0d) {
155 |           int keyLength = minKeySize + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
156 |           randomKey = new BytesWritable();
157 |           randomKey.setSize(keyLength);
158 |           randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
159 |           int valueLength = minValueSize + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
160 |           randomValue = new BytesWritable();
161 |           randomValue.setSize(valueLength);
162 |           randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
163 |           if (outputShuffleRatioTemp >= 1.0d || (random.nextDouble() < outputShuffleRatioTemp)) {
164 |             output.collect(randomKey, randomValue);
165 |             reporter.incrCounter(Counters.RED_BYTES_WRITTEN, keyLength + valueLength);
166 |             reporter.incrCounter(Counters.RED_RECORDS_WRITTEN, 1);
167 |           }
168 |           outputShuffleRatioTemp -= 1.0d;
169 |         } // end while
170 |       }
171 |     }
172 | 
173 |     @Override
174 |     public void configure(JobConf job) {
175 |       outputShuffleRatio = Double.parseDouble(job.getRaw("workGen.ratios.outputShuffleRatio"));
176 |       minKeySize = job.getInt("workGen.randomwrite.min_key", 10);
177 |       keySizeRange = job.getInt("workGen.randomwrite.max_key", 10) - minKeySize;
178 |       minValueSize = job.getInt("workGen.randomwrite.min_value", 90);
179 |       valueSizeRange = job.getInt("workGen.randomwrite.max_value", 90) - minValueSize;
180 |     }
181 | 
182 |   }
183 | 
184 | 
185 |   /**
186 |    * The main driver for the program.
187 |    * Invoke this method to submit the map/reduce job.
188 |    * @throws IOException When there are communication problems with the
189 |    *                     job tracker.
190 |    */
191 |   public int run(String[] args) throws Exception {
192 | 
193 |     JobConf jobConf = new JobConf(getConf(), WorkGen.class);
194 |     jobConf.setJobName("workGen");
195 | 
196 |     jobConf.setMapperClass(RatioMapper.class);
197 |     jobConf.setReducerClass(RatioReducer.class);
198 | 
199 |     JobClient client = new JobClient(jobConf);
200 |     ClusterStatus cluster = client.getClusterStatus();
201 |     int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.45);
202 |     int num_maps = (int) (cluster.getMaxMapTasks() * 0.9);
203 |     String sort_reduces = jobConf.get("workGen.sort.reduces_per_host");
204 |     if (sort_reduces != null) {
205 |       num_reduces = cluster.getTaskTrackers() *
206 |                     Integer.parseInt(sort_reduces);
207 |     }
208 |     Class<? extends InputFormat> inputFormatClass =
209 |       SequenceFileInputFormat.class;
210 |     Class<? extends OutputFormat> outputFormatClass =
211 |       SequenceFileOutputFormat.class;
212 |     Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
213 |     Class<? extends Writable> outputValueClass = BytesWritable.class;
214 |     List<String> otherArgs = new ArrayList<String>();
215 |     for(int i=0; i < args.length; ++i) {
216 |       try {
217 |         if ("-m".equals(args[i])) {
218 |           num_maps = Integer.parseInt(args[++i]);
219 |         } else if ("-r".equals(args[i])) {
220 |           num_reduces = Integer.parseInt(args[++i]);
221 |         } else if ("-inFormat".equals(args[i])) {
222 |           inputFormatClass =
223 |             Class.forName(args[++i]).asSubclass(InputFormat.class);
224 |         } else if ("-outFormat".equals(args[i])) {
225 |           outputFormatClass =
226 |             Class.forName(args[++i]).asSubclass(OutputFormat.class);
227 |         } else if ("-outKey".equals(args[i])) {
228 |           outputKeyClass =
229 |             Class.forName(args[++i]).asSubclass(WritableComparable.class);
230 |         } else if ("-outValue".equals(args[i])) {
231 |           outputValueClass =
232 |             Class.forName(args[++i]).asSubclass(Writable.class);
233 |         } else {
234 |           otherArgs.add(args[i]);
235 |         }
236 |       } catch (NumberFormatException except) {
237 |         System.out.println("ERROR: Integer expected instead of " + args[i]);
238 |         return printUsage();
239 |       } catch (ArrayIndexOutOfBoundsException except) {
240 |         System.out.println("ERROR: Required parameter missing from " +
241 |                            args[i-1]);
242 |         return printUsage(); // exits
243 |       }
244 |     }
245 | 
246 |     // Set user-supplied (possibly default) job configs
247 |     jobConf.setNumReduceTasks(num_reduces);
248 | 
249 |     jobConf.setInputFormat(inputFormatClass);
250 |     jobConf.setOutputFormat(outputFormatClass);
251 | 
252 |     jobConf.setOutputKeyClass(outputKeyClass);
253 |     jobConf.setOutputValueClass(outputValueClass);
254 | 
255 |     // Make sure there are exactly 4 parameters left.
256 |     if (otherArgs.size() != 4) {
257 |       System.out.println("ERROR: Wrong number of parameters: " +
258 |                          otherArgs.size() + " instead of 4.");
259 |       return printUsage();
260 |     }
261 |     BufferedReader input = new BufferedReader(new FileReader(otherArgs.get(0)));
262 |     String inputPaths = input.readLine();
263 |     FileInputFormat.setInputPaths(jobConf, inputPaths);
264 |     FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));
265 |     jobConf.set("workGen.ratios.shuffleInputRatio", otherArgs.get(2));
266 |     jobConf.set("workGen.ratios.outputShuffleRatio", otherArgs.get(3));
267 | 
268 |     System.out.println("Max number of map tasks " + cluster.getMaxMapTasks());
269 |     System.out.println("Max number of red tasks " + cluster.getMaxReduceTasks());
270 |     System.out.println("shuffleInputRatio = " + Double.parseDouble(jobConf.getRaw("workGen.ratios.shuffleInputRatio")));
271 |     System.out.println("outputShuffleRatio = " + Double.parseDouble(jobConf.getRaw("workGen.ratios.outputShuffleRatio")));
272 | 
273 |     System.out.println("Running on " +
274 |                        cluster.getTaskTrackers() + " nodes with " +
275 |                        num_maps + " maps and " +
276 |                        num_reduces + " reduces.");
277 |     Date startTime = new Date();
278 |     Random random = new Random();
279 |     System.out.println(random.nextDouble());
280 |     System.out.println(random.nextDouble());
281 |     System.out.println("Job started: " + startTime);
282 |     JobClient.runJob(jobConf);
283 |     Date end_time = new Date();
284 |     System.out.println("Job ended: " + end_time);
285 |     System.out.println("The job took " +
286 |                        (end_time.getTime() - startTime.getTime()) /1000 + " seconds.");
287 |     return 0;
288 |   }
289 | 
290 | 
291 | 
292 |   public static void main(String[] args) throws Exception {
293 |     int res = ToolRunner.run(new Configuration(), new WorkGen(), args);
294 |     System.exit(res);
295 |   }
296 | 
297 | }
298 | 
--------------------------------------------------------------------------------
/workloadSuite/WorkloadSynthesis.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | use strict;
3 | use warnings;
4 | use Getopt::Long;
5 | use Date::Parse;
6 | use POSIX;
7 | 
8 | my $inPath = undef;
9 | my $outPrefix = undef;
10 | my $repeats = undef;
11 | my $samples = undef;
12 | my $length = undef;
13 | 
14 | my $traceStart = undef;
15 | my $traceEnd = undef;
16 | 
17 | my $cmdline_result = GetOptions("inPath=s" => \$inPath,
18 |                                 "outPrefix=s" => \$outPrefix,
19 |                                 "repeats=i" => \$repeats,
20 |                                 "samples=i" => \$samples,
21 |                                 "length=i" => \$length,
22 |                                 "traceStart=i" => \$traceStart,
23 |                                 "traceEnd=i" => \$traceEnd );
24 | 
25 | my @all_data = ();
26 | 
27 | close(INPUT_FILE);
28 | open(INPUT_FILE, "< $inPath") or die "Cannot open $inPath";
29 | 
30 | my $dataSize = 0;
31 | while (<INPUT_FILE>) {
32 | 
33 |     #if (($dataSize % 100000) == 0) { print "read line \#" . $dataSize .
"\n"; } 34 | 35 | chomp; 36 | my $line = $_; 37 | my @fields = split(/\t/, $line); 38 | 39 | $all_data[$dataSize][0] = $fields[0]; # unique_job_id 40 | $all_data[$dataSize][1] = $fields[1]; # job_name 41 | $all_data[$dataSize][2] = $fields[2]; # map_input_bytes 42 | $all_data[$dataSize][3] = $fields[3]; # shuffle_bytes 43 | $all_data[$dataSize][4] = $fields[4]; # reduce_output_bytes 44 | $all_data[$dataSize][5] = $fields[5]; # submit_time_seconds 45 | $all_data[$dataSize][6] = $fields[6]; # duration_seconds 46 | $all_data[$dataSize][7] = $fields[7]; # map_time_task_seconds 47 | $all_data[$dataSize][8] = $fields[8]; # red_time_task_seconds 48 | $all_data[$dataSize][9] = $fields[9]; # total_time_task_seconds 49 | $all_data[$dataSize][10] = $fields[12]; # input path 50 | $all_data[$dataSize][11] = $fields[13]; # output path 51 | 52 | $dataSize++; 53 | 54 | } 55 | 56 | #print "total lines " . $dataSize . "\n"; 57 | 58 | close(INPUT_FILE); 59 | 60 | sample_and_print($inPath, $outPrefix); 61 | 62 | sub sample_and_print { 63 | 64 | my ($in_path, $out_prefix) = @_; 65 | 66 | for (my $i=0; $i<$repeats; $i++) { 67 | 68 | my $j = 0; 69 | my $startPoint = 0; 70 | my $remainder = 0; 71 | 72 | my $jobNumber = 0; 73 | my $timeSoFar = 0; 74 | 75 | my %inputHash = (); 76 | my %outputHash = (); 77 | 78 | # truncate previously existing file and open new one for append 79 | 80 | close(OUTPUT_FILE); 81 | open(OUTPUT_FILE, "> $out_prefix\_$i") or die "Cannot open $out_prefix\_$i"; 82 | close(OUTPUT_FILE); 83 | open(OUTPUT_FILE, ">> $out_prefix\_$i") or die "Cannot open $out_prefix\_$i"; 84 | 85 | # sample and print 86 | 87 | for ($j=0; $j<$samples; $j++) { 88 | 89 | my $startTime = $traceStart + rand() * ($traceEnd - $traceStart - $length); 90 | my $endTime = $startTime + $length; 91 | 92 | # book keeping ... 93 | 94 | my $prev = $startTime - $remainder; 95 | $remainder = $length + $remainder; 96 | 97 | # binary search to find index of first job with job submit time >= $startTime 98 | 99 | my $min = 0; 100 | my $max = $dataSize - 1; 101 | my $mid = $min + floor(($max - $min)/2); 102 | 103 | while (($min <= $max) && ($all_data[$mid][5] != $startTime)){ 104 | $mid = $min + floor(($max - $min)/2); 105 | if ($startTime >= ($all_data[$mid][5])) { 106 | $min = $mid + 1; 107 | } else { 108 | $max = $mid - 1; 109 | } 110 | } 111 | 112 | # print out workload 113 | 114 | for (my $k=$mid; $all_data[$k][5] <= $endTime; $k++) { 115 | 116 | if ($all_data[$k][5] >= $startTime && $all_data[$k][5] <= $endTime) { 117 | 118 | $timeSoFar += ($all_data[$k][5] - floor($prev)); 119 | 120 | print OUTPUT_FILE "job" . $jobNumber . "\t"; 121 | print OUTPUT_FILE $timeSoFar . "\t"; 122 | print OUTPUT_FILE ($all_data[$k][5] - floor($prev)) . "\t"; # inter-job time gap seconds 123 | print OUTPUT_FILE $all_data[$k][2] . "\t"; 124 | print OUTPUT_FILE $all_data[$k][3] . "\t"; 125 | print OUTPUT_FILE $all_data[$k][4] . "\t"; 126 | 127 | # print anonymized input path if info available, else print TAB 128 | if (defined($all_data[$k][10])) { 129 | my $inputPath = $all_data[$k][10]; 130 | if (defined($inputHash{$inputPath})) { 131 | $inputPath = $inputHash{$inputPath}; 132 | } else { 133 | $inputHash{$inputPath} = scalar(keys( %inputHash )); 134 | $inputPath = scalar(keys( %inputHash )); 135 | } 136 | $inputPath = "inputPath" . $inputPath; 137 | print OUTPUT_FILE $inputPath . 
"\t"; 138 | } else { 139 | print OUTPUT_FILE "\t"; 140 | } 141 | 142 | # print anonymized output path if info available, else print TAB 143 | if (defined($all_data[$k][11])) { 144 | my $outputPath = $all_data[$k][11]; 145 | if (defined($outputHash{$outputPath})) { 146 | $outputPath = $outputHash{$outputPath}; 147 | } else { 148 | $outputHash{$outputPath} = scalar(keys( %outputHash )); 149 | $outputPath = scalar(keys( %outputHash )); 150 | } 151 | $outputPath = "outputPath" . $outputPath; 152 | print OUTPUT_FILE $outputPath . "\t"; 153 | } else { 154 | print OUTPUT_FILE "\t"; 155 | } 156 | 157 | print OUTPUT_FILE "\n"; 158 | 159 | $prev = $all_data[$k][5]; 160 | $remainder = $endTime - $all_data[$k][5]; 161 | $jobNumber++; 162 | } 163 | 164 | } 165 | 166 | 167 | } # end for ($j=0; $j<$samples; $j++) 168 | 169 | close(OUTPUT_FILE); 170 | 171 | } # end for (my $i=0; $i<$repeats; $i++) 172 | 173 | } # end sub sample_and_print 174 | 175 | -------------------------------------------------------------------------------- /workloadSuite/parse-hadoop-jobhistory.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # Log Parger for Hadoop jobHistory files 4 | 5 | ################################################################ 6 | # 7 | # Copyright (c) 2011, Regents of the University of California. 8 | # All rights reserved. 9 | # 10 | # This file is governed by the "New BSD License" below. 11 | # 12 | # 13 | # 14 | # Redistribution and use in source and binary forms, with or without 15 | # modification, are permitted provided that the following conditions 16 | # are met: 17 | # 18 | # * Redistributions of source code must retain the above copyright notice, 19 | # this list of conditions and the following disclaimer. 20 | # * Redistributions in binary form must reproduce the above copyright 21 | # notice, this list of conditions and the following disclaimer in the 22 | # documentation and/or other materials provided with the distribution. 23 | # * Neither the name of the University of California, Berkeley 24 | # nor the names of its contributors may be used to endorse or promote 25 | # products derived from this software without specific prior written 26 | # permission. 27 | # 28 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 31 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 32 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 33 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 34 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 35 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 36 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 39 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
40 | #
41 | ################################################################
42 | 
43 | 
44 | use List::Util qw[min max];
45 | use Digest::MD5 qw(md5 md5_hex md5_base64);
46 | 
47 | $historyDir = $ARGV[0];
48 | 
49 | opendir(my $HIST_DIR, $historyDir);
50 | #my @directory_contents = readdir(HIST_DIR);
51 | #closedir(HIST_DIR);
52 | 
53 | my %jobs = ();
54 | my $files = 0;
55 | 
56 | #foreach my $dentry ( @directory_contents ) {
57 | 
58 | while (
59 |     defined( my $dentry = readdir $HIST_DIR )
60 | ) {
61 | 
62 |     #if (!($dentry =~ /^*conf*/) && !($dentry =~ /^*crc/)) {
63 |     $files++;
64 | 
65 |     if ($files % 5000 == 0) {
66 |         print STDERR $files . "\n";
67 |     }
68 |     if (!($dentry =~ /^*crc/)) {
69 |         #print "$historyDir\/$dentry\n";
70 | 
71 |         $file = "$historyDir\/$dentry";
72 |         open (FILE, $file);
73 |         $total_map_time = 0;
74 |         $total_reduce_time = 0;
75 |         $submit_time = 0;
76 |         $launch_time = 0;
77 |         $finish_time = 0;
78 |         my $status = "";
79 |         my $job_id = "";
80 |         my $job_name="";
81 |         my $map_input_bytes = 0;
82 |         my $shuffle_bytes = 0;
83 |         my $red_output_bytes = 0;
84 |         my $map_input_bytes_per_record = 0;
85 |         my $red_output_records = 0;
86 | 
87 |         #my %tasks = {};
88 |         #my %jobs = {};
89 | 
90 |         #if (1==2) {
91 |         while (<FILE>) {
92 | 
93 | 
94 |             chomp;
95 |             ($line) = split("\n");
96 | 
97 | 
98 |             if($dentry =~ /job_(\d+)_(\d+)_conf.xml/) {
99 |                 $job_id = "job_" . $1 . "_" . $2;
100 |                 $jobs{$job_id}{"checkedPaths"} = 1;
101 | 
102 |                 if($line =~ /\<property\>(.*?)\<name\>mapred\.input\.dir\<\/name\>\<value\>(.*?)\<\/value\>\<\/property\>/)
103 |                 {
104 |                     $jobs{$job_id}{"input_dir"} = md5_hex($2);
105 |                 }
106 |                 if($line =~ /\<property\>(.*?)\<name\>mapred\.output\.dir\<\/name\>\<value\>(.*?)\<\/value\>\<\/property\>/)
107 |                 {
108 |                     $jobs{$job_id}{"output_dir"} = md5_hex($2);
109 |                 }
110 |             }
111 | 
112 |             if($line =~ /Job JOBID="(\S+)"/) {
113 |                 $job_id = $1;
114 |                 $jobs{$job_id}{"checkedStats"} = 1;
115 | 
116 |                 if($line =~ /JOBNAME="(.*?)"/)
117 |                 {
118 |                     $jobs{$job_id}{"job_name"} = $1;
119 |                 }
120 |                 if ($line =~ /SUBMIT_TIME="(\d+)"/) {
121 |                     $jobs{$job_id}{"submit_time"} = $1;
122 |                 }
123 | 
124 |                 if ($line =~ /LAUNCH_TIME="(\d+)"/) {
125 |                     $jobs{$job_id}{"launch_time"} = $1;
126 |                 }
127 | 
128 |                 if ($line =~ /FINISH_TIME="(\d+)"/) {
129 |                     $jobs{$job_id}{"finish_time"} = $1;
130 |                 }
131 | 
132 |                 if($line =~ /TOTAL_MAPS\="(\d+)"/) {
133 |                     $jobs{$job_id}{"maps"} = $1;
134 |                 }
135 |                 if ($line =~ /TOTAL_REDUCES\="(\d+)"/) {
136 |                     $jobs{$job_id}{"reduces"} = $1;
137 |                 }
138 | 
139 |                 if ($line =~ /JOB_STATUS="(\w+)"/) {
140 |                     $jobs{$job_id}{"status"} = $1;
141 |                 }
142 | 
143 |                 if ($line =~ /\(HDFS_BYTES_READ\)\((\d+)\)/) {
144 |                     $jobs{$job_id}{"map_input_bytes"} = $1;
145 |                 }
146 | 
147 |                 if ($line =~ /\(Map input records\)\((\d+)\)/) {
148 |                     if ($1>0) {
149 |                         $jobs{$job_id}{"map_input_bytes_per_record"} = $jobs{$job_id}{"map_input_bytes"} / $1;
150 |                     }
151 |                 }
152 | 
153 |                 if ($line =~ /\(Map output records\)\((\d+)\)/) {
154 |                     $jobs{$job_id}{"shuffle_bytes"} = $1 * $jobs{$job_id}{"map_input_bytes_per_record"};
155 |                 }
156 | 
157 |                 if ($line =~ /\(HDFS_BYTES_WRITTEN\)\((\d+)\)/) {
158 |                     $jobs{$job_id}{"red_output_bytes"} = $1;
159 |                 }
160 | 
161 |                 if ($line =~ /\(Map output bytes\)\((\d+)\)/) {
162 |                     $jobs{$job_id}{"shuffle_bytes"} = $1;
163 |                 }
164 | 
165 | 
166 |             }
167 | 
168 |             if ($line =~ /Task TASKID="(\S+)"/)
169 |             {
170 |                 $task_id = $1;
171 | 
172 |                 if($line =~ /TASK_TYPE="(\w+)"/) {
173 |                     $jobs{$job_id}{"tasks"}{$task_id}{"task_type"} = $1;
174 |                 }
175 |                 if ($line =~ /START_TIME="(\d+)"/) {
176 |                     $jobs{$job_id}{"tasks"}{$task_id}{"start_time"} = $1;
177 |                 }
178 | 
179 | 
                if ($line =~ /TASK_TYPE="(\w+)"/) {
                    $jobs{$job_id}{"tasks"}{$task_id}{"type"} = $1;
                }
                if ($line =~ /TASK_STATUS="(\w+)"/) {
                    #$status = $1;
                    $jobs{$job_id}{"tasks"}{$task_id}{"status"} = $1;
                }
                if ($line =~ /FINISH_TIME="(\d+)"/) {
                    $jobs{$job_id}{"tasks"}{$task_id}{"end_time"} = $1;
                }
                $jobs{$job_id}{"tasks"}{$task_id}{"execution_time"} = $jobs{$job_id}{"tasks"}{$task_id}{"end_time"} - $jobs{$job_id}{"tasks"}{$task_id}{"start_time"};

                if ($jobs{$job_id}{"tasks"}{$task_id}{"type"} =~ /MAP/ and $jobs{$job_id}{"tasks"}{$task_id}{"status"} =~ /SUCCESS/) {
                    $jobs{$job_id}{"total_map_time"} += $jobs{$job_id}{"tasks"}{$task_id}{"execution_time"};
                }
                if ($jobs{$job_id}{"tasks"}{$task_id}{"type"} =~ /REDUCE/ and $jobs{$job_id}{"tasks"}{$task_id}{"status"} =~ /SUCCESS/) {
                    $jobs{$job_id}{"total_reduce_time"} += $jobs{$job_id}{"tasks"}{$task_id}{"execution_time"};
                    if ($line =~ /\(HDFS_BYTES_WRITTEN\)\((\d+)\)/) {
                        $jobs{$job_id}{"red_output_bytes"} += $1;
                    }
                    if ($line =~ /\(Reduce output records\)\((\d+)\)/) {
                        $jobs{$job_id}{"red_output_records"} += $1;
                    }
                }

            }

        }
        close (FILE);

        if ($jobs{$job_id}{"status"} =~ /SUCCESS/) {
            if ($jobs{$job_id}{"red_output_bytes"} == 0 && $jobs{$job_id}{"red_output_records"} > 0) {
                $jobs{$job_id}{"red_output_bytes"} = $jobs{$job_id}{"red_output_records"} * $jobs{$job_id}{"map_input_bytes_per_record"};
            }
            # map-only job: what was counted as shuffle data is really the job's output
            if ($jobs{$job_id}{"reduces"} == 0 && $jobs{$job_id}{"red_output_bytes"} == 0) {
                $jobs{$job_id}{"red_output_bytes"} = $jobs{$job_id}{"shuffle_bytes"};
                $jobs{$job_id}{"shuffle_bytes"} = 0;
            }
            #print
            #"$job_id\t$job_name\t$map_input_bytes\t$shuffle_bytes\t$red_output_bytes\t" . ($submit_time / 1000) . "\t" .
            #(($finish_time - $launch_time) / 1000) . "\t" . ($total_map_time/1000) . "\t" .
            #($total_reduce_time/1000 ) . "\t" . (($total_map_time + $total_reduce_time) / 1000) . "\t$maps\t$reduces";
            #print "\n";
        }

        if (defined($jobs{$job_id}{"checkedPaths"}) && defined($jobs{$job_id}{"checkedStats"}) && $jobs{$job_id}{"status"} =~ /SUCCESS/) {
            print
                "$job_id\t" .
                $jobs{$job_id}{"job_name"} . "\t" .
                $jobs{$job_id}{"map_input_bytes"} . "\t" .
                $jobs{$job_id}{"shuffle_bytes"} . "\t" .
                $jobs{$job_id}{"red_output_bytes"} . "\t" .
                ($jobs{$job_id}{"submit_time"} / 1000) . "\t" .
                (($jobs{$job_id}{"finish_time"} - $jobs{$job_id}{"launch_time"}) / 1000) . "\t" .
                ($jobs{$job_id}{"total_map_time"} / 1000) . "\t" .
                ($jobs{$job_id}{"total_reduce_time"} / 1000) . "\t" .
                (($jobs{$job_id}{"total_map_time"} + $jobs{$job_id}{"total_reduce_time"}) / 1000) . "\t" .
                $jobs{$job_id}{"maps"} . "\t" .
                $jobs{$job_id}{"reduces"} . "\t" .
                $jobs{$job_id}{"input_dir"} . "\t" .
                $jobs{$job_id}{"output_dir"};
            print "\n";
            delete($jobs{$job_id});
        }

    }
}

#exit;

# flush any jobs still buffered at the end (seen in only one of the two
# file kinds, so they never passed the checkedPaths/checkedStats test above)
foreach $job_id (keys %jobs) {
    if ($jobs{$job_id}{"status"} =~ /SUCCESS/) {
        print
            "$job_id\t" .
            $jobs{$job_id}{"job_name"} . "\t" .
            $jobs{$job_id}{"map_input_bytes"} . "\t" .
            $jobs{$job_id}{"shuffle_bytes"} . "\t" .
            $jobs{$job_id}{"red_output_bytes"} . "\t" .
            ($jobs{$job_id}{"submit_time"} / 1000) . "\t" .
            (($jobs{$job_id}{"finish_time"} - $jobs{$job_id}{"launch_time"}) / 1000) . "\t" .
            ($jobs{$job_id}{"total_map_time"} / 1000) . "\t" .
            ($jobs{$job_id}{"total_reduce_time"} / 1000) . "\t" .
            (($jobs{$job_id}{"total_map_time"} + $jobs{$job_id}{"total_reduce_time"}) / 1000) . "\t" .
            $jobs{$job_id}{"maps"} . "\t" .
            $jobs{$job_id}{"reduces"} . "\t" .
            $jobs{$job_id}{"input_dir"} . "\t" .
            $jobs{$job_id}{"output_dir"};
        print "\n";
    }
}

closedir($HIST_DIR);

exit;
--------------------------------------------------------------------------------
/workloadSuite/randomwriter_conf.xsl:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>test.randomwrite.min_key</name>
    <value>10</value>
  </property>
  <property>
    <name>test.randomwrite.max_key</name>
    <value>10</value>
  </property>
  <property>
    <name>test.randomwrite.min_value</name>
    <value>90</value>
  </property>
  <property>
    <name>test.randomwrite.max_value</name>
    <value>90</value>
  </property>
  <property>
    <name>test.randomwrite.total_bytes</name>
    <value>4294967296</value>
  </property>
  <property>
    <name>test.randomwrite.bytes_per_map</name>
    <value>67108864</value>
  </property>
</configuration>
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-0.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-1.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-10.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-11.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-12.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-13.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-14.txt:
--------------------------------------------------------------------------------
workGenInput/part-00007
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-15.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-16.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-17.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006,workGenInput/part-00007,workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-18.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-19.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005,workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-2.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-20.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-21.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-22.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-23.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-24.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-25.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-26.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-27.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-28.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-29.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-3.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-30.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-31.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-32.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-33.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-34.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-35.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-36.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-37.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-38.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-39.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-4.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-40.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-41.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-42.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-43.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-44.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-45.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-46.txt:
--------------------------------------------------------------------------------
workGenInput/part-00007
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-47.txt:
--------------------------------------------------------------------------------
workGenInput/part-00004
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-48.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-49.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-5.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-6.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-7.txt:
--------------------------------------------------------------------------------
workGenInput/part-00004
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-8.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-9.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-0.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-0.txt workGenOutputTest-0 5.810261E-4 0.26818323 >> workGenLogs/job-0.txt 2>> workGenLogs/job-0.txt
hadoop dfs -rmr workGenOutputTest-0
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-1.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-1.txt workGenOutputTest-1 4.223287E-4 0.2541811 >> workGenLogs/job-1.txt 2>> workGenLogs/job-1.txt
hadoop dfs -rmr workGenOutputTest-1
# inputSize 67108864
--------------------------------------------------------------------------------
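Each run-job-N.sh follows the same template: launch one WorkGen MapReduce job with a reducer count (-r), the job's input-path file, a job-specific output directory, and two trailing floating-point arguments, then delete the output so the test cluster does not fill up. Reading the two floats as the shuffle-to-input and output-to-shuffle data ratios consumed by WorkGen (an assumption here, taken from the SWIM documentation rather than anything in these scripts), a minimal sketch of the data sizes implied by run-job-0.sh above:

#!/usr/bin/perl
# Sketch: per-job data sizes implied by run-job-0.sh, assuming the two
# trailing WorkGen arguments are shuffle/input and output/shuffle ratios.
my $input   = 67108864;       # "# inputSize" comment in run-job-0.sh
my $sir     = 5.810261E-4;    # assumed shuffle-to-input ratio
my $osr     = 0.26818323;     # assumed output-to-shuffle ratio
my $shuffle = $input * $sir;      # ~39 KB of intermediate map output
my $output  = $shuffle * $osr;    # ~10 KB written by the reduce stage
printf "shuffle = %.0f bytes, output = %.0f bytes\n", $shuffle, $output;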
/workloadSuite/scriptsTest/run-job-10.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-10.txt workGenOutputTest-10 1.5258789E-5 1.0 >> workGenLogs/job-10.txt 2>> workGenLogs/job-10.txt
hadoop dfs -rmr workGenOutputTest-10
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-11.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-11.txt workGenOutputTest-11 1.5258789E-5 1.0 >> workGenLogs/job-11.txt 2>> workGenLogs/job-11.txt
hadoop dfs -rmr workGenOutputTest-11
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-12.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-12.txt workGenOutputTest-12 5.777925E-4 0.02640877 >> workGenLogs/job-12.txt 2>> workGenLogs/job-12.txt
hadoop dfs -rmr workGenOutputTest-12
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-13.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-13.txt workGenOutputTest-13 1.5258789E-5 1.0 >> workGenLogs/job-13.txt 2>> workGenLogs/job-13.txt
hadoop dfs -rmr workGenOutputTest-13
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-14.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-14.txt workGenOutputTest-14 1.5258789E-5 1.0 >> workGenLogs/job-14.txt 2>> workGenLogs/job-14.txt
hadoop dfs -rmr workGenOutputTest-14
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-15.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-15.txt workGenOutputTest-15 2.7373433E-5 0.55743057 >> workGenLogs/job-15.txt 2>> workGenLogs/job-15.txt
hadoop dfs -rmr workGenOutputTest-15
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-16.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-16.txt workGenOutputTest-16 1.5258789E-5 1.0 >> workGenLogs/job-16.txt 2>> workGenLogs/job-16.txt
hadoop dfs -rmr workGenOutputTest-16
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-17.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 4 inputPath-job-17.txt workGenOutputTest-17 1.2676634 0.2764548 >> workGenLogs/job-17.txt 2>> workGenLogs/job-17.txt
hadoop dfs -rmr workGenOutputTest-17
# inputSize 171246518
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-18.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-18.txt workGenOutputTest-18 1.5258789E-5 1.0 >> workGenLogs/job-18.txt 2>> workGenLogs/job-18.txt
hadoop dfs -rmr workGenOutputTest-18
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-19.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-19.txt workGenOutputTest-19 0.574622 3.951307E-4 >> workGenLogs/job-19.txt 2>> workGenLogs/job-19.txt
hadoop dfs -rmr workGenOutputTest-19
# inputSize 79607743
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-2.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-2.txt workGenOutputTest-2 1.47596E-4 0.39293286 >> workGenLogs/job-2.txt 2>> workGenLogs/job-2.txt
hadoop dfs -rmr workGenOutputTest-2
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-20.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-20.txt workGenOutputTest-20 1.5258789E-5 1.0 >> workGenLogs/job-20.txt 2>> workGenLogs/job-20.txt
hadoop dfs -rmr workGenOutputTest-20
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-21.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-21.txt workGenOutputTest-21 1.5258789E-5 1.0 >> workGenLogs/job-21.txt 2>> workGenLogs/job-21.txt
hadoop dfs -rmr workGenOutputTest-21
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-22.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-22.txt workGenOutputTest-22 1.5258789E-5 1.0 >> workGenLogs/job-22.txt 2>> workGenLogs/job-22.txt
hadoop dfs -rmr workGenOutputTest-22
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-23.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-23.txt workGenOutputTest-23 1.5258789E-5 1.0 >> workGenLogs/job-23.txt 2>> workGenLogs/job-23.txt
hadoop dfs -rmr workGenOutputTest-23
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-24.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-24.txt workGenOutputTest-24 1.5258789E-5 1.0 >> workGenLogs/job-24.txt 2>> workGenLogs/job-24.txt
hadoop dfs -rmr workGenOutputTest-24
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-25.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-25.txt workGenOutputTest-25 0.011125132 1.6002492 >> workGenLogs/job-25.txt 2>> workGenLogs/job-25.txt
hadoop dfs -rmr workGenOutputTest-25
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-26.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-26.txt workGenOutputTest-26 0.001182422 0.16898338 >> workGenLogs/job-26.txt 2>> workGenLogs/job-26.txt
hadoop dfs -rmr workGenOutputTest-26
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-27.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-27.txt workGenOutputTest-27 0.0018562526 0.10764143 >> workGenLogs/job-27.txt 2>> workGenLogs/job-27.txt
hadoop dfs -rmr workGenOutputTest-27
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-28.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-28.txt workGenOutputTest-28 7.812679E-5 0.435247 >> workGenLogs/job-28.txt 2>> workGenLogs/job-28.txt
hadoop dfs -rmr workGenOutputTest-28
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-29.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-29.txt workGenOutputTest-29 1.5258789E-5 1.0 >> workGenLogs/job-29.txt 2>> workGenLogs/job-29.txt
hadoop dfs -rmr workGenOutputTest-29
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-3.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-3.txt workGenOutputTest-3 1.5258789E-5 1.0 >> workGenLogs/job-3.txt 2>> workGenLogs/job-3.txt
hadoop dfs -rmr workGenOutputTest-3
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-30.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-30.txt workGenOutputTest-30 7.708371E-5 0.1979509 >> workGenLogs/job-30.txt 2>> workGenLogs/job-30.txt
hadoop dfs -rmr workGenOutputTest-30
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-31.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-31.txt workGenOutputTest-31 0.7998113 8.2158303E-4 >> workGenLogs/job-31.txt 2>> workGenLogs/job-31.txt
hadoop dfs -rmr workGenOutputTest-31
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-32.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-32.txt workGenOutputTest-32 1.5258789E-5 1.0 >> workGenLogs/job-32.txt 2>> workGenLogs/job-32.txt
hadoop dfs -rmr workGenOutputTest-32
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-33.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-33.txt workGenOutputTest-33 1.5258789E-5 1.0 >> workGenLogs/job-33.txt 2>> workGenLogs/job-33.txt
hadoop dfs -rmr workGenOutputTest-33
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-34.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 5 inputPath-job-34.txt workGenOutputTest-34 1.5258789E-5 304735.12 >> workGenLogs/job-34.txt 2>> workGenLogs/job-34.txt
hadoop dfs -rmr workGenOutputTest-34
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-35.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-35.txt workGenOutputTest-35 1.5258789E-5 67.74121 >> workGenLogs/job-35.txt 2>> workGenLogs/job-35.txt
hadoop dfs -rmr workGenOutputTest-35
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-36.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-36.txt workGenOutputTest-36 1.5258789E-5 4410.825 >> workGenLogs/job-36.txt 2>> workGenLogs/job-36.txt
hadoop dfs -rmr workGenOutputTest-36
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-37.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 2 inputPath-job-37.txt workGenOutputTest-37 1.5258789E-5 149384.83 >> workGenLogs/job-37.txt 2>> workGenLogs/job-37.txt
hadoop dfs -rmr workGenOutputTest-37
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-38.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 2 inputPath-job-38.txt workGenOutputTest-38 1.5258789E-5 1398101.2 >> workGenLogs/job-38.txt 2>> workGenLogs/job-38.txt
hadoop dfs -rmr workGenOutputTest-38
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-39.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 2 inputPath-job-39.txt workGenOutputTest-39 1.5258789E-5 143652.4 >> workGenLogs/job-39.txt 2>> workGenLogs/job-39.txt
hadoop dfs -rmr workGenOutputTest-39
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-4.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-4.txt workGenOutputTest-4 1.5258789E-5 160.12402 >> workGenLogs/job-4.txt 2>> workGenLogs/job-4.txt
hadoop dfs -rmr workGenOutputTest-4
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-40.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 6 inputPath-job-40.txt workGenOutputTest-40 1.5258789E-5 419971.2 >> workGenLogs/job-40.txt 2>> workGenLogs/job-40.txt
hadoop dfs -rmr workGenOutputTest-40
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-41.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-41.txt workGenOutputTest-41 1.5258789E-5 1.0 >> workGenLogs/job-41.txt 2>> workGenLogs/job-41.txt
hadoop dfs -rmr workGenOutputTest-41
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-42.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-42.txt workGenOutputTest-42 1.5258789E-5 35321.98 >> workGenLogs/job-42.txt 2>> workGenLogs/job-42.txt
hadoop dfs -rmr workGenOutputTest-42
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-43.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-43.txt workGenOutputTest-43 0.017437875 0.29772884 >> workGenLogs/job-43.txt 2>> workGenLogs/job-43.txt
hadoop dfs -rmr workGenOutputTest-43
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-44.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-44.txt workGenOutputTest-44 1.5258789E-5 5607.967 >> workGenLogs/job-44.txt 2>> workGenLogs/job-44.txt
hadoop dfs -rmr workGenOutputTest-44
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-45.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-45.txt workGenOutputTest-45 3.1377375E-4 0.42883602 >> workGenLogs/job-45.txt 2>> workGenLogs/job-45.txt
hadoop dfs -rmr workGenOutputTest-45
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-46.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-46.txt workGenOutputTest-46 1.5258789E-5 1.0 >> workGenLogs/job-46.txt 2>> workGenLogs/job-46.txt
hadoop dfs -rmr workGenOutputTest-46
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-47.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-47.txt workGenOutputTest-47 1.5258789E-5 1.0 >> workGenLogs/job-47.txt 2>> workGenLogs/job-47.txt
hadoop dfs -rmr workGenOutputTest-47
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-48.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-48.txt workGenOutputTest-48 3.0110776E-4 0.050675508 >> workGenLogs/job-48.txt 2>> workGenLogs/job-48.txt
hadoop dfs -rmr workGenOutputTest-48
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-49.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-49.txt workGenOutputTest-49 1.5258789E-5 1.0 >> workGenLogs/job-49.txt 2>> workGenLogs/job-49.txt
hadoop dfs -rmr workGenOutputTest-49
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-5.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-5.txt workGenOutputTest-5 8.070469E-5 0.18906942 >> workGenLogs/job-5.txt 2>> workGenLogs/job-5.txt
hadoop dfs -rmr workGenOutputTest-5
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-6.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-6.txt workGenOutputTest-6 1.5258789E-5 8.972656 >> workGenLogs/job-6.txt 2>> workGenLogs/job-6.txt
hadoop dfs -rmr workGenOutputTest-6
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-7.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-7.txt workGenOutputTest-7 6.368756E-5 0.2395882 >> workGenLogs/job-7.txt 2>> workGenLogs/job-7.txt
hadoop dfs -rmr workGenOutputTest-7
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-8.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-8.txt workGenOutputTest-8 6.606579E-4 0.21064146 >> workGenLogs/job-8.txt 2>> workGenLogs/job-8.txt
hadoop dfs -rmr workGenOutputTest-8
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-9.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-9.txt workGenOutputTest-9 5.8835745E-4 0.4086972 >> workGenLogs/job-9.txt 2>> workGenLogs/job-9.txt
hadoop dfs -rmr workGenOutputTest-9
# inputSize 67108864
--------------------------------------------------------------------------------
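run-jobs-all.sh below is the replay driver: it clears the log directory, then launches every run-job-N.sh in the background, sleeping between launches to reproduce the inter-arrival gaps sampled from the trace. A small sketch that recovers this submission schedule from the script, assuming only the layout visible below:

#!/usr/bin/perl
# Sketch: extract the (job, inter-arrival gap) schedule from run-jobs-all.sh.
open(my $fh, '<', 'run-jobs-all.sh') or die "run-jobs-all.sh: $!";
my ($job, $total) = (undef, 0);
while (my $line = <$fh>) {
    $job = $1 if $line =~ /^\.\/run-job-(\d+)\.sh\s*&/;
    if (defined($job) && $line =~ /^sleep\s+(\d+)/) {
        printf "job %-2d submitted; next arrival in %3d s\n", $job, $1;
        $total += $1;
        $job = undef;
    }
}
close($fh);
print "total replay span: $total s\n";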
/workloadSuite/scriptsTest/run-jobs-all.sh:
--------------------------------------------------------------------------------
#!/bin/bash
rm -r workGenLogs
mkdir workGenLogs
./run-job-0.sh &
sleep 49
./run-job-1.sh &
sleep 52
./run-job-2.sh &
sleep 21
./run-job-3.sh &
sleep 75
./run-job-4.sh &
sleep 11
./run-job-5.sh &
sleep 141
./run-job-6.sh &
sleep 24
./run-job-7.sh &
sleep 6
./run-job-8.sh &
sleep 84
./run-job-9.sh &
sleep 24
./run-job-10.sh &
sleep 10
./run-job-11.sh &
sleep 112
./run-job-12.sh &
sleep 57
./run-job-13.sh &
sleep 32
./run-job-14.sh &
sleep 26
./run-job-15.sh &
sleep 206
./run-job-16.sh &
sleep 182
./run-job-17.sh &
sleep 16
./run-job-18.sh &
sleep 52
./run-job-19.sh &
sleep 5
./run-job-20.sh &
sleep 2
./run-job-21.sh &
sleep 27
./run-job-22.sh &
sleep 20
./run-job-23.sh &
sleep 28
./run-job-24.sh &
sleep 23
./run-job-25.sh &
sleep 135
./run-job-26.sh &
sleep 33
./run-job-27.sh &
sleep 43
./run-job-28.sh &
sleep 29
./run-job-29.sh &
sleep 140
./run-job-30.sh &
sleep 4
./run-job-31.sh &
sleep 31
./run-job-32.sh &
sleep 25
./run-job-33.sh &
sleep 21
./run-job-34.sh &
sleep 110
./run-job-35.sh &
sleep 118
./run-job-36.sh &
sleep 1
./run-job-37.sh &
sleep 0
./run-job-38.sh &
sleep 1
./run-job-39.sh &
sleep 1
./run-job-40.sh &
sleep 4
./run-job-41.sh &
sleep 200
./run-job-42.sh &
sleep 47
./run-job-43.sh &
sleep 53
./run-job-44.sh &
sleep 42
./run-job-45.sh &
sleep 324
./run-job-46.sh &
sleep 37
./run-job-47.sh &
sleep 68
./run-job-48.sh &
sleep 41
./run-job-49.sh &
sleep 33
# max input 171246518
# inputPartitionSize 67108864
# inputPartitionCount 10
--------------------------------------------------------------------------------
/workloadSuite/workGenKeyValue_conf.xsl:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>workGen.randomwrite.min_key</name>
    <value>10</value>
  </property>
  <property>
    <name>workGen.randomwrite.max_key</name>
    <value>10</value>
  </property>
  <property>
    <name>workGen.randomwrite.min_value</name>
    <value>90</value>
  </property>
  <property>
    <name>workGen.randomwrite.max_value</name>
    <value>90</value>
  </property>
  <property>
    <name>workGen.randomwrite.total_bytes</name>
    <value>10737418240</value>
  </property>
</configuration>
--------------------------------------------------------------------------------
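The bookkeeping comments at the end of run-jobs-all.sh tie the pieces together: the largest job input (171246518 bytes, job 17) spans ceil(171246518 / 67108864) = 3 of the 10 pre-generated 64 MiB partitions, which appears to be why inputPath-job-17.txt lists three comma-separated paths while most jobs read a single part file. A one-line check of that arithmetic (the starting part number below simply mirrors inputPath-job-17.txt):

#!/usr/bin/perl
# Sketch: partitions needed by the largest replay job, from the
# "# max input" and "# inputPartitionSize" comments in run-jobs-all.sh.
use POSIX qw(ceil);
my $n = ceil(171246518 / 67108864);    # -> 3 partitions
print join(",", map { sprintf("workGenInput/part-%05d", $_) } 6 .. 5 + $n), "\n";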