└── workloadSuite ├── Additional_Workloads_Placeholder.tsv ├── CHANGELOG ├── FB-2009_samples_24_times_1hr_0.tsv ├── FB-2009_samples_24_times_1hr_0_first50jobs.tsv ├── FB-2009_samples_24_times_1hr_1.tsv ├── FB-2010_samples_24_times_1hr_0.tsv ├── FB-2010_samples_24_times_1hr_withInputPaths_0.tsv ├── GenerateReplayScript.java ├── HDFSWrite.java ├── LICENSE ├── README ├── WorkGen.java ├── WorkloadSynthesis.pl ├── parse-hadoop-jobhistory.pl ├── randomwriter_conf.xsl ├── scriptsTest ├── inputPath-job-0.txt ├── inputPath-job-1.txt ├── inputPath-job-10.txt ├── inputPath-job-11.txt ├── inputPath-job-12.txt ├── inputPath-job-13.txt ├── inputPath-job-14.txt ├── inputPath-job-15.txt ├── inputPath-job-16.txt ├── inputPath-job-17.txt ├── inputPath-job-18.txt ├── inputPath-job-19.txt ├── inputPath-job-2.txt ├── inputPath-job-20.txt ├── inputPath-job-21.txt ├── inputPath-job-22.txt ├── inputPath-job-23.txt ├── inputPath-job-24.txt ├── inputPath-job-25.txt ├── inputPath-job-26.txt ├── inputPath-job-27.txt ├── inputPath-job-28.txt ├── inputPath-job-29.txt ├── inputPath-job-3.txt ├── inputPath-job-30.txt ├── inputPath-job-31.txt ├── inputPath-job-32.txt ├── inputPath-job-33.txt ├── inputPath-job-34.txt ├── inputPath-job-35.txt ├── inputPath-job-36.txt ├── inputPath-job-37.txt ├── inputPath-job-38.txt ├── inputPath-job-39.txt ├── inputPath-job-4.txt ├── inputPath-job-40.txt ├── inputPath-job-41.txt ├── inputPath-job-42.txt ├── inputPath-job-43.txt ├── inputPath-job-44.txt ├── inputPath-job-45.txt ├── inputPath-job-46.txt ├── inputPath-job-47.txt ├── inputPath-job-48.txt ├── inputPath-job-49.txt ├── inputPath-job-5.txt ├── inputPath-job-6.txt ├── inputPath-job-7.txt ├── inputPath-job-8.txt ├── inputPath-job-9.txt ├── run-job-0.sh ├── run-job-1.sh ├── run-job-10.sh ├── run-job-11.sh ├── run-job-12.sh ├── run-job-13.sh ├── run-job-14.sh ├── run-job-15.sh ├── run-job-16.sh ├── run-job-17.sh ├── run-job-18.sh ├── run-job-19.sh ├── run-job-2.sh ├── run-job-20.sh ├── run-job-21.sh ├── run-job-22.sh ├── run-job-23.sh ├── run-job-24.sh ├── run-job-25.sh ├── run-job-26.sh ├── run-job-27.sh ├── run-job-28.sh ├── run-job-29.sh ├── run-job-3.sh ├── run-job-30.sh ├── run-job-31.sh ├── run-job-32.sh ├── run-job-33.sh ├── run-job-34.sh ├── run-job-35.sh ├── run-job-36.sh ├── run-job-37.sh ├── run-job-38.sh ├── run-job-39.sh ├── run-job-4.sh ├── run-job-40.sh ├── run-job-41.sh ├── run-job-42.sh ├── run-job-43.sh ├── run-job-44.sh ├── run-job-45.sh ├── run-job-46.sh ├── run-job-47.sh ├── run-job-48.sh ├── run-job-49.sh ├── run-job-5.sh ├── run-job-6.sh ├── run-job-7.sh ├── run-job-8.sh ├── run-job-9.sh └── run-jobs-all.sh └── workGenKeyValue_conf.xsl /workloadSuite/Additional_Workloads_Placeholder.tsv: -------------------------------------------------------------------------------- 1 | job0 9 9 1762 0 14347 inputPath1 outputPath1 2 | job1 18 9 970 609 697 inputPath2 outputPath2 3 | job2 20 2 53088744165 1510005522 5759777 inputPath3 outputPath3 4 | job3 22 2 61873748853 1387369304 5840114 inputPath4 outputPath4 5 | job4 25 3 404 22 191 inputPath5 outputPath5 6 | job5 32 7 1483413 0 17296097682 inputPath6 outputPath6 7 | job6 32 0 404 22 191 inputPath7 outputPath7 8 | job7 46 14 25526958 300 24106570 inputPath8 outputPath8 9 | job8 49 3 6913085573 1380194628 2464 inputPath9 outputPath9 10 | job9 56 7 123701 0 1480875 inputPath10 outputPath10 11 | -------------------------------------------------------------------------------- /workloadSuite/CHANGELOG: 
-------------------------------------------------------------------------------- 1 | 2012-01-27 2 | 3 | Update to Version 1.4. 4 | 5 | FB-2010_samples_24_times_1hr_0.tsv 6 | Added Facebook 2010 workload. 7 | 8 | parse-hadoop-jobhistory.pl 9 | Added Hadoop job history log parsing tools. 10 | 11 | Fully migrated repository and documentation to git hub. 12 | Improved documentation, consolidated git hub wiki as sole 13 | documentation source. 14 | See https://github.com/SWIMProjectUCB/SWIM/wiki. 15 | 16 | README 17 | Changed to reflect the above. 18 | 19 | ---- 20 | 21 | 2011-08-16 22 | 23 | Update to Version 1.3. 24 | 25 | WorkGen.java 26 | Changed floats to doubles. 27 | 28 | GenerateReplayScript.java 29 | Added parameters for 30 | [prefix to workload output in HDFS] 31 | [workload stdout stderr output dir] 32 | [hadoop command] 33 | [path to WorkGen.jar] 34 | [path to workGenKeyValue_conf.xsl] 35 | 36 | scriptsTest/* 37 | Re-generated using the latest GenerateReplayScript.java. 38 | 39 | README 40 | Changed to reflect the above. 41 | 42 | CHANGELOG 43 | Added this file. 44 | 45 | -------------------------------------------------------------------------------- /workloadSuite/FB-2009_samples_24_times_1hr_0_first50jobs.tsv: -------------------------------------------------------------------------------- 1 | job0 49 49 740773 2339561 627471 2 | job1 101 52 736346 1700537 432269 3 | job2 122 21 267631 594312 233549 4 | job3 197 75 233422 42 37 5 | job4 208 11 3623279 0 9838062 6 | job5 349 141 233422 324968 10974 7 | job6 373 24 2497901 0 551304 8 | job7 379 6 233422 256459 8835 9 | job8 463 84 2193083 2660213 560397 10 | job9 487 24 1010097 2369060 968271 11 | job10 497 10 19555 0 41471 12 | job11 609 112 968144 41 35 13 | job12 666 57 968144 2326546 14369 14 | job13 698 32 968144 5072 2411 15 | job14 724 26 16526 0 34635 16 | job15 930 206 62032 110249 49133 17 | job16 1112 182 49006 41 37 18 | job17 1128 16 10274791099 13024975762 3600817163 19 | job18 1180 52 49006 57406 11871 20 | job19 1185 5 4776464612 2744661563 1084547 21 | job20 1187 2 4228 8456 127 22 | job21 1214 27 49006 50728 12181 23 | job22 1234 20 0 0 127 24 | job23 1262 28 23798 0 49034 25 | job24 1285 23 0 0 127 26 | job25 1420 135 23042116 44795750 71684305 27 | job26 1453 33 1083912 4761106 804562 28 | job27 1496 43 804435 7474262 804562 29 | job28 1525 29 160296 314626 136937 30 | job29 1665 140 136810 40 36 31 | job30 1669 4 136810 310392 19414 32 | job31 1700 31 1043799029 3220465724 2645916 33 | job32 1725 25 136810 817 699 34 | job33 1746 21 19859 0 41738 35 | job34 1856 110 11310 0 18722926301 36 | job35 1974 118 78 0 4162032 37 | job36 1975 1 106 0 271001141 38 | job37 1975 0 753 0 9178203796 39 | job38 1976 1 53416 0 85899343844 40 | job39 1977 1 9416 0 8826003635 41 | job40 1981 4 395 0 25803030649 42 | job41 2181 200 126 0 0 43 | job42 2228 47 359 0 2170182507 44 | job43 2281 53 69563889 70214203 20904794 45 | job44 2323 42 250 0 344553539 46 | job45 2647 324 584545 1263421 541810 47 | job46 2684 37 1212963 4926 32 48 | job47 2752 68 541683 41 36 49 | job48 2793 41 541683 1212479 11407 50 | job49 2826 33 541683 25536 3859 51 | -------------------------------------------------------------------------------- /workloadSuite/GenerateReplayScript.java: -------------------------------------------------------------------------------- 1 | import java.io.BufferedReader; 2 | import java.io.FileReader; 3 | import java.io.FileWriter; 4 | import java.io.File; 5 | import java.io.InputStreamReader; 6 | import java.util.HashMap; 7 
| import java.util.ArrayList;
8 | import java.util.Arrays;
9 | import java.util.Date;
10 | import java.text.SimpleDateFormat;
11 | 
12 | public class GenerateReplayScript {
13 | 
14 |   /*
15 |    * Workload file format constants for field indices
16 |    */
17 |   static final int INTER_JOB_SLEEP_TIME = 2;
18 |   static final int INPUT_DATA_SIZE = 3;
19 |   static final int SHUFFLE_DATA_SIZE = 4;
20 |   static final int OUTPUT_DATA_SIZE = 5;
21 | 
22 |   /*
23 |    *
24 |    * Parses a tab separated file into an ArrayList<ArrayList<String>>
25 |    *
26 |    */
27 |   public static long parseFileArrayList(String path,
28 |                                         ArrayList<ArrayList<String>> data
29 |                                         ) throws Exception {
30 | 
31 |     long maxInput = 0;
32 | 
33 |     BufferedReader input = new BufferedReader(new FileReader(path));
34 |     String s;
35 |     String[] array;
36 |     int rowIndex = 0;
37 |     int columnIndex = 0;
38 |     while (true) {
39 |       if (!input.ready()) break;
40 |       s = input.readLine();
41 |       array = s.split("\t");
42 |       try {
43 |         columnIndex = 0;
44 |         while (columnIndex < array.length) {
45 |           if (columnIndex == 0) {
46 |             data.add(rowIndex, new ArrayList<String>());
47 |           }
48 |           String value = array[columnIndex];
49 |           data.get(rowIndex).add(value);
50 | 
51 |           if (Long.parseLong(array[INPUT_DATA_SIZE]) > maxInput) {
52 |             maxInput = Long.parseLong(array[INPUT_DATA_SIZE]);
53 |           }
54 | 
55 |           columnIndex++;
56 |         }
57 |         rowIndex++;
58 |       } catch (Exception e) {
59 |         // silently skip rows with missing or non-numeric fields
60 |       }
61 |     }
62 | 
63 |     return maxInput;
64 | 
65 |   }
66 | 
67 |   /*
68 |    *
69 |    * Prints the necessary shell scripts
70 |    *
71 |    */
72 |   public static void printOutput(ArrayList<ArrayList<String>> workloadData,
73 |                                  int clusterSizeRaw,
74 |                                  int clusterSizeWorkload,
75 |                                  int inputPartitionSize,
76 |                                  int inputPartitionCount,
77 |                                  String scriptDirPath,
78 |                                  String hdfsInputDir,
79 |                                  String hdfsOutputPrefix,
80 |                                  long totalDataPerReduce,
81 |                                  String workloadOutputDir,
82 |                                  String hadoopCommand,
83 |                                  String pathToWorkGenJar,
84 |                                  String pathToWorkGenConf) throws Exception {
85 | 
86 | 
87 |     if (workloadData.size() > 0) {
88 | 
89 |       long maxInput = 0;
90 |       String toWrite = "";
91 | 
92 |       FileWriter runAllJobs = new FileWriter(scriptDirPath + "/run-jobs-all.sh");
93 | 
94 |       toWrite = "#!/bin/bash\n";
95 |       runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
96 |       toWrite = "rm -r " + workloadOutputDir + "\n";
97 |       runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
98 |       toWrite = "mkdir " + workloadOutputDir + "\n";
99 |       runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
100 | 
101 |       System.out.println();
102 |       System.out.println(workloadData.size() + " jobs in the workload.");
103 |       System.out.println("Generating scripts ... please wait ...");
104 |       System.out.println();
105 | 
106 |       int written = 0;
107 | 
108 |       for (int i=0; i<workloadData.size(); i++) {
109 | 
110 |         // Per-job values from the trace. Data sizes are scaled by the ratio of
111 |         // replay cluster size to original cluster size, mirroring the
112 |         // maxInputNeeded check in main().
113 |         long sleep = Long.parseLong(workloadData.get(i).get(INTER_JOB_SLEEP_TIME));
114 |         long input = Long.parseLong(workloadData.get(i).get(INPUT_DATA_SIZE))
115 |                      * clusterSizeWorkload / clusterSizeRaw;
116 |         long shuffle = Long.parseLong(workloadData.get(i).get(SHUFFLE_DATA_SIZE))
117 |                      * clusterSizeWorkload / clusterSizeRaw;
118 |         long output = Long.parseLong(workloadData.get(i).get(OUTPUT_DATA_SIZE))
119 |                      * clusterSizeWorkload / clusterSizeRaw;
120 | 
121 | 
122 | 
123 | 
124 |         if (input > maxInput) maxInput = input;
125 |         if (input < maxSeqFile(67108864)) input = maxSeqFile(67108864); // 64 MB minimum size
126 | 
127 |         if (shuffle < 1024 ) shuffle = 1024 ;
128 |         if (output < 1024 ) output = 1024 ;
129 | 
130 |         ArrayList<Integer> inputPartitionSamples = new ArrayList<Integer>();
131 |         long inputCopy = input;
132 |         java.util.Random rng = new java.util.Random();
133 |         int tryPartitionSample = rng.nextInt(inputPartitionCount);
134 |         while (inputCopy > 0) {
135 |           boolean alreadySampled = true;
136 |           while (alreadySampled) {
137 |             if (inputPartitionSamples.size()>=inputPartitionCount) {
138 |               System.err.println();
139 |               System.err.println("ERROR!");
140 |               System.err.println("Not enough partitions for input size of " + input + " bytes.");
141 |               System.err.println("Happened on job number " + i + ".");
142 |               System.err.println("Input partition size is " + inputPartitionSize + " bytes.");
143 |               System.err.println("Number of partitions is " + inputPartitionCount + ".");
144 |               System.err.println("Total data size is " + (((long) inputPartitionSize) * ((long) inputPartitionCount)) + " bytes < " + input + " bytes.");
145 |               System.err.println("Need to generate a larger input data set.");
146 |               System.err.println();
147 |               throw new Exception("Input data set not large enough. Need to generate a larger data set.");
148 |               // if exception thrown here, input set not large enough - generate bigger input set
149 |             }
150 |             alreadySampled = false;
151 |           }
152 |           inputPartitionSamples.add(new Integer(tryPartitionSample));
153 |           tryPartitionSample = (tryPartitionSample + 1) % inputPartitionCount;
154 |           inputCopy -= inputPartitionSize;
155 |         }
156 | 
157 |         FileWriter inputPathFile = new FileWriter(scriptDirPath + "/inputPath-job-" + i + ".txt");
158 |         String inputPath = "";
159 |         for (int j=0; j<inputPartitionSamples.size(); j++) {
160 |           // Comma-separate the sampled partitions; WorkGen reads this single line
161 |           // and hands it to FileInputFormat.setInputPaths(). The part-NNNNN naming
162 |           // assumes Hadoop's standard output file names for the generated input data.
163 |           if (j > 0) inputPath += ",";
164 |           inputPath += hdfsInputDir + "/part-"
165 |                        + String.format("%05d", inputPartitionSamples.get(j).intValue());
166 |         }
167 |         inputPathFile.write(inputPath.toCharArray(), 0, inputPath.length());
168 |         inputPathFile.close();
169 | 
170 |         // One output location per job under the HDFS output prefix (suffix format assumed).
171 |         String outputPath = hdfsOutputPrefix + "-job-" + i;
172 | 
173 |         // Ratios handed to WorkGen: shuffle/input on the map side, output/shuffle on the reduce side.
174 |         double SIRatio = ((double) shuffle) / ((double) input);
175 |         double OSRatio = ((double) output) / ((double) shuffle);
176 | 
177 |         long numReduces = -1;
178 |         if (totalDataPerReduce > 0) {
179 |           numReduces = Math.round((shuffle + output) / ((double) totalDataPerReduce));
180 |           if (numReduces < 1) numReduces = 1;
181 |           if (numReduces > clusterSizeWorkload) numReduces = clusterSizeWorkload / 5;
182 |           toWrite =
183 |             "" + hadoopCommand + " jar " + pathToWorkGenJar + " org.apache.hadoop.examples.WorkGen -conf " + pathToWorkGenConf + " " +
184 |             "-r " + numReduces + " " + inputPath + " " + outputPath + " " + SIRatio + " " + OSRatio +
185 |             " >> " + workloadOutputDir + "/job-" + i + ".txt 2>> " + workloadOutputDir + "/job-" + i + ".txt \n";
186 |         } else {
187 |           toWrite =
188 |             "" + hadoopCommand + " jar " + pathToWorkGenJar + " org.apache.hadoop.examples.WorkGen -conf " + pathToWorkGenConf + " " +
189 |             inputPath + " " + outputPath + " " + SIRatio + " " + OSRatio +
190 |             " >> " + workloadOutputDir + "/job-" + i + ".txt 2>> " + workloadOutputDir + "/job-" + i + ".txt \n";
191 |         }
192 | 
193 |         FileWriter runFile = new FileWriter(scriptDirPath + "/run-job-" + i + ".sh");
194 |         runFile.write(toWrite.toCharArray(), 0, toWrite.length());
195 |         toWrite = "" + hadoopCommand + " dfs -rmr " + outputPath + "\n";
196 |         runFile.write(toWrite.toCharArray(), 0, toWrite.length());
197 |         toWrite = "# inputSize " + input + "\n";
198 |         runFile.write(toWrite.toCharArray(), 0, toWrite.length());
199 | 
200 |         runFile.close();
201 | 
202 |         // works for linux type systems only
203 |         Runtime.getRuntime().exec("chmod +x " + scriptDirPath + "/run-job-" + i + ".sh");
204 | 
205 |         toWrite = "./run-job-" + i + ".sh &\n";
206 |         runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length());
207 | 
208 | 
209 |         toWrite = "sleep " + sleep + "\n";
210 | 
runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 211 | written++; 212 | 213 | 214 | } 215 | 216 | System.out.println(written + " jobs written ... done."); 217 | System.out.println(); 218 | 219 | toWrite = "# max input " + maxInput + "\n"; 220 | runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 221 | toWrite = "# inputPartitionSize " + inputPartitionSize + "\n"; 222 | runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 223 | toWrite = "# inputPartitionCount " + inputPartitionCount + "\n"; 224 | runAllJobs.write(toWrite.toCharArray(), 0, toWrite.length()); 225 | 226 | runAllJobs.close(); 227 | 228 | // works for linux type systems only 229 | Runtime.getRuntime().exec("chmod +x " + scriptDirPath + "/run-jobs-all.sh"); 230 | 231 | } 232 | 233 | } 234 | 235 | /* 236 | * 237 | * Computes the size of a SequenceFile with the given number 238 | * of records. We assume the following 96 byte header: 239 | * 240 | 4 bytes (magic header prefix) 241 | ... key class name: 35 bytes for "org.apache.hadoop.io.BytesWritable" (34 characters + one-byte length) 242 | ... value class name: 35 bytes for "org.apache.hadoop.io.BytesWritable" 243 | 1 byte boolean (is each record value compressed?) 244 | 1 byte boolean (is the file block compressed?) 245 | bytes for metadata: in our case, there is no metadata, and we get 4 bytes of zeros 246 | 16 bytes of sync 247 | * 248 | * The SequenceFile writer places a periodic marker after writing a 249 | * minimum of 2000 bytes; the marker also falls at a record boundary. 250 | * Therefore, unless the serialized record size is a factor of 2000, more 251 | * than 2000 bytes will be written between markers. In the code below, we 252 | * refer to this distance as the "markerSpacing". 253 | * 254 | * The SequenceFile writer can be found in: 255 | * hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java 256 | * 257 | * There are informative constants at the top of the SequenceFile class, 258 | * and the heart of the writer is the append() method of the Writer class. 259 | * 260 | */ 261 | 262 | static final int SeqFileHeaderSize = 96; 263 | static final int SeqFileRecordSizeUsable = 100; // max_key + max_value 264 | static final int SeqFileRecordSizeSerialized = 116; // usable + 4 ints 265 | static final int SeqFileMarkerSize = 20; 266 | static final double SeqFileMarkerMinSpacing = 2000.0; 267 | 268 | private static int seqFileSize(int numRecords) { 269 | int totalSize = SeqFileHeaderSize; 270 | 271 | int recordTotal = numRecords * SeqFileRecordSizeSerialized; 272 | totalSize += recordTotal; 273 | 274 | int numRecordsBetweenMarkers = (int) Math.ceil(SeqFileMarkerMinSpacing / (SeqFileRecordSizeSerialized * 1.0)); 275 | int markerSpacing = numRecordsBetweenMarkers * SeqFileRecordSizeSerialized; 276 | int numMarkers = (int) Math.floor((totalSize * 1.0) / (markerSpacing * 1.0)); 277 | 278 | totalSize += numMarkers * SeqFileMarkerSize; 279 | 280 | return totalSize; 281 | } 282 | 283 | /* 284 | * 285 | * Computes the amount of data a SequenceFile would hold in 286 | * an HDFS block of the given size. First, we estimate the number 287 | * of records which will fit by inverting seqFileSize(), then we 288 | * decrease until we fit within the block. 
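 * 
 * (Worked example from the constants above: a serialized record is 116 bytes,
 * so a marker falls every ceil(2000 / 116) = 18 records, i.e. every 2088
 * bytes; a 64 MB block of 67108864 bytes then holds 573035 records, and
 * maxSeqFile(67108864) = 573035 * 100 = 57303500 usable bytes.)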
289 | * 290 | * To compute the inverse, we start with a simplified form of the equation 291 | * computed by seqFileSize(), using X for the number of records: 292 | * 293 | * totalSize = 294 | * header + X * serialized 295 | * + markerSize * (header + X * serialized) / markerSpacing 296 | * 297 | * using some algebra: 298 | * 299 | * (totalSize - header) * markerSpacing 300 | * 301 | * = X * serialized * markerSpacing + markerSize * (header + X * serialized) 302 | * 303 | * 304 | * (totalSize - header) * markerSpacing - markerSize * header 305 | * 306 | * = X * serialized * markerSpacing + markerSize * X * serialized 307 | * 308 | * = (markerSpacing + markerSize) * X * serialized 309 | * 310 | * We now have a Right-Hand Side which looks easy to deal with! 311 | * 312 | * Focusing on the Left-Hand Side, we'd like to avoid multiplying 313 | * (totalSize - header) * markerSpacing as it may be a very large number. 314 | * We re-write as follows: 315 | * 316 | * (totalSize - header) * markerSpacing - markerSize * header = 317 | * (totalSize - header - markerSize * header / markerSpacing) * markerSpacing 318 | * 319 | */ 320 | 321 | public static int maxSeqFile(int blockSize) { 322 | 323 | // First, compute some values we will need. Same as in seqFileSize() 324 | int numRecordsBetweenMarkers = (int) Math.ceil(SeqFileMarkerMinSpacing / (SeqFileRecordSizeSerialized * 1.0)); 325 | double markerSpacing = numRecordsBetweenMarkers * SeqFileRecordSizeSerialized * 1.0; 326 | 327 | // Calculate the Left-Hand Side we wrote in the comment above 328 | double est = blockSize - SeqFileHeaderSize - (SeqFileMarkerSize * SeqFileHeaderSize * 1.0) / markerSpacing; 329 | est *= markerSpacing; 330 | 331 | // Now, divide the constants from the Right-Hand Side we found above 332 | est /= (markerSpacing + SeqFileMarkerSize * 1.0); 333 | est /= (SeqFileRecordSizeSerialized * 1.0); 334 | 335 | // Can't have a fractional number of records! 336 | int numRecords = (int) Math.ceil(est); 337 | 338 | // Check if we over-estimated 339 | while (seqFileSize(numRecords) > blockSize) { 340 | numRecords--; 341 | } 342 | 343 | return (numRecords * SeqFileRecordSizeUsable); 344 | } 345 | 346 | /* 347 | * 348 | * Read in command line arguments etc. 
349 |    *
350 |    */
351 |   public static void main(String args[]) throws Exception {
352 | 
353 |     if (args.length < 13) {
354 | 
355 |       System.out.println();
356 |       System.out.println("Insufficient arguments.");
357 |       System.out.println();
358 |       System.out.println("Usage: ");
359 |       System.out.println();
360 |       System.out.println("java GenerateReplayScript");
361 |       System.out.println("  [path to file with workload info]");
362 |       System.out.println("  [number of machines in the original production cluster]");
363 |       System.out.println("  [number of machines in the cluster on which the workload will be run]");
364 |       System.out.println("  [HDFS block size]");
365 |       System.out.println("  [number of input partitions]");
366 |       System.out.println("  [output directory for the scripts]");
367 |       System.out.println("  [HDFS directory for the input data]");
368 |       System.out.println("  [prefix to workload output in HDFS]");
369 |       System.out.println("  [amount of data per reduce task in bytes]");
370 |       System.out.println("  [directory for the workload output files]");
371 |       System.out.println("  [hadoop command on your system]");
372 |       System.out.println("  [path to WorkGen.jar]");
373 |       System.out.println("  [path to workGenKeyValue_conf.xsl]");
374 |       System.out.println();
375 | 
376 |     } else {
377 | 
378 |       // variables
379 | 
380 |       ArrayList<ArrayList<String>> workloadData = new ArrayList<ArrayList<String>>();
381 | 
382 |       // read command line arguments
383 | 
384 |       String fileWorkloadPath = args[0];
385 | 
386 |       int clusterSizeRaw = Integer.parseInt(args[1]);
387 |       int clusterSizeWorkload = Integer.parseInt(args[2]);
388 |       int hdfsBlockSize = Integer.parseInt(args[3]);
389 |       int inputPartitionCount = Integer.parseInt(args[4]);
390 |       String scriptDirPath = args[5];
391 |       String hdfsInputDir = args[6];
392 |       String hdfsOutputPrefix = args[7];
393 |       long totalDataPerReduce = Long.parseLong(args[8]);
394 |       String workloadOutputDir = args[9];
395 |       String hadoopCommand = args[10];
396 |       String pathToWorkGenJar = args[11];
397 |       String pathToWorkGenConf = args[12];
398 | 
399 |       // parse data
400 | 
401 |       long maxInput = parseFileArrayList(fileWorkloadPath, workloadData);
402 | 
403 |       // check if maxInput fits within input data size to be generated
404 | 
405 |       long maxInputNeeded = maxInput * clusterSizeWorkload / clusterSizeRaw;
406 | 
407 |       int inputPartitionSize = maxSeqFile(hdfsBlockSize);
408 |       long totalInput = ((long) inputPartitionSize) * ((long) inputPartitionCount);
409 | 
410 |       if (maxInputNeeded > totalInput) {
411 | 
412 |         System.err.println();
413 |         System.err.println("ERROR!");
414 |         System.err.println("Not enough partitions for max needed input size of " + maxInputNeeded + " bytes.");
415 |         System.err.println("HDFS block size is " + hdfsBlockSize + " bytes.");
416 |         System.err.println("Input partition size is " + inputPartitionSize + " bytes.");
417 |         System.err.println("Number of partitions is " + inputPartitionCount + ".");
418 |         System.err.println("Total actual input data size is " + totalInput + " bytes < " + maxInputNeeded + " bytes.");
419 |         System.err.println("Need to generate a larger input data set.");
420 |         System.err.println();
421 | 
422 |         throw new Exception("Input data set not large enough.
Need to generate a larger data set."); 422 | } else { 423 | 424 | System.err.println(); 425 | System.err.println("Max needed input size " + maxInputNeeded + " bytes."); 426 | System.err.println("Actual input size is " + totalInput + " bytes >= " + maxInputNeeded + " bytes."); 427 | System.err.println("All is good."); 428 | System.err.println(); 429 | } 430 | 431 | // make scriptDirPath directory if it doesn't exist 432 | 433 | File d = new File(scriptDirPath); 434 | if (d.exists()) { 435 | if (d.isDirectory()) { 436 | System.err.println("Warning! About to overwrite existing scripts in: " + scriptDirPath); 437 | System.err.print("Ok to continue? [y/n] "); 438 | BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); 439 | String s = in.readLine(); 440 | if (s == null || s.length() < 1 || s.toLowerCase().charAt(0) != 'y') { 441 | throw new Exception("Declined overwrite of existing directory"); 442 | } 443 | } else { 444 | throw new Exception(scriptDirPath + " is a file."); 445 | } 446 | } else { 447 | d.mkdirs(); 448 | } 449 | 450 | // print shell scripts 451 | 452 | printOutput(workloadData, clusterSizeRaw, clusterSizeWorkload, 453 | inputPartitionSize, inputPartitionCount, scriptDirPath, hdfsInputDir, hdfsOutputPrefix, 454 | totalDataPerReduce, workloadOutputDir, hadoopCommand, pathToWorkGenJar, pathToWorkGenConf); 455 | 456 | 457 | System.out.println("Parameter values for randomwriter_conf.xsl:"); 458 | System.out.println("test.randomwrite.total_bytes: " + totalInput); 459 | System.out.println("test.randomwrite.bytes_per_map: " + inputPartitionSize); 460 | } 461 | 462 | 463 | } 464 | } 465 | 466 | -------------------------------------------------------------------------------- /workloadSuite/HDFSWrite.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 |  */
18 | 
19 | package org.apache.hadoop.examples;
20 | 
21 | import java.io.IOException;
22 | import java.util.Date;
23 | import java.util.Random;
24 | 
25 | import org.apache.hadoop.conf.Configuration;
26 | import org.apache.hadoop.conf.Configured;
27 | import org.apache.hadoop.fs.Path;
28 | import org.apache.hadoop.io.BytesWritable;
29 | import org.apache.hadoop.io.Text;
30 | import org.apache.hadoop.io.Writable;
31 | import org.apache.hadoop.io.WritableComparable;
32 | import org.apache.hadoop.mapred.ClusterStatus;
33 | import org.apache.hadoop.mapred.FileOutputFormat;
34 | import org.apache.hadoop.mapred.FileSplit;
35 | import org.apache.hadoop.mapred.InputFormat;
36 | import org.apache.hadoop.mapred.InputSplit;
37 | import org.apache.hadoop.mapred.JobClient;
38 | import org.apache.hadoop.mapred.JobConf;
39 | import org.apache.hadoop.mapred.MapReduceBase;
40 | import org.apache.hadoop.mapred.Mapper;
41 | import org.apache.hadoop.mapred.OutputCollector;
42 | import org.apache.hadoop.mapred.RecordReader;
43 | import org.apache.hadoop.mapred.Reporter;
44 | import org.apache.hadoop.mapred.SequenceFileOutputFormat;
45 | import org.apache.hadoop.mapred.lib.IdentityReducer;
46 | import org.apache.hadoop.util.GenericOptionsParser;
47 | import org.apache.hadoop.util.Tool;
48 | import org.apache.hadoop.util.ToolRunner;
49 | 
50 | /**
51 |  * This program uses map/reduce to just run a distributed job where there is
52 |  * no interaction between the tasks and each task writes a large unsorted
53 |  * random binary sequence file of BytesWritable.
54 |  * In order for this program to generate data for terasort with 10-byte keys
55 |  * and 90-byte values, have the following config:
56 |  * 
57 |  * <?xml version="1.0"?>
58 |  * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
59 |  * <configuration>
60 |  *   <property>
61 |  *     <name>test.randomwrite.min_key</name>
62 |  *     <value>10</value>
63 |  *   </property>
64 |  *   <property>
65 |  *     <name>test.randomwrite.max_key</name>
66 |  *     <value>10</value>
67 |  *   </property>
68 |  *   <property>
69 |  *     <name>test.randomwrite.min_value</name>
70 |  *     <value>90</value>
71 |  *   </property>
72 |  *   <property>
73 |  *     <name>test.randomwrite.max_value</name>
74 |  *     <value>90</value>
75 |  *   </property>
76 |  *   <property>
77 |  *     <name>test.randomwrite.total_bytes</name>
78 |  *     <value>1099511627776</value>
79 |  *   </property>
80 |  * </configuration>
81 |  * 
82 |  * Equivalently, {@link RandomWriter} also supports all the above options
83 |  * and ones supported by {@link GenericOptionsParser} via the command-line.
84 |  */
85 | public class HDFSWrite extends Configured implements Tool {
86 | 
87 |   /**
88 |    * User counters
89 |    */
90 |   static enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
91 | 
92 |   /**
93 |    * A custom input format that creates virtual inputs of a single string
94 |    * for each map.
95 |    */
96 |   static class RandomInputFormat implements InputFormat<Text, Text> {
97 | 
98 |     /** Accept all job confs */
99 |     public void validateInput(JobConf job) throws IOException {
100 |     }
101 | 
102 |     /**
103 |      * Generate the requested number of file splits, with the filename
104 |      * set to the filename of the output file.
105 |      */
106 |     public InputSplit[] getSplits(JobConf job,
107 |                                   int numSplits) throws IOException {
108 |       InputSplit[] result = new InputSplit[numSplits];
109 |       Path outDir = FileOutputFormat.getOutputPath(job);
110 |       for(int i=0; i < result.length; ++i) {
111 |         result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
112 |                                   (String[])null);
113 |       }
114 |       return result;
115 |     }
116 | 
117 |     /**
118 |      * Return a single record (filename, "") where the filename is taken from
119 |      * the file split.
120 |      */
121 |     static class RandomRecordReader implements RecordReader<Text, Text> {
122 |       Path name;
123 |       public RandomRecordReader(Path p) {
124 |         name = p;
125 |       }
126 |       public boolean next(Text key, Text value) {
127 |         if (name != null) {
128 |           key.set(name.getName());
129 |           name = null;
130 |           return true;
131 |         }
132 |         return false;
133 |       }
134 |       public Text createKey() {
135 |         return new Text();
136 |       }
137 |       public Text createValue() {
138 |         return new Text();
139 |       }
140 |       public long getPos() {
141 |         return 0;
142 |       }
143 |       public void close() {}
144 |       public float getProgress() {
145 |         return 0.0f;
146 |       }
147 |     }
148 | 
149 |     public RecordReader<Text, Text> getRecordReader(InputSplit split,
150 |                                                     JobConf job,
151 |                                                     Reporter reporter) throws IOException {
152 |       return new RandomRecordReader(((FileSplit) split).getPath());
153 |     }
154 |   }
155 | 
156 |   static class Map extends MapReduceBase
157 |     implements Mapper<WritableComparable, Writable,
158 |                       BytesWritable, BytesWritable> {
159 | 
160 |     private long numBytesToWrite;
161 |     private int minKeySize;
162 |     private int keySizeRange;
163 |     private int minValueSize;
164 |     private int valueSizeRange;
165 |     private Random random = new Random();
166 |     private BytesWritable randomKey = new BytesWritable();
167 |     private BytesWritable randomValue = new BytesWritable();
168 | 
169 |     private void randomizeBytes(byte[] data, int offset, int length) {
170 |       for(int i=offset + length - 1; i >= offset; --i) {
171 |         data[i] = (byte) random.nextInt(256);
172 |       }
173 |     }
174 | 
175 |     /**
176 |      * Given an output filename, write a bunch of random records to it.
177 |      */
178 |     public void map(WritableComparable key,
179 |                     Writable value,
180 |                     OutputCollector<BytesWritable, BytesWritable> output,
181 |                     Reporter reporter) throws IOException {
182 |       int itemCount = 0;
183 |       while (numBytesToWrite > 0) {
184 |         int keyLength = minKeySize +
185 |           (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
186 |         randomKey.setSize(keyLength);
187 |         randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
188 |         int valueLength = minValueSize +
189 |           (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
190 |         randomValue.setSize(valueLength);
191 |         randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
192 |         output.collect(randomKey, randomValue);
193 |         numBytesToWrite -= keyLength + valueLength;
194 |         reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
195 |         reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
196 |         if (++itemCount % 200 == 0) {
197 |           reporter.setStatus("wrote record " + itemCount + ". " +
198 |                              numBytesToWrite + " bytes left.");
199 |         }
200 |       }
201 |       reporter.setStatus("done with " + itemCount + " records.");
202 |     }
203 | 
204 |     /**
205 |      * Save the values out of the configuration that we need to write
206 |      * the data.
207 |      */
208 |     @Override
209 |     public void configure(JobConf job) {
210 |       numBytesToWrite = job.getLong("test.randomwrite.bytes_per_map",
211 |                                     1*1024*1024*1024);
212 |       minKeySize = job.getInt("test.randomwrite.min_key", 10);
213 |       keySizeRange =
214 |         job.getInt("test.randomwrite.max_key", 1000) - minKeySize;
215 |       minValueSize = job.getInt("test.randomwrite.min_value", 0);
216 |       valueSizeRange =
217 |         job.getInt("test.randomwrite.max_value", 20000) - minValueSize;
218 |     }
219 | 
220 |   }
221 | 
222 |   /**
223 |    * This is the main routine for launching a distributed random write job.
224 |    * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
225 |    * The reduce doesn't do anything.
226 |    * 
227 |    * @throws IOException 
228 |    */
229 |   public int run(String[] args) throws Exception {
230 |     if (args.length == 0) {
231 |       System.out.println("Usage: writer <out-dir>");
232 |       ToolRunner.printGenericCommandUsage(System.out);
233 |       return -1;
234 |     }
235 | 
236 |     Path outDir = new Path(args[0]);
237 |     JobConf job = new JobConf(getConf());
238 | 
239 |     job.setJarByClass(HDFSWrite.class);
240 |     job.setJobName("hdfsWrite");
241 |     FileOutputFormat.setOutputPath(job, outDir);
242 | 
243 |     job.setOutputKeyClass(BytesWritable.class);
244 |     job.setOutputValueClass(BytesWritable.class);
245 | 
246 |     job.setInputFormat(RandomInputFormat.class);
247 |     job.setMapperClass(Map.class);
248 |     job.setReducerClass(IdentityReducer.class);
249 |     job.setOutputFormat(SequenceFileOutputFormat.class);
250 | 
251 |     JobClient client = new JobClient(job);
252 |     ClusterStatus cluster = client.getClusterStatus();
253 |     int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
254 |     long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
255 |                                              1*1024*1024*1024);
256 |     if (numBytesToWritePerMap == 0) {
257 |       System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
258 |       return -2;
259 |     }
260 |     long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes",
261 |          numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
262 |     /*
263 |     int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
264 |     if (numMaps == 0 && totalBytesToWrite > 0) {
265 |       numMaps = 1;
266 |       job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
267 |     }
268 |     */
269 | 
270 |     int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
271 |     job.setNumMapTasks(numMaps);
272 | 
273 |     System.out.println("client.getClusterStatus().getMaxMapTasks() gives " + cluster.getMaxMapTasks());
274 |     System.out.println("client.getClusterStatus().getMaxReduceTasks() gives " + cluster.getMaxReduceTasks());
275 | 
276 |     System.out.println("Running on " +
277 |                        cluster.getTaskTrackers() + " nodes with " +
278 |                        numMaps + " maps, \n" +
279 |                        "writing " + totalBytesToWrite + " bytes with " +
280 |                        numBytesToWritePerMap + " bytes per map.");
281 | 
282 |     // reducer NONE
283 |     job.setNumReduceTasks(0);
284 | 
285 |     Date startTime = new Date();
286 |     System.out.println("Job started: " + startTime);
287 |     JobClient.runJob(job);
288 |     Date endTime = new Date();
289 |     System.out.println("Job ended: " + endTime);
290 |     System.out.println("The job took " +
291 |                        (endTime.getTime() - startTime.getTime()) /1000 +
292 |                        " seconds.");
293 | 
294 |     return 0;
295 |   }
296 | 
297 |   public static void main(String[] args) throws Exception {
298 |     int res = ToolRunner.run(new Configuration(), new HDFSWrite(), args);
299 |     System.exit(res);
300 |   }
301 | 
302 | }
303 | 
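A usage sketch, not taken from this repository's documentation (which lives on
the GitHub wiki): once compiled into a jar, HDFSWrite runs through the standard
hadoop launcher, with randomwriter_conf.xsl supplying the test.randomwrite.*
properties whose values GenerateReplayScript prints. The jar name and the HDFS
output directory below are illustrative placeholders, not paths from this repo.

    # hypothetical invocation; HDFSWrite.jar and workGenInput are placeholders
    hadoop jar HDFSWrite.jar org.apache.hadoop.examples.HDFSWrite \
        -conf randomwriter_conf.xsl workGenInput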
--------------------------------------------------------------------------------
/workloadSuite/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 | Copyright (c) 2011, Regents of the University of California.
3 | All rights reserved.
4 | 
5 | HDFSWrite.java and WorkGen.java are "Derivative Works" of Apache
6 | Hadoop version 0.20.2, and are governed by the Apache License 2.0,
7 | found at http://www.apache.org/licenses/LICENSE-2.0.
8 | 
9 | The remainder of this project is governed by the "New BSD License"
10 | below.
11 | 
12 | Redistribution and use in source and binary forms, with or without
13 | modification, are permitted provided that the following conditions
14 | are met:
15 | 
16 | * Redistributions of source code must retain the above copyright notice,
17 | this list of conditions and the following disclaimer.
18 | * Redistributions in binary form must reproduce the above copyright
19 | notice, this list of conditions and the following disclaimer in the
20 | documentation and/or other materials provided with the distribution.
21 | * Neither the name of the University of California, Berkeley
22 | nor the names of its contributors may be used to endorse or promote
23 | products derived from this software without specific prior written
24 | permission.
25 | 
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
30 | COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
33 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
34 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37 | OF THE POSSIBILITY OF SUCH DAMAGE.
38 | 
39 | 
--------------------------------------------------------------------------------
/workloadSuite/README:
--------------------------------------------------------------------------------
1 | 
2 | To reduce confusion, we have consolidated the SWIM GitHub
3 | wiki as the sole documentation source.
4 | 
5 | For full documentation, see https://github.com/SWIMProjectUCB/SWIM/wiki.
6 | 
--------------------------------------------------------------------------------
/workloadSuite/WorkGen.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Licensed to the Apache Software Foundation (ASF) under one
3 |  * or more contributor license agreements. See the NOTICE file
4 |  * distributed with this work for additional information
5 |  * regarding copyright ownership. The ASF licenses this file
6 |  * to you under the Apache License, Version 2.0 (the
7 |  * "License"); you may not use this file except in compliance
8 |  * with the License. You may obtain a copy of the License at
9 |  * 
10 |  *     http://www.apache.org/licenses/LICENSE-2.0
11 |  * 
12 |  * Unless required by applicable law or agreed to in writing, software
13 |  * distributed under the License is distributed on an "AS IS" BASIS,
14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |  * See the License for the specific language governing permissions and
16 |  * limitations under the License.
17 |  */
18 | 
19 | package org.apache.hadoop.examples;
20 | 
21 | import java.io.IOException;
22 | import java.util.*;
23 | 
24 | import org.apache.hadoop.conf.Configuration;
25 | import org.apache.hadoop.conf.Configured;
26 | import org.apache.hadoop.fs.Path;
27 | import org.apache.hadoop.io.BytesWritable;
28 | import org.apache.hadoop.io.Writable;
29 | import org.apache.hadoop.io.WritableComparable;
30 | import org.apache.hadoop.mapred.*;
31 | import org.apache.hadoop.mapred.lib.IdentityMapper;
32 | import org.apache.hadoop.mapred.lib.IdentityReducer;
33 | import org.apache.hadoop.util.Tool;
34 | import org.apache.hadoop.util.ToolRunner;
35 | import java.io.BufferedReader;
36 | import java.io.FileReader;
37 | 
38 | 
39 | /**
40 |  * Synthetic workload job: regenerates shuffle and output data as random bytes,
41 |  * sized by the shuffleInputRatio and outputShuffleRatio job parameters.
42 |  */
43 | public class WorkGen extends Configured implements Tool {
44 | 
45 |   static int printUsage() {
46 |     System.out.println("sort [-m <maps>] [-r <reduces> has no effect] " +
47 |                        "[-inFormat <input format class>] " +
48 |                        "[-outFormat <output format class>] " +
49 |                        "[-outKey <output key class>] " +
50 |                        "[-outValue <output value class>] " +
51 |                        " <inputPathFile> <output> <shuffleInputRatio> <outputShuffleRatio>");
52 |     ToolRunner.printGenericCommandUsage(System.out);
53 |     return -1;
54 |   }
55 | 
56 |   /**
57 |    * User counters
58 |    */
59 |   static enum Counters { MAP_RECORDS_WRITTEN, MAP_BYTES_WRITTEN, RED_RECORDS_WRITTEN, RED_BYTES_WRITTEN };
60 | 
61 | 
62 | 
63 |   /**
64 |    * For each input record, emits on average shuffleInputRatio random records;
65 |    * the fractional part of the ratio is emitted probabilistically.
66 |    */
67 |   static class RatioMapper extends MapReduceBase implements Mapper<WritableComparable, Writable, BytesWritable, BytesWritable> {
68 | 
69 |     private double shuffleInputRatio = 1.0d;
70 | 
71 |     private int minKeySize;
72 |     private int keySizeRange;
73 |     private int minValueSize;
74 |     private int valueSizeRange;
75 |     private Random random = new Random();
76 |     private BytesWritable randomKey;
77 |     private BytesWritable randomValue;
78 | 
79 |     private void randomizeBytes(byte[] data, int offset, int length) {
80 |       for(int i=offset + length - 1; i >= offset; --i) {
81 |         data[i] = (byte) random.nextInt(256);
82 |       }
83 |     }
84 | 
85 |     /** Input key/val pair is swallowed up, no action is taken */
86 |     public void map(WritableComparable key, Writable val, OutputCollector<BytesWritable, BytesWritable> output, Reporter reporter) throws IOException {
87 | 
88 |       double shuffleInputRatioTemp = shuffleInputRatio;
89 | 
90 |       // output floor(shuffleInputRatio) number of intermediate pairs
91 |       while (shuffleInputRatioTemp >= 0.0d) {
92 |         int keyLength = minKeySize + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
93 |         randomKey = new BytesWritable();
94 |         randomKey.setSize(keyLength);
95 |         randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
96 |         int valueLength = minValueSize + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
97 |         randomValue = new BytesWritable();
98 |         randomValue.setSize(valueLength);
99 |         randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
100 |         if (shuffleInputRatioTemp >= 1.0d || (random.nextDouble() < shuffleInputRatioTemp)) {
101 |           output.collect(randomKey, randomValue);
102 |           reporter.incrCounter(Counters.MAP_BYTES_WRITTEN, keyLength + valueLength);
103 |           reporter.incrCounter(Counters.MAP_RECORDS_WRITTEN, 1);
104 |         }
105 |         shuffleInputRatioTemp -= 1.0d;
106 |       } // end while
107 | 
108 |     } // end map()
109 | 
110 |     @Override
111 |     public void configure(JobConf job) {
112 |       shuffleInputRatio = Double.parseDouble(job.getRaw("workGen.ratios.shuffleInputRatio"));
113 |       minKeySize = job.getInt("workGen.randomwrite.min_key", 10);
114 |       keySizeRange = job.getInt("workGen.randomwrite.max_key", 1000) - minKeySize;
115 |       minValueSize = job.getInt("workGen.randomwrite.min_value", 0);
116 |       valueSizeRange = job.getInt("workGen.randomwrite.max_value", 20000) - minValueSize;
117 |     }
118 | 
119 |   } // end static class RatioMapper
120 | 
121 |   /**
122 |    * For each shuffled record, emits on average outputShuffleRatio random
123 |    * records; the fractional part of the ratio is emitted probabilistically.
124 |    */
125 |   static class RatioReducer extends MapReduceBase implements Reducer<WritableComparable, Writable, BytesWritable, BytesWritable> {
126 | 
127 |     private double outputShuffleRatio = 1.0d;
128 | 
129 |     private int minKeySize;
130 |     private int keySizeRange;
131 |     private int minValueSize;
132 |     private int valueSizeRange;
133 |     private Random random = new Random();
134 |     private BytesWritable randomKey;
135 |     private BytesWritable randomValue;
136 | 
137 |     private void randomizeBytes(byte[] data, int offset, int length) {
138 |       for(int i=offset + length - 1; i >= offset; --i) {
139 |         data[i] = (byte) random.nextInt(256);
140 |       }
141 |     }
142 | 
143 |     public void reduce(WritableComparable key, Iterator<Writable> values,
144 |                        OutputCollector<BytesWritable, BytesWritable> output,
145 |                        Reporter reporter)
146 |       throws IOException {
147 | 
148 |       while (values.hasNext()) {
149 |         Writable value = values.next();
150 | 
151 |         double outputShuffleRatioTemp = outputShuffleRatio;
152 | 
153 |         // output floor(outputShuffleRatio) number of intermediate pairs
154 |         while (outputShuffleRatioTemp >= 0.0d) {
155 |           int keyLength = minKeySize + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
156 |           randomKey = new BytesWritable();
157 |           randomKey.setSize(keyLength);
158 |           randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
159 |           int valueLength = minValueSize + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
160 |           randomValue = new BytesWritable();
161 |           randomValue.setSize(valueLength);
162 |           randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
163 |           if (outputShuffleRatioTemp >= 1.0d || (random.nextDouble() < outputShuffleRatioTemp)) {
164 |             output.collect(randomKey, randomValue);
165 |             reporter.incrCounter(Counters.RED_BYTES_WRITTEN, keyLength + valueLength);
166 |             reporter.incrCounter(Counters.RED_RECORDS_WRITTEN, 1);
167 |           }
168 |           outputShuffleRatioTemp -= 1.0d;
169 |         } // end while
170 |       }
171 |     }
172 | 
173 |     @Override
174 |     public void configure(JobConf job) {
175 |       outputShuffleRatio = Double.parseDouble(job.getRaw("workGen.ratios.outputShuffleRatio"));
176 |       minKeySize = job.getInt("workGen.randomwrite.min_key", 10);
177 |       keySizeRange = job.getInt("workGen.randomwrite.max_key", 10) - minKeySize;
178 |       minValueSize = job.getInt("workGen.randomwrite.min_value", 90);
179 |       valueSizeRange = job.getInt("workGen.randomwrite.max_value", 90) - minValueSize;
180 |     }
181 | 
182 |   }
183 | 
184 | 
185 |   /**
186 |    * The main driver for the program.
187 |    * Invoke this method to submit the map/reduce job.
188 |    * @throws IOException When there are communication problems with the
189 |    *                     job tracker.
190 |    */
191 |   public int run(String[] args) throws Exception {
192 | 
193 |     JobConf jobConf = new JobConf(getConf(), WorkGen.class);
194 |     jobConf.setJobName("workGen");
195 | 
196 |     jobConf.setMapperClass(RatioMapper.class);
197 |     jobConf.setReducerClass(RatioReducer.class);
198 | 
199 |     JobClient client = new JobClient(jobConf);
200 |     ClusterStatus cluster = client.getClusterStatus();
201 |     int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.45);
202 |     int num_maps = (int) (cluster.getMaxMapTasks() * 0.9);
203 |     String sort_reduces = jobConf.get("workGen.sort.reduces_per_host");
204 |     if (sort_reduces != null) {
205 |       num_reduces = cluster.getTaskTrackers() *
206 |                     Integer.parseInt(sort_reduces);
207 |     }
208 |     Class<? extends InputFormat> inputFormatClass =
209 |       SequenceFileInputFormat.class;
210 |     Class<? extends OutputFormat> outputFormatClass =
211 |       SequenceFileOutputFormat.class;
212 |     Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
213 |     Class<? extends Writable> outputValueClass = BytesWritable.class;
214 |     List<String> otherArgs = new ArrayList<String>();
215 |     for(int i=0; i < args.length; ++i) {
216 |       try {
217 |         if ("-m".equals(args[i])) {
218 |           num_maps = Integer.parseInt(args[++i]);
219 |         } else if ("-r".equals(args[i])) {
220 |           num_reduces = Integer.parseInt(args[++i]);
221 |         } else if ("-inFormat".equals(args[i])) {
222 |           inputFormatClass =
223 |             Class.forName(args[++i]).asSubclass(InputFormat.class);
224 |         } else if ("-outFormat".equals(args[i])) {
225 |           outputFormatClass =
226 |             Class.forName(args[++i]).asSubclass(OutputFormat.class);
227 |         } else if ("-outKey".equals(args[i])) {
228 |           outputKeyClass =
229 |             Class.forName(args[++i]).asSubclass(WritableComparable.class);
230 |         } else if ("-outValue".equals(args[i])) {
231 |           outputValueClass =
232 |             Class.forName(args[++i]).asSubclass(Writable.class);
233 |         } else {
234 |           otherArgs.add(args[i]);
235 |         }
236 |       } catch (NumberFormatException except) {
237 |         System.out.println("ERROR: Integer expected instead of " + args[i]);
238 |         return printUsage();
239 |       } catch (ArrayIndexOutOfBoundsException except) {
240 |         System.out.println("ERROR: Required parameter missing from " +
241 |                            args[i-1]);
242 |         return printUsage(); // exits
243 |       }
244 |     }
245 | 
246 |     // Set user-supplied (possibly default) job configs
247 |     jobConf.setNumReduceTasks(num_reduces);
248 | 
249 |     jobConf.setInputFormat(inputFormatClass);
250 |     jobConf.setOutputFormat(outputFormatClass);
251 | 
252 |     jobConf.setOutputKeyClass(outputKeyClass);
253 |     jobConf.setOutputValueClass(outputValueClass);
254 | 
255 |     // Make sure there are exactly 4 parameters left.
256 |     if (otherArgs.size() != 4) {
257 |       System.out.println("ERROR: Wrong number of parameters: " +
258 |                          otherArgs.size() + " instead of 4.");
259 |       return printUsage();
260 |     }
261 |     BufferedReader input = new BufferedReader(new FileReader(otherArgs.get(0)));
262 |     String inputPaths = input.readLine();
263 |     FileInputFormat.setInputPaths(jobConf, inputPaths);
264 |     FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));
265 |     jobConf.set("workGen.ratios.shuffleInputRatio", otherArgs.get(2));
266 |     jobConf.set("workGen.ratios.outputShuffleRatio", otherArgs.get(3));
267 | 
268 |     System.out.println("Max number of map tasks " + cluster.getMaxMapTasks());
269 |     System.out.println("Max number of red tasks " + cluster.getMaxReduceTasks());
270 |     System.out.println("shuffleInputRatio = " + Double.parseDouble(jobConf.getRaw("workGen.ratios.shuffleInputRatio")));
271 |     System.out.println("outputShuffleRatio = " + Double.parseDouble(jobConf.getRaw("workGen.ratios.outputShuffleRatio")));
272 | 
273 |     System.out.println("Running on " +
274 |                        cluster.getTaskTrackers() + " nodes with " +
275 |                        num_maps + " maps and " +
276 |                        num_reduces + " reduces.");
277 |     Date startTime = new Date();
278 |     Random random = new Random();
279 |     System.out.println(random.nextDouble());
280 |     System.out.println(random.nextDouble());
281 |     System.out.println("Job started: " + startTime);
282 |     JobClient.runJob(jobConf);
283 |     Date end_time = new Date();
284 |     System.out.println("Job ended: " + end_time);
285 |     System.out.println("The job took " +
286 |                        (end_time.getTime() - startTime.getTime()) /1000 + " seconds.");
287 |     return 0;
288 |   }
289 | 
290 | 
291 | 
292 |   public static void main(String[] args) throws Exception {
293 |     int res = ToolRunner.run(new Configuration(), new WorkGen(), args);
294 |     System.exit(res);
295 |   }
296 | 
297 | }
298 | 
--------------------------------------------------------------------------------
/workloadSuite/WorkloadSynthesis.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | use strict;
3 | use warnings;
4 | use Getopt::Long;
5 | use Date::Parse;
6 | use POSIX;
7 | 
8 | my $inPath = undef;
9 | my $outPrefix = undef;
10 | my $repeats = undef;
11 | my $samples = undef;
12 | my $length = undef;
13 | 
14 | my $traceStart = undef;
15 | my $traceEnd = undef;
16 | 
17 | my $cmdline_result = GetOptions("inPath=s" => \$inPath,
18 |                                 "outPrefix=s" => \$outPrefix,
19 |                                 "repeats=i" => \$repeats,
20 |                                 "samples=i" => \$samples,
21 |                                 "length=i" => \$length,
22 |                                 "traceStart=i" => \$traceStart,
23 |                                 "traceEnd=i" => \$traceEnd );
24 | 
25 | my @all_data = ();
26 | 
27 | close(INPUT_FILE);
28 | open(INPUT_FILE, "< $inPath") or die "Cannot open $inPath";
29 | 
30 | my $dataSize = 0;
31 | while (<INPUT_FILE>) {
32 | 
33 |     #if (($dataSize % 100000) == 0) { print "read line \#" . $dataSize .
"\n"; } 34 | 35 | chomp; 36 | my $line = $_; 37 | my @fields = split(/\t/, $line); 38 | 39 | $all_data[$dataSize][0] = $fields[0]; # unique_job_id 40 | $all_data[$dataSize][1] = $fields[1]; # job_name 41 | $all_data[$dataSize][2] = $fields[2]; # map_input_bytes 42 | $all_data[$dataSize][3] = $fields[3]; # shuffle_bytes 43 | $all_data[$dataSize][4] = $fields[4]; # reduce_output_bytes 44 | $all_data[$dataSize][5] = $fields[5]; # submit_time_seconds 45 | $all_data[$dataSize][6] = $fields[6]; # duration_seconds 46 | $all_data[$dataSize][7] = $fields[7]; # map_time_task_seconds 47 | $all_data[$dataSize][8] = $fields[8]; # red_time_task_seconds 48 | $all_data[$dataSize][9] = $fields[9]; # total_time_task_seconds 49 | $all_data[$dataSize][10] = $fields[12]; # input path 50 | $all_data[$dataSize][11] = $fields[13]; # output path 51 | 52 | $dataSize++; 53 | 54 | } 55 | 56 | #print "total lines " . $dataSize . "\n"; 57 | 58 | close(INPUT_FILE); 59 | 60 | sample_and_print($inPath, $outPrefix); 61 | 62 | sub sample_and_print { 63 | 64 | my ($in_path, $out_prefix) = @_; 65 | 66 | for (my $i=0; $i<$repeats; $i++) { 67 | 68 | my $j = 0; 69 | my $startPoint = 0; 70 | my $remainder = 0; 71 | 72 | my $jobNumber = 0; 73 | my $timeSoFar = 0; 74 | 75 | my %inputHash = (); 76 | my %outputHash = (); 77 | 78 | # truncate previously existing file and open new one for append 79 | 80 | close(OUTPUT_FILE); 81 | open(OUTPUT_FILE, "> $out_prefix\_$i") or die "Cannot open $out_prefix\_$i"; 82 | close(OUTPUT_FILE); 83 | open(OUTPUT_FILE, ">> $out_prefix\_$i") or die "Cannot open $out_prefix\_$i"; 84 | 85 | # sample and print 86 | 87 | for ($j=0; $j<$samples; $j++) { 88 | 89 | my $startTime = $traceStart + rand() * ($traceEnd - $traceStart - $length); 90 | my $endTime = $startTime + $length; 91 | 92 | # book keeping ... 93 | 94 | my $prev = $startTime - $remainder; 95 | $remainder = $length + $remainder; 96 | 97 | # binary search to find index of first job with job submit time >= $startTime 98 | 99 | my $min = 0; 100 | my $max = $dataSize - 1; 101 | my $mid = $min + floor(($max - $min)/2); 102 | 103 | while (($min <= $max) && ($all_data[$mid][5] != $startTime)){ 104 | $mid = $min + floor(($max - $min)/2); 105 | if ($startTime >= ($all_data[$mid][5])) { 106 | $min = $mid + 1; 107 | } else { 108 | $max = $mid - 1; 109 | } 110 | } 111 | 112 | # print out workload 113 | 114 | for (my $k=$mid; $all_data[$k][5] <= $endTime; $k++) { 115 | 116 | if ($all_data[$k][5] >= $startTime && $all_data[$k][5] <= $endTime) { 117 | 118 | $timeSoFar += ($all_data[$k][5] - floor($prev)); 119 | 120 | print OUTPUT_FILE "job" . $jobNumber . "\t"; 121 | print OUTPUT_FILE $timeSoFar . "\t"; 122 | print OUTPUT_FILE ($all_data[$k][5] - floor($prev)) . "\t"; # inter-job time gap seconds 123 | print OUTPUT_FILE $all_data[$k][2] . "\t"; 124 | print OUTPUT_FILE $all_data[$k][3] . "\t"; 125 | print OUTPUT_FILE $all_data[$k][4] . "\t"; 126 | 127 | # print anonymized input path if info available, else print TAB 128 | if (defined($all_data[$k][10])) { 129 | my $inputPath = $all_data[$k][10]; 130 | if (defined($inputHash{$inputPath})) { 131 | $inputPath = $inputHash{$inputPath}; 132 | } else { 133 | $inputHash{$inputPath} = scalar(keys( %inputHash )); 134 | $inputPath = scalar(keys( %inputHash )); 135 | } 136 | $inputPath = "inputPath" . $inputPath; 137 | print OUTPUT_FILE $inputPath . 
"\t"; 138 | } else { 139 | print OUTPUT_FILE "\t"; 140 | } 141 | 142 | # print anonymized output path if info available, else print TAB 143 | if (defined($all_data[$k][11])) { 144 | my $outputPath = $all_data[$k][11]; 145 | if (defined($outputHash{$outputPath})) { 146 | $outputPath = $outputHash{$outputPath}; 147 | } else { 148 | $outputHash{$outputPath} = scalar(keys( %outputHash )); 149 | $outputPath = scalar(keys( %outputHash )); 150 | } 151 | $outputPath = "outputPath" . $outputPath; 152 | print OUTPUT_FILE $outputPath . "\t"; 153 | } else { 154 | print OUTPUT_FILE "\t"; 155 | } 156 | 157 | print OUTPUT_FILE "\n"; 158 | 159 | $prev = $all_data[$k][5]; 160 | $remainder = $endTime - $all_data[$k][5]; 161 | $jobNumber++; 162 | } 163 | 164 | } 165 | 166 | 167 | } # end for ($j=0; $j<$samples; $j++) 168 | 169 | close(OUTPUT_FILE); 170 | 171 | } # end for (my $i=0; $i<$repeats; $i++) 172 | 173 | } # end sub sample_and_print 174 | 175 | -------------------------------------------------------------------------------- /workloadSuite/parse-hadoop-jobhistory.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # Log Parger for Hadoop jobHistory files 4 | 5 | ################################################################ 6 | # 7 | # Copyright (c) 2011, Regents of the University of California. 8 | # All rights reserved. 9 | # 10 | # This file is governed by the "New BSD License" below. 11 | # 12 | # 13 | # 14 | # Redistribution and use in source and binary forms, with or without 15 | # modification, are permitted provided that the following conditions 16 | # are met: 17 | # 18 | # * Redistributions of source code must retain the above copyright notice, 19 | # this list of conditions and the following disclaimer. 20 | # * Redistributions in binary form must reproduce the above copyright 21 | # notice, this list of conditions and the following disclaimer in the 22 | # documentation and/or other materials provided with the distribution. 23 | # * Neither the name of the University of California, Berkeley 24 | # nor the names of its contributors may be used to endorse or promote 25 | # products derived from this software without specific prior written 26 | # permission. 27 | # 28 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 31 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 32 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 33 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 34 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 35 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 36 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 39 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
40 | #
41 | ################################################################
42 | 
43 | 
44 | use List::Util qw[min max];
45 | use Digest::MD5 qw(md5 md5_hex md5_base64);
46 | 
47 | $historyDir = $ARGV[0];
48 | 
49 | opendir(my $HIST_DIR, $historyDir);
50 | #my @directory_contents = readdir(HIST_DIR);
51 | #closedir(HIST_DIR);
52 | 
53 | my %jobs = ();
54 | my $files = 0;
55 | 
56 | #foreach my $dentry ( @directory_contents ) {
57 | 
58 | while (
59 |     defined( my $dentry = readdir $HIST_DIR )
60 | ) {
61 | 
62 |     #if (!($dentry =~ /^*conf*/) && !($dentry =~ /^*crc/)) {
63 |     $files++;
64 | 
65 |     if ($files % 5000 == 0) {
66 |         print STDERR $files . "\n";
67 |     }
68 |     if (!($dentry =~ /^*crc/)) {
69 |         #print "$historyDir\/$dentry\n";
70 | 
71 |         $file = "$historyDir\/$dentry";
72 |         open (FILE, $file);
73 |         $total_map_time = 0;
74 |         $total_reduce_time = 0;
75 |         $submit_time = 0;
76 |         $launch_time = 0;
77 |         $finish_time = 0;
78 |         my $status = "";
79 |         my $job_id = "";
80 |         my $job_name="";
81 |         my $map_input_bytes = 0;
82 |         my $shuffle_bytes = 0;
83 |         my $red_output_bytes = 0;
84 |         my $map_input_bytes_per_record = 0;
85 |         my $red_output_records = 0;
86 | 
87 |         #my %tasks = {};
88 |         #my %jobs = {};
89 | 
90 |         #if (1==2) {
91 |         while (<FILE>) {
92 | 
93 | 
94 |             chomp;
95 |             ($line) = split("\n");
96 | 
97 | 
98 |             if($dentry =~ /job_(\d+)_(\d+)_conf.xml/) {
99 |                 $job_id = "job_" . $1 . "_" . $2;
100 |                 $jobs{$job_id}{"checkedPaths"} = 1;
101 | 
102 |                 if($line =~ /\<property\>(.*?)\<name\>mapred\.input\.dir\<\/name\>\<value\>(.*?)\<\/value\>\<\/property\>/)
103 |                 {
104 |                     $jobs{$job_id}{"input_dir"} = md5_hex($2);
105 |                 }
106 |                 if($line =~ /\<property\>(.*?)\<name\>mapred\.output\.dir\<\/name\>\<value\>(.*?)\<\/value\>\<\/property\>/)
107 |                 {
108 |                     $jobs{$job_id}{"output_dir"} = md5_hex($2);
109 |                 }
110 |             }
111 | 
112 |             if($line =~ /Job JOBID="(\S+)"/) {
113 |                 $job_id = $1;
114 |                 $jobs{$job_id}{"checkedStats"} = 1;
115 | 
116 |                 if($line =~ /JOBNAME="(.*?)"/)
117 |                 {
118 |                     $jobs{$job_id}{"job_name"} = $1;
119 |                 }
120 |                 if ($line =~ /SUBMIT_TIME="(\d+)"/) {
121 |                     $jobs{$job_id}{"submit_time"} = $1;
122 |                 }
123 | 
124 |                 if ($line =~ /LAUNCH_TIME="(\d+)"/) {
125 |                     $jobs{$job_id}{"launch_time"} = $1;
126 |                 }
127 | 
128 |                 if ($line =~ /FINISH_TIME="(\d+)"/) {
129 |                     $jobs{$job_id}{"finish_time"} = $1;
130 |                 }
131 | 
132 |                 if($line =~ /TOTAL_MAPS\="(\d+)"/) {
133 |                     $jobs{$job_id}{"maps"} = $1;
134 |                 }
135 |                 if ($line =~ /TOTAL_REDUCES\="(\d+)"/) {
136 |                     $jobs{$job_id}{"reduces"} = $1;
137 |                 }
138 | 
139 |                 if ($line =~ /JOB_STATUS="(\w+)"/) {
140 |                     $jobs{$job_id}{"status"} = $1;
141 |                 }
142 | 
143 |                 if ($line =~ /\(HDFS_BYTES_READ\)\((\d+)\)/) {
144 |                     $jobs{$job_id}{"map_input_bytes"} = $1;
145 |                 }
146 | 
147 |                 if ($line =~ /\(Map input records\)\((\d+)\)/) {
148 |                     if ($1>0) {
149 |                         $jobs{$job_id}{"map_input_bytes_per_record"} = $jobs{$job_id}{"map_input_bytes"} / $1;
150 |                     }
151 |                 }
152 | 
153 |                 if ($line =~ /\(Map output records\)\((\d+)\)/) {
154 |                     $jobs{$job_id}{"shuffle_bytes"} = $1 * $jobs{$job_id}{"map_input_bytes_per_record"};
155 |                 }
156 | 
157 |                 if ($line =~ /\(HDFS_BYTES_WRITTEN\)\((\d+)\)/) {
158 |                     $jobs{$job_id}{"red_output_bytes"} = $1;
159 |                 }
160 | 
161 |                 if ($line =~ /\(Map output bytes\)\((\d+)\)/) {
162 |                     $jobs{$job_id}{"shuffle_bytes"} = $1;
163 |                 }
164 | 
165 | 
166 |             }
167 | 
168 |             if ($line =~ /Task TASKID="(\S+)"/)
169 |             {
170 |                 $task_id = $1;
171 | 
172 |                 if($line =~ /TASK_TYPE="(\w+)"/) {
173 |                     $jobs{$job_id}{"tasks"}{$task_id}{"task_type"} = $1;
174 |                 }
175 |                 if ($line =~ /START_TIME="(\d+)"/) {
176 |                     $jobs{$job_id}{"tasks"}{$task_id}{"start_time"} = $1;
177 |                 }
178 | 
179 | 
                if ($line =~ /TASK_TYPE="(\w+)"/) {
                    $jobs{$job_id}{"tasks"}{$task_id}{"type"} = $1;
                }
                if ($line =~ /TASK_STATUS="(\w+)"/) {
                    #$status = $1;
                    $jobs{$job_id}{"tasks"}{$task_id}{"status"} = $1;
                }
                if ($line =~ /FINISH_TIME="(\d+)"/) {
                    $jobs{$job_id}{"tasks"}{$task_id}{"end_time"} = $1;
                }
                $jobs{$job_id}{"tasks"}{$task_id}{"execution_time"} = $jobs{$job_id}{"tasks"}{$task_id}{"end_time"} - $jobs{$job_id}{"tasks"}{$task_id}{"start_time"};

                if ($jobs{$job_id}{"tasks"}{$task_id}{"type"} =~ /MAP/ and $jobs{$job_id}{"tasks"}{$task_id}{"status"} =~ /SUCCESS/) {
                    $jobs{$job_id}{"total_map_time"} += $jobs{$job_id}{"tasks"}{$task_id}{"execution_time"};
                }
                if ($jobs{$job_id}{"tasks"}{$task_id}{"type"} =~ /REDUCE/ and $jobs{$job_id}{"tasks"}{$task_id}{"status"} =~ /SUCCESS/) {
                    $jobs{$job_id}{"total_reduce_time"} += $jobs{$job_id}{"tasks"}{$task_id}{"execution_time"};
                    if ($line =~ /\(HDFS_BYTES_WRITTEN\)\((\d+)\)/) {
                        $jobs{$job_id}{"red_output_bytes"} += $1;
                    }
                    if ($line =~ /\(Reduce output records\)\((\d+)\)/) {
                        $jobs{$job_id}{"red_output_records"} += $1;
                    }
                }

            }

        }
        close (FILE);

        if ($jobs{$job_id}{"status"} =~ /SUCCESS/) {
            if ($jobs{$job_id}{"red_output_bytes"} == 0 && $jobs{$job_id}{"red_output_records"} > 0) {
                $jobs{$job_id}{"red_output_bytes"} = $jobs{$job_id}{"red_output_records"} * $jobs{$job_id}{"map_input_bytes_per_record"};
            }
            # map-only job: what was counted as shuffle data is really the job's output
            if ($jobs{$job_id}{"reduces"} == 0 && $jobs{$job_id}{"red_output_bytes"} == 0) {
                $jobs{$job_id}{"red_output_bytes"} = $jobs{$job_id}{"shuffle_bytes"};
                $jobs{$job_id}{"shuffle_bytes"} = 0;
            }
            #print
            #"$job_id\t$job_name\t$map_input_bytes\t$shuffle_bytes\t$red_output_bytes\t" . ($submit_time / 1000) . "\t" .
            #(($finish_time - $launch_time) / 1000) . "\t" . ($total_map_time/1000) . "\t" .
            #($total_reduce_time/1000 ) . "\t" . (($total_map_time + $total_reduce_time) / 1000) . "\t$maps\t$reduces";
            #print "\n";
        }

        if (defined($jobs{$job_id}{"checkedPaths"}) && defined($jobs{$job_id}{"checkedStats"}) && $jobs{$job_id}{"status"} =~ /SUCCESS/) {
            print
                "$job_id\t" .
                $jobs{$job_id}{"job_name"} . "\t" .
                $jobs{$job_id}{"map_input_bytes"} . "\t" .
                $jobs{$job_id}{"shuffle_bytes"} . "\t" .
                $jobs{$job_id}{"red_output_bytes"} . "\t" .
                ($jobs{$job_id}{"submit_time"} / 1000) . "\t" .
                (($jobs{$job_id}{"finish_time"} - $jobs{$job_id}{"launch_time"}) / 1000) . "\t" .
                ($jobs{$job_id}{"total_map_time"} / 1000) . "\t" .
                ($jobs{$job_id}{"total_reduce_time"} / 1000) . "\t" .
                (($jobs{$job_id}{"total_map_time"} + $jobs{$job_id}{"total_reduce_time"}) / 1000) . "\t" .
                $jobs{$job_id}{"maps"} . "\t" .
                $jobs{$job_id}{"reduces"} . "\t" .
                $jobs{$job_id}{"input_dir"} . "\t" .
                $jobs{$job_id}{"output_dir"};
            print "\n";
            delete($jobs{$job_id});
        }

    }
}

#exit;

# flush any jobs still buffered at the end (seen in only one of the two
# file kinds, so they never passed the checkedPaths/checkedStats test above)
foreach $job_id (keys %jobs) {
    if ($jobs{$job_id}{"status"} =~ /SUCCESS/) {
        print
            "$job_id\t" .
            $jobs{$job_id}{"job_name"} . "\t" .
            $jobs{$job_id}{"map_input_bytes"} . "\t" .
            $jobs{$job_id}{"shuffle_bytes"} . "\t" .
            $jobs{$job_id}{"red_output_bytes"} . "\t" .
            ($jobs{$job_id}{"submit_time"} / 1000) . "\t" .
            (($jobs{$job_id}{"finish_time"} - $jobs{$job_id}{"launch_time"}) / 1000) . "\t" .
            ($jobs{$job_id}{"total_map_time"} / 1000) . "\t" .
            ($jobs{$job_id}{"total_reduce_time"} / 1000) . "\t" .
            (($jobs{$job_id}{"total_map_time"} + $jobs{$job_id}{"total_reduce_time"}) / 1000) . "\t" .
            $jobs{$job_id}{"maps"} . "\t" .
            $jobs{$job_id}{"reduces"} . "\t" .
            $jobs{$job_id}{"input_dir"} . "\t" .
            $jobs{$job_id}{"output_dir"};
        print "\n";
    }
}

closedir($HIST_DIR);

exit;
--------------------------------------------------------------------------------
/workloadSuite/randomwriter_conf.xsl:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>test.randomwrite.min_key</name>
    <value>10</value>
  </property>
  <property>
    <name>test.randomwrite.max_key</name>
    <value>10</value>
  </property>
  <property>
    <name>test.randomwrite.min_value</name>
    <value>90</value>
  </property>
  <property>
    <name>test.randomwrite.max_value</name>
    <value>90</value>
  </property>
  <property>
    <name>test.randomwrite.total_bytes</name>
    <value>4294967296</value>
  </property>
  <property>
    <name>test.randomwrite.bytes_per_map</name>
    <value>67108864</value>
  </property>
</configuration>
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-0.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-1.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-10.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-11.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-12.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-13.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-14.txt:
--------------------------------------------------------------------------------
workGenInput/part-00007
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-15.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-16.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-17.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006,workGenInput/part-00007,workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-18.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-19.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005,workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-2.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-20.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-21.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-22.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-23.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-24.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-25.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-26.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-27.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-28.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-29.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-3.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-30.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-31.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-32.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-33.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-34.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-35.txt:
--------------------------------------------------------------------------------
workGenInput/part-00005
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-36.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-37.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-38.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-39.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-4.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-40.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-41.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-42.txt:
--------------------------------------------------------------------------------
workGenInput/part-00006
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-43.txt:
--------------------------------------------------------------------------------
workGenInput/part-00008
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-44.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-45.txt:
--------------------------------------------------------------------------------
workGenInput/part-00001
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-46.txt:
--------------------------------------------------------------------------------
workGenInput/part-00007
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-47.txt:
--------------------------------------------------------------------------------
workGenInput/part-00004
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-48.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-49.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-5.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-6.txt:
--------------------------------------------------------------------------------
workGenInput/part-00003
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-7.txt:
--------------------------------------------------------------------------------
workGenInput/part-00004
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-8.txt:
--------------------------------------------------------------------------------
workGenInput/part-00002
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/inputPath-job-9.txt:
--------------------------------------------------------------------------------
workGenInput/part-00000
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-0.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-0.txt workGenOutputTest-0 5.810261E-4 0.26818323 >> workGenLogs/job-0.txt 2>> workGenLogs/job-0.txt
hadoop dfs -rmr workGenOutputTest-0
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-1.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-1.txt workGenOutputTest-1 4.223287E-4 0.2541811 >> workGenLogs/job-1.txt 2>> workGenLogs/job-1.txt
hadoop dfs -rmr workGenOutputTest-1
# inputSize 67108864
--------------------------------------------------------------------------------
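Each run-job-N.sh follows the same template: launch one WorkGen MapReduce job with a reducer count (-r), the job's input-path file, a job-specific output directory, and two trailing floating-point arguments, then delete the output so the test cluster does not fill up. Reading the two floats as the shuffle-to-input and output-to-shuffle data ratios consumed by WorkGen (an assumption here, taken from the SWIM documentation rather than anything in these scripts), a minimal sketch of the data sizes implied by run-job-0.sh above:

#!/usr/bin/perl
# Sketch: per-job data sizes implied by run-job-0.sh, assuming the two
# trailing WorkGen arguments are shuffle/input and output/shuffle ratios.
my $input   = 67108864;       # "# inputSize" comment in run-job-0.sh
my $sir     = 5.810261E-4;    # assumed shuffle-to-input ratio
my $osr     = 0.26818323;     # assumed output-to-shuffle ratio
my $shuffle = $input * $sir;      # ~39 KB of intermediate map output
my $output  = $shuffle * $osr;    # ~10 KB written by the reduce stage
printf "shuffle = %.0f bytes, output = %.0f bytes\n", $shuffle, $output;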
/workloadSuite/scriptsTest/run-job-10.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-10.txt workGenOutputTest-10 1.5258789E-5 1.0 >> workGenLogs/job-10.txt 2>> workGenLogs/job-10.txt
hadoop dfs -rmr workGenOutputTest-10
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-11.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-11.txt workGenOutputTest-11 1.5258789E-5 1.0 >> workGenLogs/job-11.txt 2>> workGenLogs/job-11.txt
hadoop dfs -rmr workGenOutputTest-11
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-12.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-12.txt workGenOutputTest-12 5.777925E-4 0.02640877 >> workGenLogs/job-12.txt 2>> workGenLogs/job-12.txt
hadoop dfs -rmr workGenOutputTest-12
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-13.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-13.txt workGenOutputTest-13 1.5258789E-5 1.0 >> workGenLogs/job-13.txt 2>> workGenLogs/job-13.txt
hadoop dfs -rmr workGenOutputTest-13
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-14.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-14.txt workGenOutputTest-14 1.5258789E-5 1.0 >> workGenLogs/job-14.txt 2>> workGenLogs/job-14.txt
hadoop dfs -rmr workGenOutputTest-14
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-15.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-15.txt workGenOutputTest-15 2.7373433E-5 0.55743057 >> workGenLogs/job-15.txt 2>> workGenLogs/job-15.txt
hadoop dfs -rmr workGenOutputTest-15
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-16.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-16.txt workGenOutputTest-16 1.5258789E-5 1.0 >> workGenLogs/job-16.txt 2>> workGenLogs/job-16.txt
hadoop dfs -rmr workGenOutputTest-16
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-17.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 4 inputPath-job-17.txt workGenOutputTest-17 1.2676634 0.2764548 >> workGenLogs/job-17.txt 2>> workGenLogs/job-17.txt
hadoop dfs -rmr workGenOutputTest-17
# inputSize 171246518
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-18.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-18.txt workGenOutputTest-18 1.5258789E-5 1.0 >> workGenLogs/job-18.txt 2>> workGenLogs/job-18.txt
hadoop dfs -rmr workGenOutputTest-18
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-19.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-19.txt workGenOutputTest-19 0.574622 3.951307E-4 >> workGenLogs/job-19.txt 2>> workGenLogs/job-19.txt
hadoop dfs -rmr workGenOutputTest-19
# inputSize 79607743
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-2.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-2.txt workGenOutputTest-2 1.47596E-4 0.39293286 >> workGenLogs/job-2.txt 2>> workGenLogs/job-2.txt
hadoop dfs -rmr workGenOutputTest-2
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-20.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-20.txt workGenOutputTest-20 1.5258789E-5 1.0 >> workGenLogs/job-20.txt 2>> workGenLogs/job-20.txt
hadoop dfs -rmr workGenOutputTest-20
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-21.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-21.txt workGenOutputTest-21 1.5258789E-5 1.0 >> workGenLogs/job-21.txt 2>> workGenLogs/job-21.txt
hadoop dfs -rmr workGenOutputTest-21
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-22.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-22.txt workGenOutputTest-22 1.5258789E-5 1.0 >> workGenLogs/job-22.txt 2>> workGenLogs/job-22.txt
hadoop dfs -rmr workGenOutputTest-22
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-23.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-23.txt workGenOutputTest-23 1.5258789E-5 1.0 >> workGenLogs/job-23.txt 2>> workGenLogs/job-23.txt
hadoop dfs -rmr workGenOutputTest-23
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-24.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-24.txt workGenOutputTest-24 1.5258789E-5 1.0 >> workGenLogs/job-24.txt 2>> workGenLogs/job-24.txt
hadoop dfs -rmr workGenOutputTest-24
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-25.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-25.txt workGenOutputTest-25 0.011125132 1.6002492 >> workGenLogs/job-25.txt 2>> workGenLogs/job-25.txt
hadoop dfs -rmr workGenOutputTest-25
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-26.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-26.txt workGenOutputTest-26 0.001182422 0.16898338 >> workGenLogs/job-26.txt 2>> workGenLogs/job-26.txt
hadoop dfs -rmr workGenOutputTest-26
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-27.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-27.txt workGenOutputTest-27 0.0018562526 0.10764143 >> workGenLogs/job-27.txt 2>> workGenLogs/job-27.txt
hadoop dfs -rmr workGenOutputTest-27
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-28.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-28.txt workGenOutputTest-28 7.812679E-5 0.435247 >> workGenLogs/job-28.txt 2>> workGenLogs/job-28.txt
hadoop dfs -rmr workGenOutputTest-28
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-29.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-29.txt workGenOutputTest-29 1.5258789E-5 1.0 >> workGenLogs/job-29.txt 2>> workGenLogs/job-29.txt
hadoop dfs -rmr workGenOutputTest-29
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-3.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-3.txt workGenOutputTest-3 1.5258789E-5 1.0 >> workGenLogs/job-3.txt 2>> workGenLogs/job-3.txt
hadoop dfs -rmr workGenOutputTest-3
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-30.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-30.txt workGenOutputTest-30 7.708371E-5 0.1979509 >> workGenLogs/job-30.txt 2>> workGenLogs/job-30.txt
hadoop dfs -rmr workGenOutputTest-30
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-31.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-31.txt workGenOutputTest-31 0.7998113 8.2158303E-4 >> workGenLogs/job-31.txt 2>> workGenLogs/job-31.txt
hadoop dfs -rmr workGenOutputTest-31
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-32.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-32.txt workGenOutputTest-32 1.5258789E-5 1.0 >> workGenLogs/job-32.txt 2>> workGenLogs/job-32.txt
hadoop dfs -rmr workGenOutputTest-32
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-33.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-33.txt workGenOutputTest-33 1.5258789E-5 1.0 >> workGenLogs/job-33.txt 2>> workGenLogs/job-33.txt
hadoop dfs -rmr workGenOutputTest-33
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-34.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 5 inputPath-job-34.txt workGenOutputTest-34 1.5258789E-5 304735.12 >> workGenLogs/job-34.txt 2>> workGenLogs/job-34.txt
hadoop dfs -rmr workGenOutputTest-34
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-35.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-35.txt workGenOutputTest-35 1.5258789E-5 67.74121 >> workGenLogs/job-35.txt 2>> workGenLogs/job-35.txt
hadoop dfs -rmr workGenOutputTest-35
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-36.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-36.txt workGenOutputTest-36 1.5258789E-5 4410.825 >> workGenLogs/job-36.txt 2>> workGenLogs/job-36.txt
hadoop dfs -rmr workGenOutputTest-36
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-37.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 2 inputPath-job-37.txt workGenOutputTest-37 1.5258789E-5 149384.83 >> workGenLogs/job-37.txt 2>> workGenLogs/job-37.txt
hadoop dfs -rmr workGenOutputTest-37
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-38.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 2 inputPath-job-38.txt workGenOutputTest-38 1.5258789E-5 1398101.2 >> workGenLogs/job-38.txt 2>> workGenLogs/job-38.txt
hadoop dfs -rmr workGenOutputTest-38
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-39.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 2 inputPath-job-39.txt workGenOutputTest-39 1.5258789E-5 143652.4 >> workGenLogs/job-39.txt 2>> workGenLogs/job-39.txt
hadoop dfs -rmr workGenOutputTest-39
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-4.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-4.txt workGenOutputTest-4 1.5258789E-5 160.12402 >> workGenLogs/job-4.txt 2>> workGenLogs/job-4.txt
hadoop dfs -rmr workGenOutputTest-4
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-40.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 6 inputPath-job-40.txt workGenOutputTest-40 1.5258789E-5 419971.2 >> workGenLogs/job-40.txt 2>> workGenLogs/job-40.txt
hadoop dfs -rmr workGenOutputTest-40
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-41.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-41.txt workGenOutputTest-41 1.5258789E-5 1.0 >> workGenLogs/job-41.txt 2>> workGenLogs/job-41.txt
hadoop dfs -rmr workGenOutputTest-41
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-42.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-42.txt workGenOutputTest-42 1.5258789E-5 35321.98 >> workGenLogs/job-42.txt 2>> workGenLogs/job-42.txt
hadoop dfs -rmr workGenOutputTest-42
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-43.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-43.txt workGenOutputTest-43 0.017437875 0.29772884 >> workGenLogs/job-43.txt 2>> workGenLogs/job-43.txt
hadoop dfs -rmr workGenOutputTest-43
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-44.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-44.txt workGenOutputTest-44 1.5258789E-5 5607.967 >> workGenLogs/job-44.txt 2>> workGenLogs/job-44.txt
hadoop dfs -rmr workGenOutputTest-44
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-45.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-45.txt workGenOutputTest-45 3.1377375E-4 0.42883602 >> workGenLogs/job-45.txt 2>> workGenLogs/job-45.txt
hadoop dfs -rmr workGenOutputTest-45
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-46.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-46.txt workGenOutputTest-46 1.5258789E-5 1.0 >> workGenLogs/job-46.txt 2>> workGenLogs/job-46.txt
hadoop dfs -rmr workGenOutputTest-46
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-47.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-47.txt workGenOutputTest-47 1.5258789E-5 1.0 >> workGenLogs/job-47.txt 2>> workGenLogs/job-47.txt
hadoop dfs -rmr workGenOutputTest-47
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-48.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-48.txt workGenOutputTest-48 3.0110776E-4 0.050675508 >> workGenLogs/job-48.txt 2>> workGenLogs/job-48.txt
hadoop dfs -rmr workGenOutputTest-48
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-49.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-49.txt workGenOutputTest-49 1.5258789E-5 1.0 >> workGenLogs/job-49.txt 2>> workGenLogs/job-49.txt
hadoop dfs -rmr workGenOutputTest-49
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-5.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-5.txt workGenOutputTest-5 8.070469E-5 0.18906942 >> workGenLogs/job-5.txt 2>> workGenLogs/job-5.txt
hadoop dfs -rmr workGenOutputTest-5
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-6.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-6.txt workGenOutputTest-6 1.5258789E-5 8.972656 >> workGenLogs/job-6.txt 2>> workGenLogs/job-6.txt
hadoop dfs -rmr workGenOutputTest-6
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-7.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-7.txt workGenOutputTest-7 6.368756E-5 0.2395882 >> workGenLogs/job-7.txt 2>> workGenLogs/job-7.txt
hadoop dfs -rmr workGenOutputTest-7
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-8.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-8.txt workGenOutputTest-8 6.606579E-4 0.21064146 >> workGenLogs/job-8.txt 2>> workGenLogs/job-8.txt
hadoop dfs -rmr workGenOutputTest-8
# inputSize 67108864
--------------------------------------------------------------------------------
/workloadSuite/scriptsTest/run-job-9.sh:
--------------------------------------------------------------------------------
hadoop jar WorkGen.jar org.apache.hadoop.examples.WorkGen -conf /usr/lib/hadoop-0.20.2/conf/workGenKeyValue_conf.xsl -r 1 inputPath-job-9.txt workGenOutputTest-9 5.8835745E-4 0.4086972 >> workGenLogs/job-9.txt 2>> workGenLogs/job-9.txt
hadoop dfs -rmr workGenOutputTest-9
# inputSize 67108864
--------------------------------------------------------------------------------
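run-jobs-all.sh below is the replay driver: it clears the log directory, then launches every run-job-N.sh in the background, sleeping between launches to reproduce the inter-arrival gaps sampled from the trace. A small sketch that recovers this submission schedule from the script, assuming only the layout visible below:

#!/usr/bin/perl
# Sketch: extract the (job, inter-arrival gap) schedule from run-jobs-all.sh.
open(my $fh, '<', 'run-jobs-all.sh') or die "run-jobs-all.sh: $!";
my ($job, $total) = (undef, 0);
while (my $line = <$fh>) {
    $job = $1 if $line =~ /^\.\/run-job-(\d+)\.sh\s*&/;
    if (defined($job) && $line =~ /^sleep\s+(\d+)/) {
        printf "job %-2d submitted; next arrival in %3d s\n", $job, $1;
        $total += $1;
        $job = undef;
    }
}
close($fh);
print "total replay span: $total s\n";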
/workloadSuite/scriptsTest/run-jobs-all.sh:
--------------------------------------------------------------------------------
#!/bin/bash
rm -r workGenLogs
mkdir workGenLogs
./run-job-0.sh &
sleep 49
./run-job-1.sh &
sleep 52
./run-job-2.sh &
sleep 21
./run-job-3.sh &
sleep 75
./run-job-4.sh &
sleep 11
./run-job-5.sh &
sleep 141
./run-job-6.sh &
sleep 24
./run-job-7.sh &
sleep 6
./run-job-8.sh &
sleep 84
./run-job-9.sh &
sleep 24
./run-job-10.sh &
sleep 10
./run-job-11.sh &
sleep 112
./run-job-12.sh &
sleep 57
./run-job-13.sh &
sleep 32
./run-job-14.sh &
sleep 26
./run-job-15.sh &
sleep 206
./run-job-16.sh &
sleep 182
./run-job-17.sh &
sleep 16
./run-job-18.sh &
sleep 52
./run-job-19.sh &
sleep 5
./run-job-20.sh &
sleep 2
./run-job-21.sh &
sleep 27
./run-job-22.sh &
sleep 20
./run-job-23.sh &
sleep 28
./run-job-24.sh &
sleep 23
./run-job-25.sh &
sleep 135
./run-job-26.sh &
sleep 33
./run-job-27.sh &
sleep 43
./run-job-28.sh &
sleep 29
./run-job-29.sh &
sleep 140
./run-job-30.sh &
sleep 4
./run-job-31.sh &
sleep 31
./run-job-32.sh &
sleep 25
./run-job-33.sh &
sleep 21
./run-job-34.sh &
sleep 110
./run-job-35.sh &
sleep 118
./run-job-36.sh &
sleep 1
./run-job-37.sh &
sleep 0
./run-job-38.sh &
sleep 1
./run-job-39.sh &
sleep 1
./run-job-40.sh &
sleep 4
./run-job-41.sh &
sleep 200
./run-job-42.sh &
sleep 47
./run-job-43.sh &
sleep 53
./run-job-44.sh &
sleep 42
./run-job-45.sh &
sleep 324
./run-job-46.sh &
sleep 37
./run-job-47.sh &
sleep 68
./run-job-48.sh &
sleep 41
./run-job-49.sh &
sleep 33
# max input 171246518
# inputPartitionSize 67108864
# inputPartitionCount 10
--------------------------------------------------------------------------------
/workloadSuite/workGenKeyValue_conf.xsl:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
  <property>
    <name>workGen.randomwrite.min_key</name>
    <value>10</value>
  </property>
  <property>
    <name>workGen.randomwrite.max_key</name>
    <value>10</value>
  </property>
  <property>
    <name>workGen.randomwrite.min_value</name>
    <value>90</value>
  </property>
  <property>
    <name>workGen.randomwrite.max_value</name>
    <value>90</value>
  </property>
  <property>
    <name>workGen.randomwrite.total_bytes</name>
    <value>10737418240</value>
  </property>
</configuration>
--------------------------------------------------------------------------------
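The bookkeeping comments at the end of run-jobs-all.sh tie the pieces together: the largest job input (171246518 bytes, job 17) spans ceil(171246518 / 67108864) = 3 of the 10 pre-generated 64 MiB partitions, which appears to be why inputPath-job-17.txt lists three comma-separated paths while most jobs read a single part file. A one-line check of that arithmetic (the starting part number below simply mirrors inputPath-job-17.txt):

#!/usr/bin/perl
# Sketch: partitions needed by the largest replay job, from the
# "# max input" and "# inputPartitionSize" comments in run-jobs-all.sh.
use POSIX qw(ceil);
my $n = ceil(171246518 / 67108864);    # -> 3 partitions
print join(",", map { sprintf("workGenInput/part-%05d", $_) } 6 .. 5 + $n), "\n";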