├── src ├── python │ ├── Join-200G │ │ ├── __init__.py │ │ ├── JoinGCTime.py │ │ ├── JoinTaskExecutionTimeNoSplit.py │ │ └── JoinTaskExecutionTimeComparison.py │ ├── plotter │ │ ├── __init__.py │ │ ├── ScatterPlotter.py │ │ ├── sharedy.py │ │ ├── BoxPlotter.py │ │ ├── BoxPlotter2.py │ │ ├── BoxPlotter4.py │ │ ├── BoxPlotter3.py │ │ ├── GroupBoxPlotter.py │ │ ├── TimeSeriesPlotter.py │ │ └── TimeSeriesPlotter2.py │ ├── reader │ │ ├── __init__.py │ │ └── FileReader.py │ ├── statistics │ │ ├── __init__.py │ │ ├── BoxPlotStatistics.py │ │ └── HistogramStatistics.py │ ├── analyzer │ │ └── __init__.py │ ├── matplotlib │ │ ├── Histogram.py │ │ └── BrokenAxis.py │ ├── GroupBy │ │ ├── GroupByTaskExecutionTime.py │ │ ├── GroupBy-GC-Time.py │ │ ├── GroupBySkewTasks.py │ │ └── GroupBySkewTasks2.py │ ├── PageRank │ │ ├── PageRankTaskGCTime.py │ │ └── PageRankTaskExecutionTime.py │ ├── SVM │ │ ├── SVMTaskExecutionTime.py │ │ └── SVMTaskGCTime.py │ ├── AggregateByKey │ │ ├── AggregateByKey-GC-Time.py │ │ ├── AggregateByKeyTaskExecutionTime.py │ │ └── AggregateByKeySpilledTasks.py │ ├── SVM05 │ │ ├── SVMTaskExecutionTime.py │ │ └── SVMTaskGCTime.py │ └── Join │ │ ├── JoinGCTime.py │ │ └── JoinTaskExecutionTime.py ├── main │ └── java │ │ ├── util │ │ ├── TaskLogFetcher.java │ │ ├── FileChecker.java │ │ ├── RelativeDifference.java │ │ ├── FileTextWriter.java │ │ ├── JobJsonPrinter.java │ │ ├── JsonFileReader.java │ │ ├── AppListFetcher.java │ │ ├── CommandRunner.java │ │ ├── HtmlFetcher.java │ │ ├── DateParser.java │ │ ├── JxlUtil.java │ │ └── GCViewerNoneGUI.java │ │ ├── analyzer │ │ ├── TaskMetricsCorrelationAnalyzer.java │ │ ├── TaskExecutorCPUUsageAnalyzer.java │ │ └── ExecutorAnalyzer.java │ │ ├── appinfo │ │ ├── ResourceMetrics.java │ │ ├── TopMetrics.java │ │ ├── SpillMetrics.java │ │ ├── Job.java │ │ ├── Task.java │ │ └── Stage.java │ │ ├── gc │ │ ├── GCStatistics.java │ │ ├── ExecutorGCLogParserWithGCeasy.java │ │ ├── ExecutorGCLogParserByGCPlot.java │ │ ├── 
G1GCViewerLogParser.java │ │ └── GCViewerParser.java │ │ ├── statstics │ │ ├── ComputedAppStatistics.java │ │ └── StageStatistics.java │ │ ├── parser │ │ ├── StagesJsonParser.java │ │ ├── StageTasksJsonParser.java │ │ ├── JobsJsonParser.java │ │ └── ExecutorsJsonParser.java │ │ └── generalGC │ │ ├── HeapUsage.java │ │ └── G1GCLogParser.java └── test │ └── java │ ├── MemoryTest.java │ ├── StageTasksJsonParserTest.java │ ├── TopMetricsAnalyzer.java │ ├── HeapSizeDifferences.java │ └── ComputeStatistics.java ├── .gitignore └── pom.xml /src/python/Join-200G/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/python/plotter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/python/reader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/python/statistics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/python/analyzer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import reader -------------------------------------------------------------------------------- /src/main/java/util/TaskLogFetcher.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | /** 4 | * Created by xulijie on 17-10-24. 
def readLines(textFile):
    """Read a text file and return its lines without newline characters.

    Args:
        textFile: Path of the file to read. It is opened in text mode with
            the platform default encoding.

    Returns:
        A list with one string per line, in file order. Only newline
        characters are stripped from each line; other whitespace is kept.
        An empty file yields an empty list.
    """
    # The context manager closes the handle even when reading raises; the
    # previous implementation opened the file and never closed it.
    with open(textFile) as handle:
        return [line.strip('\n') for line in handle]
7 | */ 8 | public class FileChecker { 9 | 10 | public static boolean isGCFile(String file) { 11 | List lines = JsonFileReader.readFileLines(file); 12 | return lines.size() > 1; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/analyzer/TaskExecutorCPUUsageAnalyzer.java: -------------------------------------------------------------------------------- 1 | package analyzer; 2 | 3 | /** 4 | * Created by xulijie on 17-11-30. 5 | */ 6 | public class TaskExecutorCPUUsageAnalyzer { 7 | 8 | public static void main(String[] args) { 9 | String appJsonRootDir = "/Users/xulijie/Documents/GCResearch/Experiments-11-17/medianProfiles/"; 10 | int executorId = 0; 11 | int TaskIndex = 0; 12 | 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/test/java/MemoryTest.java: -------------------------------------------------------------------------------- 1 | import java.util.ArrayList; 2 | import java.util.List; 3 | 4 | /** 5 | * Created by xulijie on 17-7-11. 6 | */ 7 | public class MemoryTest { 8 | public static void main(String args[]) { 9 | 10 | int[] array = new int[1024]; 11 | 12 | List arrayList = new ArrayList(); 13 | 14 | for (int i = 1; i < Integer.MAX_VALUE; i++) { 15 | arrayList.add(array); 16 | } 17 | 18 | System.out.println(arrayList.size()); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/util/RelativeDifference.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | /** 4 | * Created by xulijie on 17-9-21. 
public class FileTextWriter {

    /**
     * Writes {@code text} to {@code file}, replacing any previous content.
     *
     * <p>Missing parent directories are created first. I/O failures are not
     * propagated to the caller: the stack trace is printed and the method returns.
     *
     * @param file path of the output file
     * @param text content to write
     */
    public static void write(String file, String text) {
        File outputFile = new File(file);

        // getParentFile() returns null for a bare file name such as "out.txt";
        // in that case there is no directory to create.
        File parentDir = outputFile.getParentFile();
        if (parentDir != null && !parentDir.exists())
            parentDir.mkdirs();

        // try-with-resources closes the writer even if writing fails.
        // FileNotFoundException is a subclass of IOException, so one catch covers both.
        try (PrintWriter writer = new PrintWriter(new FileWriter(outputFile))) {
            writer.print(text);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
5 | */ 6 | public class TopMetrics { 7 | 8 | String time; 9 | double CPUusage; 10 | double memoryUsage; // GB 11 | 12 | public TopMetrics(String time, double CPUusage, double memoryUsage) { 13 | this.time = time; 14 | this.CPUusage = CPUusage; 15 | this.memoryUsage = memoryUsage; 16 | } 17 | 18 | public String getTime() { 19 | return time; 20 | } 21 | 22 | public double getCPUusage() { 23 | return CPUusage; 24 | } 25 | 26 | public double getMemoryUsage() { 27 | return memoryUsage; 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return "[" + time + "] CPU = " 33 | + String.format("%.2f", CPUusage) 34 | + ", Memory = " 35 | + String.format("%.2f", memoryUsage); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/gc/GCStatistics.java: -------------------------------------------------------------------------------- 1 | package gc; 2 | 3 | /** 4 | * Created by xulijie on 18-5-3. 5 | */ 6 | public class GCStatistics { 7 | 8 | private double STWPauseTime; 9 | private double youngGCTime; 10 | private double fullGCTime; 11 | private double concurrentGCTime; 12 | 13 | public GCStatistics(double STWPauseTime, double youngGCTime, double fullGCTime, double concurrentGCTime) { 14 | this.STWPauseTime = STWPauseTime; 15 | this.youngGCTime = youngGCTime; 16 | this.fullGCTime = fullGCTime; 17 | this.concurrentGCTime = concurrentGCTime; 18 | } 19 | 20 | public double getSTWPauseTime() { 21 | return STWPauseTime; 22 | } 23 | 24 | public double getYoungGCTime() { 25 | return youngGCTime; 26 | } 27 | 28 | public double getFullGCTime() { 29 | return fullGCTime; 30 | } 31 | 32 | public double getConcurrentGCTime() { 33 | return concurrentGCTime; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/util/JobJsonPrinter.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | import java.io.File; 4 
| 5 | /** 6 | * Created by xulijie on 17-8-2. 7 | */ 8 | 9 | public class JobJsonPrinter { 10 | 11 | public static void main(String[] args) { 12 | String appJsonDir = "/Users/xulijie/Documents/GCResearch/Experiments/profiles/SVM-0.5"; 13 | 14 | File appDir = new File(appJsonDir); 15 | 16 | for (File appJsonFile : appDir.listFiles()) { 17 | if (appJsonFile.isDirectory() && appJsonFile.getName().contains("app")) { 18 | 19 | // RDDJoin-CMS-4-28G-0.5_app-20170623114155-0011 20 | String fileName = appJsonFile.getName(); 21 | String appId = fileName.substring(fileName.lastIndexOf("app")); 22 | File jobFile = new File(appJsonFile, "jobs.json"); 23 | 24 | String lines = JsonFileReader.readFile(jobFile.getAbsolutePath()); 25 | 26 | System.out.println("[app] " + fileName); 27 | System.out.println(lines); 28 | System.out.println(""); 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/python/plotter/ScatterPlotter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # #basic 5 | # 6 | # fig, axes = plt.subplots(nrows=2, ncols=1, sharey=True) # sharey='row') 7 | # 8 | # plt.subplot(211) 9 | # plt.scatter([1, 2, 3], [4, 5, 6]) 10 | # 11 | # # with label 12 | # plt.subplot(212) 13 | # plt.scatter([7, 8, 9], [10, 11, 12]) 14 | # 15 | # plt.show() 16 | 17 | class ScatterPlotter: 18 | @staticmethod 19 | def plotTaskMetrics(xValues, yValues, xLabel, yLabel, file): 20 | parallel = plt.scatter(xValues["Parallel"], yValues["Parallel"], marker='o', color="red", alpha=0.1, edgecolors='none') 21 | cms = plt.scatter(xValues["CMS"], yValues["CMS"], marker='o', color='blue', alpha=0.1, edgecolors='none') 22 | g1 = plt.scatter(xValues["G1"], yValues["G1"], marker='o', color='green', alpha=0.1, edgecolors='none') 23 | 24 | plt.xlabel(xLabel) 25 | plt.ylabel(yLabel) 26 | plt.title(xLabel + "-" + yLabel) 27 | plt.legend((parallel, 
cms, g1), 28 | ("Parallel", "CMS", "G1"), 29 | scatterpoints=1, 30 | loc='lower right', 31 | ncol=1, 32 | fontsize=8) 33 | # plt.show() 34 | plt.savefig(file, dpi=150, bbox_inches='tight') -------------------------------------------------------------------------------- /src/python/plotter/sharedy.py: -------------------------------------------------------------------------------- 1 | """Examples illustrating the use of plt.subplots(). 2 | 3 | This function creates a figure and a grid of subplots with a single call, while 4 | providing reasonable control over how the individual plots are created. For 5 | very refined tuning of subplot creation, you can still use add_subplot() 6 | directly on a new figure. 7 | """ 8 | 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | import matplotlib 12 | 13 | #matplotlib.font_manager._rebuild() 14 | 15 | # Simple data to display in various forms 16 | x = np.linspace(0, 2 * np.pi, 400) 17 | y = np.sin(x ** 2) 18 | 19 | plt.close('all') 20 | 21 | plt.rc('font', family='Helvetica') 22 | #plt.rc('font', family='Arial') 23 | 24 | # Four axes, returned as a 2-d array 25 | f, axarr = plt.subplots(2, 2) 26 | axarr[0, 0].plot(x, y) 27 | axarr[0, 0].set_title('Axis [0,0]') 28 | axarr[0, 1].scatter(x, y) 29 | axarr[0, 1].set_title('Axis [0,1]') 30 | axarr[1, 0].plot(x, y ** 2) 31 | axarr[1, 0].set_title('Axis [1,0]') 32 | axarr[1, 1].scatter(x, y ** 2) 33 | axarr[1, 1].set_title('Axis [1,1]') 34 | # Fine-tune figure; hide x ticks for top plots and y ticks for right plots 35 | plt.setp([a.get_xticklabels() for a in axarr[0, :]], visible=False) 36 | plt.setp([a.get_yticklabels() for a in axarr[:, 1]], visible=False) 37 | 38 | 39 | plt.show() -------------------------------------------------------------------------------- /src/test/java/StageTasksJsonParserTest.java: -------------------------------------------------------------------------------- 1 | import com.google.gson.*; 2 | import util.FileTextWriter; 3 | import 
util.JsonFileReader; 4 | 5 | import java.io.File; 6 | import java.util.Map; 7 | 8 | 9 | public class StageTasksJsonParserTest { 10 | 11 | public static void parseTaskJson(File attemptFile) { 12 | String stageTasksJson = JsonFileReader.readFile(attemptFile.getAbsolutePath()); 13 | 14 | try { 15 | JsonParser parser = new JsonParser(); 16 | JsonElement el = parser.parse(stageTasksJson); 17 | JsonObject tasksObject = el.getAsJsonObject().get("tasks").getAsJsonObject(); 18 | 19 | for (Map.Entry taskEntry : tasksObject.entrySet()) { 20 | JsonObject taskObject = taskEntry.getValue().getAsJsonObject(); 21 | 22 | 23 | } 24 | 25 | } catch (JsonIOException e) { 26 | e.printStackTrace(); 27 | } catch (JsonSyntaxException e) { 28 | e.printStackTrace(); 29 | } 30 | 31 | 32 | } 33 | 34 | public static void main (String[] args) { 35 | String attemptFile = "/Users/xulijie/Documents/GCResearch/Experiments/profiles/RDDJoin-CMS-4-28G-0.5_app-20170623114155-0011/job-0/stage-0/attempt-0.json"; 36 | 37 | // parseTaskJson(new File(attemptFile)); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/statstics/ComputedAppStatistics.java: -------------------------------------------------------------------------------- 1 | package statstics; 2 | 3 | import util.Statistics; 4 | 5 | import java.util.HashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | /** 10 | * Created by xulijie on 17-9-21. 11 | */ 12 | public class ComputedAppStatistics { 13 | // e.g., GroupByRDD-CMS-1-7G-0.5 14 | private String appName; 15 | private String gcName; 16 | private String dataMode; 17 | 18 | Map statisticsMap = new HashMap(); 19 | 20 | 21 | public ComputedAppStatistics(String appName, String gcName, String dataMode, List lines) { 22 | this.appName = appName; 23 | this.gcName = gcName; 24 | this.dataMode = dataMode; 25 | // k: [stage0.duration], v: Statistics(mean, etc.) 
26 | for (String line: lines) 27 | if (line.startsWith("[")) 28 | parseStatisticsLine(line); 29 | } 30 | 31 | private void parseStatisticsLine(String line) { 32 | Statistics statistics = new Statistics(line); 33 | statisticsMap.put(statistics.getMetricName(), statistics); 34 | } 35 | 36 | public double getMetric(String metricName, String statName) { 37 | return statisticsMap.get(metricName).get(statName); 38 | } 39 | 40 | public String getAppName() { 41 | return appName; 42 | } 43 | 44 | public String getGcName() { 45 | return gcName; 46 | } 47 | 48 | public String getDataMode() { 49 | return dataMode; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *#*# 2 | *.#* 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.pyc 7 | *.pyo 8 | *.swp 9 | *~ 10 | .DS_Store 11 | .cache 12 | .classpath 13 | .ensime 14 | .ensime_cache/ 15 | .ensime_lucene 16 | .generated-mima* 17 | .idea/ 18 | .idea_modules/ 19 | .project 20 | .pydevproject 21 | .scala_dependencies 22 | .settings 23 | /lib/ 24 | R-unit-tests.log 25 | R/unit-tests.out 26 | R/cran-check.out 27 | R/pkg/vignettes/sparkr-vignettes.html 28 | build/*.jar 29 | build/apache-maven* 30 | build/scala* 31 | build/zinc* 32 | cache 33 | checkpoint 34 | conf/*.cmd 35 | conf/*.conf 36 | conf/*.properties 37 | conf/*.sh 38 | conf/*.xml 39 | conf/java-opts 40 | conf/slaves 41 | dependency-reduced-pom.xml 42 | derby.log 43 | dev/create-release/*final 44 | dev/create-release/*txt 45 | dev/pr-deps/ 46 | dist/ 47 | docs/_site 48 | docs/api 49 | lib_managed/ 50 | lint-r-report.log 51 | log/ 52 | logs/ 53 | out/ 54 | project/boot/ 55 | project/build/target/ 56 | project/plugins/lib_managed/ 57 | project/plugins/project/build.properties 58 | project/plugins/src_managed/ 59 | project/plugins/target/ 60 | python/lib/pyspark.zip 61 | python/deps 62 | python/pyspark/python 63 | reports/ 64 | 
scalastyle-on-compile.generated.xml 65 | scalastyle-output.xml 66 | scalastyle.txt 67 | spark-*-bin-*.tgz 68 | spark-tests.log 69 | src_managed/ 70 | streaming-tests.log 71 | target/ 72 | unit-tests.log 73 | work/ 74 | 75 | # For Hive 76 | TempStatsStore/ 77 | metastore/ 78 | metastore_db/ 79 | sql/hive-thriftserver/test_warehouses 80 | warehouse/ 81 | spark-warehouse/ 82 | 83 | # For R session data 84 | .RData 85 | .RHistory 86 | .Rhistory 87 | *.Rproj 88 | *.Rproj.* 89 | .Rproj.user 90 | .idea/ 91 | *.pdf 92 | -------------------------------------------------------------------------------- /src/python/plotter/BoxPlotter.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib 3 | 4 | class BoxPlotter: 5 | @staticmethod 6 | def plotStatisticsByGCAlgo(statistics, file, sucessfulAppNum): 7 | 8 | fig, axes = plt.subplots(ncols=3, sharey=True, figsize=(8,4)) 9 | fig.subplots_adjust(wspace=0) 10 | 11 | 12 | Parallel = statistics.Parallel 13 | CMS = statistics.CMS 14 | G1 = statistics.G1 15 | 16 | colors = ['pink', 'lightblue', 'lightgreen'] 17 | 18 | i = 0 19 | 20 | for ax, stats in zip(axes, [Parallel, CMS, G1]): 21 | list = [stats['E-1'], stats['E-2'], stats['E-4']] 22 | 23 | bplot = ax.bxp(list, showfliers=False, showmeans=True, patch_artist=True) 24 | for patch, color in zip(bplot['boxes'], colors): 25 | patch.set_facecolor(color) 26 | 27 | # ax.set_xticklabels(['E1', 'E2', 'E4'], fontsize=20) 28 | ax.set_xticklabels(['', '', ''], fontsize=1) 29 | # ax.set_xlabel(xlabel=stats['label'], fontsize=22) 30 | ax.tick_params(axis='y', labelsize=20) 31 | 32 | ax2 = ax.twiny() # ax2 is responsible for "top" axis and "right" axis 33 | ax2.set_xticks(ax.get_xticks()) 34 | ax2.set_xticklabels(sucessfulAppNum[i], fontsize=20) 35 | i += 1 36 | ax2.set_xlim(ax.get_xlim()) 37 | 38 | ax.margins(0.05) # Optional 39 | 40 | axes[0].set_ylabel(statistics.ylabel, fontsize=20) 41 | 
fig.suptitle(statistics.title, fontsize=22, y=1.07) 42 | 43 | 44 | 45 | matplotlib.font_manager._rebuild() 46 | 47 | plt.show() 48 | 49 | #plt.savefig(file, dpi=150, bbox_inches='tight') -------------------------------------------------------------------------------- /src/test/java/TopMetricsAnalyzer.java: -------------------------------------------------------------------------------- 1 | import util.JsonFileReader; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * Created by xulijie on 17-11-3. 7 | */ 8 | public class TopMetricsAnalyzer { 9 | public static void main(String[] args) { 10 | String file = "/Users/xulijie/Documents/GCResearch/NewExperiments/profiles/RDDJoin-0.5/topMetrics/aliSlave1/rjoin-CMS-1-6656m-0.5-n1.top"; 11 | 12 | List lines = JsonFileReader.readFileLines(file); 13 | String time = ""; 14 | 15 | for (String line : lines) { 16 | if (line.startsWith("top")) { 17 | time = line.substring(line.indexOf("-") + 2, line.indexOf("up") - 1); 18 | 19 | } else if (line.trim().endsWith("java")) { 20 | String[] metrics = line.trim().split("\\s+"); 21 | String PID = metrics[0]; 22 | double CPU = Double.parseDouble(metrics[8]); 23 | String memoryStr = metrics[5]; 24 | double memory; 25 | if (memoryStr.endsWith("g")) { 26 | memory = Double.parseDouble(memoryStr.substring(0, memoryStr.indexOf("g"))); 27 | } else if (memoryStr.endsWith("t")) { 28 | memory = Double.parseDouble(memoryStr.substring(0, memoryStr.indexOf("t"))); 29 | memory = memory * 1024; 30 | } else { 31 | memory = Double.parseDouble(memoryStr); 32 | memory = memory / 1024 / 1024; 33 | } 34 | 35 | 36 | 37 | 38 | System.out.println("[" + time + "] PID = " + PID + ", CPU = " 39 | + CPU + ", Memory = " + memory); 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/appinfo/SpillMetrics.java: -------------------------------------------------------------------------------- 1 | package appinfo; 2 | 3 | import util.DateParser; 4 | 5 
| /** 6 | * Created by xulijie on 18-4-3. 7 | */ 8 | 9 | // [Task 84 SpillMetrics] release = 3.7 GB, writeTime = 40 s, recordsWritten = 86, bytesWritten = 567.5 MB 10 | 11 | public class SpillMetrics { 12 | private long startTime; // 1511175278 13 | private long endTime; 14 | private int taskId; 15 | private double spilledMemoryGB; 16 | private double spillDuration; 17 | private long recordsWritten; 18 | private double bytesWrittenMB; 19 | 20 | public SpillMetrics(String endTime, int taskId, double spilledMemoryGB, double spillDuration, 21 | long recordsWritten, double bytesWrittenMB) { 22 | 23 | this.taskId = taskId; 24 | this.spilledMemoryGB = spilledMemoryGB; 25 | this.spillDuration = spillDuration; 26 | this.recordsWritten = recordsWritten; 27 | this.bytesWrittenMB = bytesWrittenMB; 28 | this.endTime = DateParser.getTimeStamp(endTime); 29 | this.startTime = this.endTime - (long) spillDuration; 30 | 31 | } 32 | 33 | public int getTaskId() { 34 | return taskId; 35 | } 36 | 37 | public double getSpilledMemoryGB() { 38 | return spilledMemoryGB; 39 | } 40 | 41 | public double getSpillDuration() { 42 | return spillDuration; 43 | } 44 | 45 | public long getRecordsWritten() { 46 | return recordsWritten; 47 | } 48 | 49 | public double getBytesWrittenMB() { 50 | return bytesWrittenMB; 51 | } 52 | 53 | public long getStartTime() { 54 | return startTime; 55 | } 56 | 57 | public long getEndTime() { 58 | return endTime; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/test/java/HeapSizeDifferences.java: -------------------------------------------------------------------------------- 1 | import java.lang.management.ManagementFactory; 2 | import java.util.ArrayList; 3 | import java.util.Collection; 4 | import java.util.List; 5 | 6 | /** 7 | * Created by xulijie on 17-8-29. 
8 | */ 9 | public class HeapSizeDifferences { 10 | 11 | static Collection objects = new ArrayList(); 12 | static long lastMaxMemory = 0; 13 | 14 | public static void main(String[] args) { 15 | try { 16 | List inputArguments = ManagementFactory.getRuntimeMXBean().getInputArguments(); 17 | System.out.println("Running with: " + inputArguments); 18 | while (true) { 19 | printMaxMemory(); 20 | consumeSpace(); 21 | } 22 | } catch (OutOfMemoryError e) { 23 | freeSpace(); 24 | printMaxMemory(); 25 | } 26 | } 27 | 28 | static void printMaxMemory() { 29 | long currentMaxMemory = Runtime.getRuntime().maxMemory(); 30 | if (currentMaxMemory != lastMaxMemory) { 31 | lastMaxMemory = currentMaxMemory; 32 | System.out.format("Runtime.getRuntime().maxMemory(): %,dK, %.2fMB, %.2fGB.%n", 33 | currentMaxMemory / 1024, (float) currentMaxMemory / 1024 / 1024, 34 | (float) currentMaxMemory / 1024 / 1024 / 1024); 35 | 36 | double memoryStore = (currentMaxMemory - 300 * 1024 * 1024) * 0.6 * 0.5; 37 | System.out.format("memoryStore: %.2fGB.%n", memoryStore / 1024 / 1024 / 1024); 38 | } 39 | } 40 | 41 | static void consumeSpace() { 42 | objects.add(new int[1_000_000]); 43 | } 44 | 45 | static void freeSpace() { 46 | objects.clear(); 47 | } 48 | 49 | // Runtime.getRuntime().maxMemory(): 4,194,304K. G1-4G 50 | // Runtime.getRuntime().maxMemory(): 4,160,256K. CMS-4G 51 | // Runtime.getRuntime().maxMemory(): 3,728,384K. 
Parallel-4G 52 | } 53 | -------------------------------------------------------------------------------- /src/python/matplotlib/Histogram.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | fig = plt.figure(figsize=(3.2, 2.4)) 22 | ax = fig.add_subplot(111) 23 | plt.subplots_adjust(left=0.19, bottom=0.11, right=0.98, top=0.87, 24 | wspace=0.03, hspace=0.04) 25 | 26 | #plt.tight_layout() 27 | 28 | xvals = [19, 60, 0] 29 | yvals = [22, 8, 27] 30 | zvals = [14, 2, 28] 31 | 32 | rects1 = ax.bar(ind, xvals, width, color='hotpink', edgecolor='black', hatch="///") 33 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 34 | rects3 = ax.bar(ind+width*2, zvals, width, color='dodgerblue', edgecolor='black', hatch='\\\\\\') 35 | 36 | ax.set_ylabel('GC time (s)', color='black') 37 | ax.set_xticks(ind+width) 38 | ax.set_xticklabels( ('YGC', 'FGC', 'ConGC'), color='black' ) 39 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 40 | frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, fontsize=10 ) 41 | ax.set_ylim(0, 100) # The ceil 42 | #plt.xlim(-0.3, 2.76) # The ceil 43 | ax.set_xlim(-0.3, 2.76) # The ceil 44 | 45 | #plt.title("(a) GroupBy-task-execution-time", fontsize=12) 46 | plt.title("(b) GroupBy-task-GC-time", fontsize=12) 47 | 48 | 49 | def autolabel(rects): 50 | for rect in rects: 51 | h = rect.get_height() 52 | ax.text(rect.get_x()+rect.get_width()/2., 
1.05*h, '%d'%int(h), 53 | ha='center', va='bottom') 54 | 55 | autolabel(rects1) 56 | autolabel(rects2) 57 | autolabel(rects3) 58 | 59 | 60 | plt.show() -------------------------------------------------------------------------------- /src/main/java/gc/ExecutorGCLogParserWithGCeasy.java: -------------------------------------------------------------------------------- 1 | package gc; 2 | 3 | import com.google.gson.JsonElement; 4 | import com.google.gson.JsonParser; 5 | import util.CommandRunner; 6 | import util.FileTextWriter; 7 | 8 | import java.io.BufferedReader; 9 | import java.io.IOException; 10 | import java.io.InputStreamReader; 11 | 12 | /** 13 | * Created by xulijie on 17-9-2. 14 | */ 15 | public class ExecutorGCLogParserWithGCeasy { 16 | 17 | public static void parseExecutorGCLog(String gcLogFile, String gcMetricsFile) { 18 | String curl = "curl -XPOST --data-binary @" + gcLogFile 19 | + " http://localhost:8080/analyzeGC?apiKey=e094a34e-c3eb-4c9a-8254-f0dd107245cc --header Content-Type:text"; 20 | String metricsJson = CommandRunner.execCurl(curl); 21 | 22 | if(metricsJson.trim().isEmpty()) 23 | System.err.println("Error in parsing " + gcLogFile); 24 | else { 25 | 26 | FileTextWriter.write(gcMetricsFile, metricsJson); 27 | } 28 | 29 | } 30 | 31 | 32 | public static void main(String[] args) { 33 | String gcLogFile = "/Users/xulijie/Documents/GCResearch/Experiments/profiles/GroupByRDD-0.5-2/GroupByRDD-CMS-4-28G-0.5_app-20170721104729-0019/executors/0/stdout"; 34 | 35 | String curl = "curl -XPOST --data-binary @" + gcLogFile 36 | + " http://localhost:8080/analyzeGC?apiKey=e094a34e-c3eb-4c9a-8254-f0dd107245cc --header Content-Type:text"; 37 | 38 | long start = System.currentTimeMillis(); 39 | String metricsJson = CommandRunner.execCurl(curl); 40 | System.out.println(metricsJson); 41 | long end = System.currentTimeMillis(); 42 | 43 | System.out.println((end - start) / 1000 + "s"); 44 | // execCurl(); 45 | } 46 | } 47 | 48 | // curl -X POST --data-binary 
@/Users/xulijie/Documents/GCResearch/Experiments/profiles/GroupByRDD-0.5-2/GroupByRDD-Parallel-2-14G-0.5_app-20170721101243-0006/executors/12/stdout http://localhost:8080/analyzeGC?apiKey=e094a34e-c3eb-4c9a-8254-f0dd107245cc --header "Content-Type:text" 49 | 50 | -------------------------------------------------------------------------------- /src/main/java/parser/StagesJsonParser.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import appinfo.Application; 4 | import com.google.gson.*; 5 | import util.HtmlFetcher; 6 | import util.FileTextWriter; 7 | 8 | import java.io.*; 9 | 10 | public class StagesJsonParser { 11 | 12 | private String appURL; 13 | private String appDir; 14 | 15 | 16 | public StagesJsonParser() {} 17 | 18 | public StagesJsonParser(String appURL, String appDir) { 19 | this.appURL = appURL; 20 | this.appDir = appDir; 21 | } 22 | 23 | public void saveStagesJson(Application app) { 24 | // http://masterIP:18080/api/v1/applications/app-20170618202557-0295/stages 25 | String stagesURL = appURL + "/stages"; 26 | 27 | // "profiles/WordCount-CMS-4-28_app-20170618202557-0295/stages.json" 28 | String stagesJsonFile = appDir + File.separatorChar + "stages.json"; 29 | 30 | String stagesJson = HtmlFetcher.fetch(stagesURL); 31 | FileTextWriter.write(stagesJsonFile, stagesJson); 32 | 33 | parseStagesJson(stagesJson, app); 34 | } 35 | 36 | public void parseStagesJson(String stagesJson, Application app) { 37 | 38 | try { 39 | JsonParser parser = new JsonParser(); 40 | JsonElement el = parser.parse(stagesJson); 41 | JsonArray stageJsonArray = null; 42 | 43 | if (el.isJsonArray()) 44 | stageJsonArray = el.getAsJsonArray(); 45 | else { 46 | System.err.println("Error in parsing the stages json!"); 47 | System.exit(1); 48 | } 49 | 50 | for (JsonElement stageElem : stageJsonArray) { 51 | JsonObject stageObject = stageElem.getAsJsonObject(); 52 | // stageId = 0, attemptId = 1 53 | // stageId = 0, attemptId = 
0 54 | app.addStage(stageObject); 55 | } 56 | 57 | } catch (JsonIOException e) { 58 | e.printStackTrace(); 59 | } catch (JsonSyntaxException e) { 60 | e.printStackTrace(); 61 | } 62 | } 63 | 64 | } -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | cn.ac.iscas 8 | SparkProfiler 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | net.sourceforge.jexcelapi 14 | jxl 15 | 2.6.12 16 | 17 | 18 | 19 | org.jsoup 20 | jsoup 21 | 1.10.3 22 | 23 | 24 | 25 | com.google.code.gson 26 | gson 27 | 2.8.1 28 | 29 | 30 | 31 | 32 | org.apache.commons 33 | commons-math3 34 | 3.6.1 35 | 36 | 37 | 38 | com.github 39 | gcviewer 40 | 1.36-SNAPSHOT 41 | system 42 | ${project.basedir}/lib/gcviewer-1.36-SNAPSHOT.jar 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.apache.maven.plugins 51 | maven-compiler-plugin 52 | 53 | 1.8 54 | 1.8 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /src/python/GroupBy/GroupByTaskExecutionTime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | plt.rc('pdf', fonttype=42) 22 | fig = plt.figure(figsize=(3.4, 2.4)) 23 | ax = fig.add_subplot(111) 24 | plt.subplots_adjust(left=0.20, bottom=0.11, right=0.96, top=0.87, 25 | wspace=0.03, hspace=0.04) 26 | 27 | #plt.tight_layout() 28 | legend_properties = {'weight':'bold'} 29 | 30 | xvals = [62, 38, 70] 31 | 
yvals = [60, 40, 31] 32 | zvals = [62, 43, 16] 33 | 34 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 35 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 36 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 37 | 38 | ax.set_ylabel('Time (s)', color='black') 39 | ax.set_xticks(ind+width) 40 | ax.set_xticklabels( ('CompTime', 'SpillTime', 'GCTime'), color='black')#, borderaxespad = 'bold') 41 | 42 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 43 | frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 44 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 45 | ax.set_ylim(0, 100) # The ceil 46 | #plt.xlim(-0.3, 2.76) # The ceil 47 | ax.set_xlim(-0.32, 2.78) # The ceil 48 | 49 | plt.title("(a) GroupBy-0.5-task-execution-time", fontsize=12) 50 | 51 | 52 | 53 | def autolabel(rects): 54 | for rect in rects: 55 | h = rect.get_height() 56 | ax.text(rect.get_x()+rect.get_width()/2., 1.05*h, '%d'%int(h), 57 | ha='center', va='bottom', fontsize=11) 58 | 59 | autolabel(rects1) 60 | autolabel(rects2) 61 | autolabel(rects3) 62 | 63 | plt.show() -------------------------------------------------------------------------------- /src/python/PageRank/PageRankTaskGCTime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of 
the bars 20 | 21 | plt.rc('pdf', fonttype=42) 22 | fig = plt.figure(figsize=(3.4, 2.4)) 23 | ax = fig.add_subplot(111) 24 | plt.subplots_adjust(left=0.21, bottom=0.11, right=0.96, top=0.87, 25 | wspace=0.03, hspace=0.04) 26 | 27 | #plt.tight_layout() 28 | legend_properties = {'weight':'bold'} 29 | 30 | xvals = [92, 566, 0] 31 | yvals = [204, 4, 377] 32 | zvals = [133, 65, 612] 33 | 34 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 35 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 36 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 37 | 38 | ax.set_ylabel('GC time (s)', color='black') 39 | ax.set_xticks(ind+width) 40 | ax.set_xticklabels( ('YGC', 'FGC', 'ConGC'), color='black')#, borderaxespad = 'bold') 41 | 42 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 43 | frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 44 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 45 | ax.set_ylim(0, 1200) # The ceil 46 | #plt.xlim(-0.3, 2.76) # The ceil 47 | ax.set_xlim(-0.32, 2.78) # The ceil 48 | 49 | plt.title("(b) PageRank-task-GC-time", fontsize=12) 50 | 51 | 52 | 53 | def autolabel(rects, loc, angle): 54 | for rect in rects: 55 | h = rect.get_height() 56 | ax.text(rect.get_x()+rect.get_width()/2.+loc, 1.05*h, '%d'%int(h), 57 | ha='center', va='bottom', fontsize=11, rotation=angle) 58 | 59 | autolabel(rects1, 0, 0) 60 | autolabel(rects2, -0.01, 0) 61 | autolabel(rects3, 0.015, 0) 62 | 63 | plt.show() -------------------------------------------------------------------------------- /src/python/SVM/SVMTaskExecutionTime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | 
mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | plt.rc('pdf', fonttype=42) 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | fig = plt.figure(figsize=(3.4, 2.4)) 22 | ax = fig.add_subplot(111) 23 | plt.subplots_adjust(left=0.21, bottom=0.11, right=0.96, top=0.87, 24 | wspace=0.03, hspace=0.04) 25 | 26 | #plt.tight_layout() 27 | legend_properties = {'weight':'bold'} 28 | 29 | xvals = [503, 0, 70] # Parallel-0004-Executor-2-9.6min(1.1min) 30 | yvals = [473, 0, 68] # CMS-0017-Executor-16-9.6min(1.0min) 31 | #zvals = [0, 0, 0] 32 | 33 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 34 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 35 | #rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 36 | 37 | ax.set_ylabel('Time (s)', color='black') 38 | ax.set_xticks(ind+width/2) 39 | ax.set_xticklabels( ('CompTime', 'SpillTime', 'GCTime'), color='black')#, borderaxespad = 'bold') 40 | 41 | ax.legend( (rects1[0], rects2[0]), ('Parallel', 'CMS'), 42 | frameon=False, loc = "upper center", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 43 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 44 | ax.set_ylim(0, 1000) # The ceil 45 | #plt.xlim(-0.3, 2.76) # The ceil 46 | ax.set_xlim(-0.32, 2.70) # The ceil 47 | 48 | plt.title("(a) SVM-task-execution-time", fontsize=12) 49 | 50 | 51 | 52 | def autolabel(rects, loc, angle): 53 | for rect in rects: 54 | h = rect.get_height() 55 | ax.text(rect.get_x()+rect.get_width()/2.+loc, 1.03*h, '%d'%int(h), 56 | ha='center', va='bottom', fontsize=11, 
rotation=angle) 57 | 58 | autolabel(rects1, 0, 45) 59 | autolabel(rects2, 0, 45) 60 | 61 | 62 | plt.show() -------------------------------------------------------------------------------- /src/python/PageRank/PageRankTaskExecutionTime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | plt.rc('pdf', fonttype=42) 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | plt.rc('pdf', fonttype=42) 22 | fig = plt.figure(figsize=(3.4, 2.4)) 23 | ax = fig.add_subplot(111) 24 | plt.subplots_adjust(left=0.21, bottom=0.11, right=0.96, top=0.87, 25 | wspace=0.03, hspace=0.04) 26 | 27 | #plt.tight_layout() 28 | legend_properties = {'weight':'bold'} 29 | 30 | xvals = [766, 102, 678] 31 | yvals = [842, 91, 208] 32 | zvals = [1580, 215, 198] 33 | 34 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 35 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 36 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 37 | 38 | ax.set_ylabel('Time (s)', color='black') 39 | ax.set_xticks(ind+width) 40 | ax.set_xticklabels( ('CompTime', 'SpillTime', 'GCTime'), color='black')#, borderaxespad = 'bold') 41 | 42 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 43 | frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 44 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 45 | 
ax.set_ylim(0, 2500) # The ceil 46 | #plt.xlim(-0.3, 2.76) # The ceil 47 | ax.set_xlim(-0.32, 2.78) # The ceil 48 | 49 | plt.title("(a) PageRank-task-execution-time", fontsize=12) 50 | 51 | 52 | 53 | def autolabel(rects, loc, angle): 54 | for rect in rects: 55 | h = rect.get_height() 56 | ax.text(rect.get_x()+rect.get_width()/2.+loc, 1.03*h, '%d'%int(h), 57 | ha='center', va='bottom', fontsize=11, rotation=angle) 58 | 59 | autolabel(rects1, -0.02, -45) 60 | autolabel(rects2, -0.02, -45) 61 | autolabel(rects3, -0.02, -45) 62 | 63 | plt.show() -------------------------------------------------------------------------------- /src/python/GroupBy/GroupBy-GC-Time.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | plt.rc('pdf', fonttype=42) 22 | fig = plt.figure(figsize=(3.4, 2.4)) 23 | ax = fig.add_subplot(111) 24 | plt.subplots_adjust(left=0.20, bottom=0.11, right=0.96, top=0.87, 25 | wspace=0.03, hspace=0.04) 26 | 27 | #plt.tight_layout() 28 | 29 | xvals = [17, 53, 0] 30 | yvals = [22, 9, 34] 31 | zvals = [14, 2, 28] 32 | 33 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 34 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 35 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 36 | 37 | ax.set_ylabel('GC time (s)', color='black') 38 | ax.set_xticks(ind+width) 39 | ax.set_xticklabels( ('YGC', 'FGC', 'ConGC'), 
color='black' ) 40 | # ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 41 | # frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, fontsize=10 ) 42 | 43 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 44 | frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 45 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 46 | 47 | ax.set_ylim(0, 100) # The ceil 48 | #plt.xlim(-0.3, 2.76) # The ceil 49 | ax.set_xlim(-0.32, 2.78) # The ceil 50 | 51 | #plt.title("(a) GroupBy-task-execution-time", fontsize=12) 52 | plt.title("(b) GroupBy-0.5-task-GC-time", fontsize=12) 53 | 54 | 55 | def autolabel(rects): 56 | for rect in rects: 57 | h = rect.get_height() 58 | ax.text(rect.get_x()+rect.get_width()/2., 1.05*h, '%d'%int(h), 59 | ha='center', va='bottom', fontsize=11) 60 | 61 | autolabel(rects1) 62 | autolabel(rects2) 63 | autolabel(rects3) 64 | 65 | 66 | plt.show() -------------------------------------------------------------------------------- /src/python/Join-200G/JoinGCTime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | plt.rc('pdf', fonttype=42) 22 | fig = plt.figure(figsize=(3.4, 2.4)) 23 | ax = fig.add_subplot(111) 24 | plt.subplots_adjust(left=0.20, bottom=0.11, right=0.96, top=0.87, 25 | wspace=0.03, hspace=0.04) 26 | 27 | #plt.tight_layout() 28 | 29 | xvals = [27, 2436, 0] 30 | 
yvals = [30, 1, 64] 31 | zvals = [80, 40, 631] 32 | 33 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 34 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 35 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 36 | 37 | ax.set_ylabel('GC time (s)', color='black') 38 | ax.set_xticks(ind+width) 39 | ax.set_xticklabels( ('YGC', 'FGC', 'ConGC'), color='black' ) 40 | # ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 41 | # frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, fontsize=10 ) 42 | 43 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 44 | frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 45 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 46 | 47 | ax.set_ylim(0, 3800) # The ceil 48 | #plt.xlim(-0.3, 2.76) # The ceil 49 | ax.set_xlim(-0.32, 2.78) # The ceil 50 | 51 | #plt.title("(a) GroupBy-task-execution-time", fontsize=12) 52 | plt.title("(b) Join-1.0-task-GC-time", fontsize=12) 53 | 54 | 55 | def autolabel(rects): 56 | for rect in rects: 57 | h = rect.get_height() 58 | ax.text(rect.get_x()+rect.get_width()/2., 1.05*h, '%d'%int(h), 59 | ha='center', va='bottom', fontsize=11) 60 | 61 | autolabel(rects1) 62 | autolabel(rects2) 63 | autolabel(rects3) 64 | 65 | 66 | plt.show() -------------------------------------------------------------------------------- /src/python/AggregateByKey/AggregateByKey-GC-Time.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 
'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | plt.rc('pdf', fonttype=42) 22 | fig = plt.figure(figsize=(3.4, 2.4)) 23 | ax = fig.add_subplot(111) 24 | plt.subplots_adjust(left=0.20, bottom=0.11, right=0.96, top=0.87, 25 | wspace=0.03, hspace=0.04) 26 | 27 | #plt.tight_layout() 28 | 29 | xvals = [28, 980, 0] 30 | yvals = [30, 17, 129] 31 | zvals = [45, 19, 264] 32 | 33 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 34 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 35 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 36 | 37 | ax.set_ylabel('GC time (s)', color='black') 38 | ax.set_xticks(ind+width) 39 | ax.set_xticklabels( ('YGC', 'FGC', 'ConGC'), color='black' ) 40 | # ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 41 | # frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, fontsize=10 ) 42 | 43 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 44 | frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 45 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 46 | 47 | ax.set_ylim(0, 1500) # The ceil 48 | #plt.xlim(-0.3, 2.76) # The ceil 49 | ax.set_xlim(-0.32, 2.78) # The ceil 50 | 51 | #plt.title("(a) GroupBy-task-execution-time", fontsize=12) 52 | plt.title("(b) GroupBy-1.0-task-GC-time", fontsize=12) 53 | 54 | 55 | def autolabel(rects): 56 | for rect in rects: 57 | h = rect.get_height() 58 | ax.text(rect.get_x()+rect.get_width()/2., 1.05*h, '%d'%int(h), 59 | ha='center', va='bottom', fontsize=11) 60 | 61 | autolabel(rects1) 62 | autolabel(rects2) 63 | autolabel(rects3) 64 | 65 | 66 | 
plt.show() -------------------------------------------------------------------------------- /src/python/SVM05/SVMTaskExecutionTime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | plt.rc('pdf', fonttype=42) 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | fig = plt.figure(figsize=(3.4, 2.4)) 22 | ax = fig.add_subplot(111) 23 | plt.subplots_adjust(left=0.21, bottom=0.11, right=0.96, top=0.87, 24 | wspace=0.03, hspace=0.04) 25 | 26 | #plt.tight_layout() 27 | legend_properties = {'weight':'bold'} 28 | 29 | # id=15 Parallel 30 | # id=18 CMS 31 | # id=29 G1 32 | xvals = [163, 0, 24] # Parallel-0004-Executor-2-9.6min(1.1min) 33 | yvals = [159, 0, 16] # CMS-0017-Executor-16-9.6min(1.0min) 34 | zvals = [173, 0, 8] 35 | 36 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 37 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 38 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 39 | 40 | ax.set_ylabel('Time (s)', color='black') 41 | ax.set_xticks(ind+width) 42 | ax.set_xticklabels( ('CompTime', 'SpillTime', 'GCTime'), color='black')#, borderaxespad = 'bold') 43 | 44 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 45 | frameon=False, loc = "upper center", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 46 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 47 | ax.set_ylim(0, 
300) # The ceil 48 | plt.xlim(-0.3, 2.76) # The ceil 49 | #ax.set_xlim(-0.32, 2.70) # The ceil 50 | 51 | plt.title("(a) SVM-0.5-task-execution-time", fontsize=12) 52 | 53 | 54 | 55 | def autolabel(rects, loc, angle): 56 | for rect in rects: 57 | h = rect.get_height() 58 | ax.text(rect.get_x()+rect.get_width()/2.+loc, 1.03*h, '%d'%int(h), 59 | ha='center', va='bottom', fontsize=11, rotation=angle) 60 | 61 | autolabel(rects1, 0, 45) 62 | autolabel(rects2, 0, 45) 63 | autolabel(rects3, 0, 45) 64 | 65 | 66 | plt.show() -------------------------------------------------------------------------------- /src/main/java/util/JsonFileReader.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | 11 | public class JsonFileReader { 12 | 13 | public static String readFile(String fileName) { 14 | BufferedReader br = null; 15 | FileReader fr = null; 16 | StringBuilder sb = new StringBuilder(); 17 | 18 | try { 19 | fr = new FileReader(fileName); 20 | br = new BufferedReader(fr); 21 | String sCurrentLine; 22 | 23 | br = new BufferedReader(new FileReader(fileName)); 24 | 25 | while ((sCurrentLine = br.readLine()) != null) { 26 | sb.append(sCurrentLine).append("\r\n"); 27 | } 28 | } catch (IOException e) { 29 | e.printStackTrace(); 30 | } finally { 31 | try { 32 | if (br != null) 33 | br.close(); 34 | if (fr != null) 35 | fr.close(); 36 | } catch (IOException ex) { 37 | ex.printStackTrace(); 38 | } 39 | } 40 | 41 | return sb.toString(); 42 | } 43 | 44 | public static List readFileLines(String fileName) { 45 | BufferedReader br = null; 46 | FileReader fr = null; 47 | List lines = new ArrayList(); 48 | 49 | if (new File(fileName).exists() == false) 50 | return lines; 51 | 52 | try { 53 | fr = new FileReader(fileName); 54 | br = new 
BufferedReader(fr); 55 | String sCurrentLine; 56 | 57 | br = new BufferedReader(new FileReader(fileName)); 58 | 59 | while ((sCurrentLine = br.readLine()) != null) { 60 | lines.add(sCurrentLine); 61 | } 62 | } catch (IOException e) { 63 | System.err.println("Error in reading file: " + fileName); 64 | e.printStackTrace(); 65 | } finally { 66 | try { 67 | if (br != null) 68 | br.close(); 69 | if (fr != null) 70 | fr.close(); 71 | } catch (IOException ex) { 72 | ex.printStackTrace(); 73 | } 74 | } 75 | 76 | return lines; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/python/SVM/SVMTaskGCTime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | plt.rc('pdf', fonttype=42) 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | fig = plt.figure(figsize=(3.4, 2.4)) 22 | ax = fig.add_subplot(111) 23 | plt.subplots_adjust(left=0.21, bottom=0.11, right=0.96, top=0.87, 24 | wspace=0.03, hspace=0.04) 25 | 26 | #plt.tight_layout() 27 | legend_properties = {'weight':'bold'} 28 | 29 | xvals = [8.2, 62, 0] 30 | yvals = [32, 36, 109] 31 | #zvals = [0, 0, 0] 32 | 33 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 34 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 35 | #rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 36 | 37 | ax.set_ylabel('GC time (s)', color='black') 38 | ax.set_xticks(ind+width/2) 39 | 
ax.set_xticklabels( ('YGC', 'FGC', 'ConGC'), color='black')#, borderaxespad = 'bold') 40 | 41 | # ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 42 | # frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 43 | # fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 44 | 45 | ax.legend( (rects1[0], rects2[0]), ('Parallel', 'CMS'), 46 | frameon=False, loc = "upper center", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 47 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 48 | 49 | ax.set_ylim(0, 200) # The ceil 50 | #plt.xlim(-0.3, 2.76) # The ceil 51 | ax.set_xlim(-0.32, 2.70) # The ceil 52 | 53 | plt.title("(b) SVM-task-GC-time", fontsize=12) 54 | 55 | 56 | 57 | def autolabel(rects, loc, angle): 58 | for rect in rects: 59 | h = rect.get_height() 60 | ax.text(rect.get_x()+rect.get_width()/2.+loc, 1.05*h, '%d'%int(h), 61 | ha='center', va='bottom', fontsize=11, rotation=angle) 62 | 63 | autolabel(rects1, 0, 0) 64 | autolabel(rects2, 0, 0) 65 | #autolabel(rects3, 0.015, 0) 66 | 67 | plt.show() -------------------------------------------------------------------------------- /src/main/java/util/AppListFetcher.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /** 7 | * Created by xulijie on 17-7-22. 
8 | */ 9 | public class AppListFetcher { 10 | 11 | // [startId, endId] 12 | public static List fetch(String siteURL) { 13 | List appList = new ArrayList(); 14 | List lines = HtmlFetcher.fetchLines(siteURL); 15 | for (String line: lines) { 16 | if (line.trim().startsWith("") + 1; 18 | int end = line.lastIndexOf("<"); 19 | String appId = line.substring(start, end); 20 | System.out.println(appId); 21 | } 22 | } 23 | 24 | 25 | // app-20170721181818-0089 26 | return appList; 27 | } 28 | 29 | public static List fetchLocalFile(String siteURL, int startId, int endId) { 30 | List appList = new ArrayList(); 31 | List lines = JsonFileReader.readFileLines(siteURL); 32 | 33 | for (String line: lines) { 34 | if (line.trim().contains("app?appId=")) { 35 | int start = line.indexOf(">") + 1; 36 | int end = line.lastIndexOf("<"); 37 | String appId = line.substring(start, end); 38 | System.out.println(appId); 39 | } 40 | } 41 | 42 | 43 | // app-20170721181818-0089 44 | return appList; 45 | } 46 | 47 | public static void main(String[] args) { 48 | String url = "http://aliMaster:8080/"; 49 | //url = "/Users/xulijie/Documents/GCResearch/Experiments-2018/MasterUI/Join-0.5-100G/ISCAS Spark Master at spark___master_7077.htm"; 50 | //url = "/Users/xulijie/Documents/GCResearch/Experiments-2018/MasterUI/AggregateByKey-0.5-3/ISCAS Spark Master at spark___master_7077.htm"; 51 | //url = "/Users/xulijie/Documents/GCResearch/Experiments-2018/MasterUI/SQLGroupBy-1.0-200G-2/ISCAS Spark Master at spark___master_7077.htm"; 52 | //url = "/Users/xulijie/Documents/GCResearch/Experiments-2018/MasterUI/AggregateAndJoin-CPU/ISCAS Spark Master at spark___master_7077.htm"; 53 | url = "/Users/xulijie/Documents/GCResearch/Experiments-2018/MasterUI/Join-1.0-5G/ISCAS Spark Master at spark___master_7077.htm"; 54 | 55 | 56 | // fetch(url); 57 | fetchLocalFile(url, 0, 0); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/python/SVM05/SVMTaskGCTime.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib as mpl 4 | 5 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 6 | 7 | #plt.rc('font', family='Helvetica') 8 | # font = {'family' : 'Helvetica', 9 | # 'weight' : 'normal', 10 | # 'color' : 'black', 11 | # 'size' : '12'} 12 | 13 | plt.rc('pdf', fonttype=42) 14 | plt.rc('font', family='Helvetica', size=12) 15 | 16 | N = 3 17 | ind = np.arange(N) # the x locations for the groups 18 | 19 | width = 0.23 # the width of the bars 20 | 21 | fig = plt.figure(figsize=(3.4, 2.4)) 22 | ax = fig.add_subplot(111) 23 | plt.subplots_adjust(left=0.21, bottom=0.11, right=0.96, top=0.87, 24 | wspace=0.03, hspace=0.04) 25 | 26 | #plt.tight_layout() 27 | legend_properties = {'weight':'bold'} 28 | 29 | 30 | xvals = [5, 19, 0] 31 | yvals = [14, 2, 36] # 24 concurrent GC cycles 32 | zvals = [7, 1, 6] 33 | 34 | rects1 = ax.bar(ind, xvals, width, color='lightpink', edgecolor='black')#, hatch="///") 35 | rects2 = ax.bar(ind+width, yvals, width, color='lightgreen', edgecolor='black', hatch='xxx') 36 | rects3 = ax.bar(ind+width*2, zvals, width, color='deepskyblue', edgecolor='black', hatch='\\\\\\') 37 | 38 | ax.set_ylabel('GC time (s)', color='black') 39 | ax.set_xticks(ind+width) 40 | ax.set_xticklabels( ('YGC', 'FGC', 'ConGC'), color='black')#, borderaxespad = 'bold') 41 | 42 | # ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 43 | # frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 44 | # fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8) 45 | 46 | ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'), 47 | frameon=False, loc = "upper center", labelspacing=0.2, markerfirst=False, #prop=legend_properties, 48 | fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, 
/**
 * Runs external commands and either echoes or collects their standard output.
 *
 * NOTE(review): the child's standard error is never read; per the
 * {@link Process} documentation a child that fills that pipe may block.
 */
public class CommandRunner {

    /**
     * Runs {@code cmd} and echoes every line of its standard output to
     * {@code System.out}. Failures are printed and never propagated.
     *
     * @param cmd command line, tokenized by {@link Runtime#exec(String)}
     */
    public static void exec(String cmd) {
        System.out.println(cmd);
        try {
            run(cmd, null);
        } catch (RuntimeException e) {
            // The previous implementation caught every Exception here;
            // keep that best-effort contract for callers.
            e.printStackTrace();
        }
    }

    /**
     * Runs {@code cmd} and returns its standard output with the lines
     * concatenated (line terminators are dropped).
     *
     * @param cmd command line, tokenized by {@link Runtime#exec(String)}
     * @return the collected output; whatever was read so far if the command fails
     */
    public static String execCurl(String cmd) {
        System.out.println("[Executing] " + cmd);
        StringBuilder sb = new StringBuilder();
        run(cmd, sb);
        return sb.toString();
    }

    /**
     * Starts {@code cmd}, drains its standard output and waits for it to exit.
     *
     * @param cmd  command line to execute
     * @param sink receives the output lines; when {@code null} the lines are
     *             printed to {@code System.out} instead
     */
    private static void run(String cmd, StringBuilder sink) {
        BufferedReader inBr = null;
        try {
            Process p = Runtime.getRuntime().exec(cmd);

            inBr = new BufferedReader(
                    new InputStreamReader(new BufferedInputStream(p.getInputStream())));
            String lineStr;
            while ((lineStr = inBr.readLine()) != null) {
                if (sink != null)
                    sink.append(lineStr);
                else
                    System.out.println(lineStr);
            }

            // Report every non-zero exit code, not only the value 1.
            int exitCode = p.waitFor();
            if (exitCode != 0)
                System.err.println("[" + exitCode + "]Error in executing " + cmd);

            System.out.println("[Done] " + cmd);
        } catch (InterruptedException e) {
            // Restore the flag so the caller can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the stream on every path, including a failed read.
            if (inBr != null) {
                try {
                    inBr.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }


    public static void main(String[] args) {
        exec("curl --help");
    }

}
"""
Broken axis example, where the y-axis will have a portion cut out.
"""
import matplotlib.pyplot as plt
import numpy as np


# 30 points between [0, 0.2) originally made using np.random.rand(30)*.2
pts = np.array([
    0.015, 0.166, 0.133, 0.159, 0.041, 0.024, 0.195, 0.039, 0.161, 0.018,
    0.143, 0.056, 0.125, 0.096, 0.094, 0.051, 0.043, 0.021, 0.138, 0.075,
    0.109, 0.195, 0.050, 0.074, 0.079, 0.155, 0.020, 0.010, 0.061, 0.008])

# Now let's make two outlier points which are far away from everything.
pts[[3, 14]] += .8

# If we were to simply plot pts, we'd lose most of the interesting
# details due to the outliers. So let's 'break' or 'cut-out' the y-axis
# into two portions - use the top (ax) for the outliers, and the bottom
# (ax2) for the details of the majority of our data
f, (ax, ax2) = plt.subplots(2, 1, sharex=True)

# plot the same data on both axes
ax.plot(pts)
ax2.plot(pts)

# zoom-in / limit the view to different portions of the data
ax.set_ylim(.78, 1.)  # outliers only
ax2.set_ylim(0, .22)  # most of the data

# hide the spines between ax and ax2
ax.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax.xaxis.tick_top()
# `labeltop` is a boolean option; the string 'off' is truthy and would
# therefore leave the labels switched on.
ax.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.xaxis.tick_bottom()

# This looks pretty good, and was fairly painless, but you can get that
# cut-out diagonal lines look with just a bit more work. The important
# thing to know here is that in axes coordinates, which are always
# between 0-1, spine endpoints are at these locations (0,0), (0,1),
# (1,0), and (1,1).  Thus, we just need to put the diagonals in the
# appropriate corners of each of our axes, and so long as we use the
# right transform and disable clipping.

d = .015  # how big to make the diagonal lines in axes coordinates
# arguments to pass to plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((-d, +d), (-d, +d), **kwargs)        # top-left diagonal
ax.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # top-right diagonal

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)  # bottom-left diagonal
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # bottom-right diagonal

# What's cool about this is that now if we vary the distance between
# ax and ax2 via f.subplots_adjust(hspace=...) or plt.subplot_tool(),
# the diagonal lines will move accordingly, and stay right at the tips
# of the spines they are 'breaking'

plt.show()
"""Grouped bar chart "(a) Join-1.0-task-execution-time": CompTime, SpillTime
and GCTime (seconds) of one task each under the Parallel, CMS and G1 collectors."""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Global look: thicker axes frame, Helvetica 12pt, Type 42 fonts in PDF output.
mpl.rcParams['axes.linewidth'] = 1.5
plt.rc('font', family='Helvetica', size=12)
plt.rc('pdf', fonttype=42)

N = 3
width = 0.23        # the width of the bars
ind = np.arange(N)  # the x locations for the groups

fig = plt.figure(figsize=(3.4, 2.4))
ax = fig.add_subplot(111)
plt.subplots_adjust(left=0.20, bottom=0.11, right=0.96, top=0.87,
                    wspace=0.03, hspace=0.04)

# Only referenced by the disabled `prop=` legend option.
legend_properties = {'weight': 'bold'}

# One [CompTime, SpillTime, GCTime] triple per collector (legend order below).
xvals = [1147, 126, 2464]  # Duration: 3737s, taskId: 2000, spillTime=126s, GC=2464s, YGC=27s, FGC=2436s (136 times)
yvals = [1811, 60, 31]     # Duration: 1902s, taskId: 2012, spillTime=60s, GC=31s, YGC=30s, FGC=1s (11 times), ConGC=64s
zvals = [1838, 128, 120]   # Duration:2086s, taskId: 2014, spillTime=167s, GC=120s, YGC=80s, FGC=40s, ConGC=631s
# Alternative G1 sample: [1892, 57, 130]  (Duration:2079s, taskId: 2013, spillTime=57s, GC=123s, YGC=86s, FGC=37s, ConGC=625s)

# The three series sit side by side, one bar width apart.
series_styles = [
    (xvals, dict(color='lightpink', edgecolor='black')),
    (yvals, dict(color='lightgreen', edgecolor='black', hatch='xxx')),
    (zvals, dict(color='deepskyblue', edgecolor='black', hatch='\\\\\\')),
]
rects1, rects2, rects3 = [
    ax.bar(ind + slot * width, values, width, **style)
    for slot, (values, style) in enumerate(series_styles)
]

ax.set_ylabel('Time (s)', color='black')
ax.set_xticks(ind + width)
ax.set_xticklabels(('CompTime', 'SpillTime', 'GCTime'), color='black')

legend_handles = (rects1[0], rects2[0], rects3[0])
ax.legend(legend_handles, ('Parallel', 'CMS', 'G1'),
          frameon=False, loc="upper right", labelspacing=0.2, markerfirst=False,
          fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)
ax.set_ylim(0, 4000)      # The ceil
ax.set_xlim(-0.32, 2.78)  # The ceil

plt.title("(a) Join-1.0-task-execution-time", fontsize=12)


def autolabel(rects, loc, angle):
    """Write each bar's integer height above it.

    `loc` shifts the text horizontally (data units) and `angle` is the text
    rotation in degrees.
    """
    for bar in rects:
        height = bar.get_height()
        x_text = bar.get_x() + bar.get_width() / 2. + loc
        ax.text(x_text, 1.03 * height, '%d' % int(height),
                ha='center', va='bottom', fontsize=11, rotation=angle, weight='bold')


for bars, shift in ((rects1, -0.08), (rects2, -0.02), (rects3, -0.02)):
    autolabel(bars, shift, -45)

plt.show()
"""Grouped bar chart "(a) GroupBy-1.0-task-execution-time": CompTime, SpillTime
and GCTime (seconds) of one task each under the Parallel, CMS and G1 collectors."""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Global look: thicker axes frame, Helvetica 12pt, Type 42 fonts in PDF output.
mpl.rcParams['axes.linewidth'] = 1.5
plt.rc('font', family='Helvetica', size=12)
plt.rc('pdf', fonttype=42)

N = 3
width = 0.23        # the width of the bars
ind = np.arange(N)  # the x locations for the groups

fig = plt.figure(figsize=(3.4, 2.4))
ax = fig.add_subplot(111)
plt.subplots_adjust(left=0.20, bottom=0.11, right=0.96, top=0.87,
                    wspace=0.03, hspace=0.04)

# Only referenced by the disabled `prop=` legend option.
legend_properties = {'weight': 'bold'}

# One [CompTime, SpillTime, GCTime] triple per collector (legend order below).
xvals = [365, 63, 1008]  # D: 1436, GC 1008s (FGC: 980s, YGC: 28s, FGC: 84 times), Parallel-n1-E9-T13-id-1693, 90 times YGC
yvals = [878, 89, 47]    # D: 1014, taskId:1702, spillTime=89s, GC 47s (FGC: 17s, YGC: 30s, ConGC: 129s, 27 times), 521 times YGC
zvals = [898, 224, 55]   # Duration: 1177 , E9-T19-1699 spillTime=224s, GC:55s (FGC: 10s, YGC: 45s, ConGC: 264s, 52 times), 257 times YGC
# Alternative G1 sample: [856, 276, 58]  (Duration: 1190s, GC:58s (FGC:12s, YGC: 46s, ConGC: 262s, 57 times))

# The three series sit side by side, one bar width apart.
series_styles = [
    (xvals, dict(color='lightpink', edgecolor='black')),
    (yvals, dict(color='lightgreen', edgecolor='black', hatch='xxx')),
    (zvals, dict(color='deepskyblue', edgecolor='black', hatch='\\\\\\')),
]
rects1, rects2, rects3 = [
    ax.bar(ind + slot * width, values, width, **style)
    for slot, (values, style) in enumerate(series_styles)
]

ax.set_ylabel('Time (s)', color='black')
ax.set_xticks(ind + width)
ax.set_xticklabels(('CompTime', 'SpillTime', 'GCTime'), color='black')

legend_handles = (rects1[0], rects2[0], rects3[0])
ax.legend(legend_handles, ('Parallel', 'CMS', 'G1'),
          frameon=False, loc="upper right", labelspacing=0.2, markerfirst=False,
          fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)
ax.set_ylim(0, 1600)      # The ceil
ax.set_xlim(-0.32, 2.78)  # The ceil

plt.title("(a) GroupBy-1.0-task-execution-time", fontsize=12)


def autolabel(rects, loc, angle):
    """Write each bar's integer height above it.

    `loc` shifts the text horizontally (data units) and `angle` is the text
    rotation in degrees.
    """
    for bar in rects:
        height = bar.get_height()
        x_text = bar.get_x() + bar.get_width() / 2. + loc
        ax.text(x_text, 1.03 * height, '%d' % int(height),
                ha='center', va='bottom', fontsize=11, rotation=angle)


for bars in (rects1, rects2, rects3):
    autolabel(bars, -0.02, -40)

plt.show()
/**
 * Minimal HTTP GET helper that downloads a page as UTF-8 text.
 *
 * Both entry points are best-effort: I/O failures are printed and whatever
 * was read before the failure is returned.
 */
public class HtmlFetcher {

    /**
     * Downloads {@code siteURL} and returns its body as one string, with every
     * line terminated by {@code "\r\n"}.
     *
     * @param siteURL absolute URL to fetch
     * @return the (possibly partial or empty) response text
     */
    public static String fetch(String siteURL) {
        StringBuilder response = new StringBuilder();

        for (String line : fetchLines(siteURL)) {
            response.append(line).append("\r\n");
        }

        return response.toString();
    }

    /**
     * Downloads {@code siteURL} and returns its body split into lines
     * (line terminators removed).
     *
     * @param siteURL absolute URL to fetch
     * @return the lines read before the end of the stream or the first failure
     */
    public static List<String> fetchLines(String siteURL) {
        List<String> list = new ArrayList<String>();

        HttpURLConnection connection = null;
        BufferedReader reader = null;

        try {
            URL url = new URL(siteURL);
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            // connection.setConnectTimeout(8000);
            // connection.setReadTimeout(8000);

            reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"));

            String line;
            while ((line = reader.readLine()) != null) {
                list.add(line);
            }

        } catch (IndexOutOfBoundsException e) {
            e.printStackTrace();
        } catch (IOException e) {
            // Also covers MalformedURLException and ProtocolException.
            e.printStackTrace();
        } finally {
            // Close the reader on every path; previously it leaked whenever
            // reading failed part-way through.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (connection != null)
                connection.disconnect();
        }

        return list;
    }
}
"""Demo of Axes.bxp: draw box plots from precomputed (and deliberately
modified) statistics, first toggling elements, then customizing their style."""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook

# fake data
np.random.seed(937)
data = np.random.lognormal(size=(37, 4), mean=1.5, sigma=1.75)

print(data)
labels = list('ABCD')
print('labels:', labels)

# compute the boxplot stats
stats = cbook.boxplot_stats(data, labels=labels, bootstrap=10000)
print(stats)
print(type(stats))

# The stats are plain dicts, so anything can be changed after the fact.
# To prove it, every median becomes the median of all the data and every
# mean is doubled.
overall_median = np.median(data)
for entry in stats:
    entry['med'] = overall_median
    entry['mean'] *= 2


fs = 10  # fontsize

# demonstrate how to toggle the display of different elements:
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6), sharey=True)

tufte_title = 'Tufte Style\n(showbox=False,\nshowcaps=False)'
toggle_panels = [
    ((0, 0), {}, 'Default'),
    ((0, 1), dict(showmeans=True), 'showmeans=True'),
    ((0, 2), dict(showmeans=True, meanline=True), 'showmeans=True,\nmeanline=True'),
    ((1, 0), dict(showbox=False, showcaps=False), tufte_title),
    ((1, 1), dict(shownotches=True), 'notch=True'),
    ((1, 2), dict(showfliers=False), 'showfliers=False'),
]
for cell, options, title in toggle_panels:
    axes[cell].bxp(stats, **options)
    axes[cell].set_title(title, fontsize=fs)

for ax in axes.flatten():
    ax.set_yscale('log')
    ax.set_yticklabels([])

fig.subplots_adjust(hspace=0.4)
plt.show()

# demonstrate how to customize the display different elements:
boxprops = dict(linestyle='--', linewidth=3, color='darkgoldenrod')
flierprops = dict(marker='o', markerfacecolor='green', markersize=12,
                  linestyle='none')
medianprops = dict(linestyle='-.', linewidth=2.5, color='firebrick')
meanpointprops = dict(marker='D', markeredgecolor='black',
                      markerfacecolor='firebrick')
meanlineprops = dict(linestyle='--', linewidth=2.5, color='purple')

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(6, 6), sharey=True)

custom_panels = [
    ((0, 0), dict(boxprops=boxprops), 'Custom boxprops'),
    ((0, 1), dict(flierprops=flierprops, medianprops=medianprops),
     'Custom medianprops\nand flierprops'),
    ((1, 0), dict(meanprops=meanpointprops, meanline=False, showmeans=True),
     'Custom mean\nas point'),
    ((1, 1), dict(meanprops=meanlineprops, meanline=True, showmeans=True),
     'Custom mean\nas line'),
]
for cell, options, title in custom_panels:
    axes[cell].bxp(stats, **options)
    axes[cell].set_title(title, fontsize=fs)

for ax in axes.flatten():
    ax.set_yscale('log')
    ax.set_yticklabels([])

fig.suptitle("I never said they'd be pretty")
fig.subplots_adjust(hspace=0.4)
plt.show()
1].bxp(stats, flierprops=flierprops, medianprops=medianprops) 69 | axes[0, 1].set_title('Custom medianprops\nand flierprops', fontsize=fs) 70 | 71 | axes[1, 0].bxp(stats, meanprops=meanpointprops, meanline=False, 72 | showmeans=True) 73 | axes[1, 0].set_title('Custom mean\nas point', fontsize=fs) 74 | 75 | axes[1, 1].bxp(stats, meanprops=meanlineprops, meanline=True, 76 | showmeans=True) 77 | axes[1, 1].set_title('Custom mean\nas line', fontsize=fs) 78 | 79 | for ax in axes.flatten(): 80 | ax.set_yscale('log') 81 | ax.set_yticklabels([]) 82 | 83 | fig.suptitle("I never said they'd be pretty") 84 | fig.subplots_adjust(hspace=0.4) 85 | plt.show() -------------------------------------------------------------------------------- /src/main/java/parser/StageTasksJsonParser.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import appinfo.Application; 4 | import appinfo.Stage; 5 | import com.google.gson.*; 6 | import util.JsonFileReader; 7 | 8 | import java.io.File; 9 | import java.util.Map; 10 | 11 | 12 | public class StageTasksJsonParser { 13 | 14 | /** 15 | * 16 | * stageDir contains attempt-id.json and attempt-id-taskSummary.json 17 | * @param stageDir profiles/appName_appId/job-0/stage-0 18 | * @param jobId 0 19 | * @param stageId 0 20 | * @param app 21 | */ 22 | public void parseStageTasksJson(File stageDir, int jobId, int stageId, Application app) { 23 | 24 | Stage stage = app.getStage(stageId); 25 | 26 | for (File attemptFile : stageDir.listFiles()) { 27 | String attemptFileName = attemptFile.getName(); 28 | 29 | if (attemptFile.getName().contains("taskSummary")) { 30 | int stageAttemptId = Integer.parseInt(attemptFileName.substring(attemptFileName.indexOf('-') + 1, 31 | attemptFileName.lastIndexOf('-'))); 32 | 33 | parseTaskSummary(attemptFile, stage, stageAttemptId); 34 | } else { 35 | int stageAttemptId = Integer.parseInt(attemptFileName.substring(attemptFileName.indexOf('-') + 1, 36 | 
attemptFileName.lastIndexOf("json") - 1)); 37 | 38 | parseTasksJson(attemptFile, stage, stageAttemptId); 39 | } 40 | } 41 | 42 | } 43 | 44 | private void parseTaskSummary(File attemptFile, Stage stage, int stageAttemptId) { 45 | String taskSummaryJson = JsonFileReader.readFile(attemptFile.getAbsolutePath()); 46 | 47 | if (!taskSummaryJson.trim().isEmpty()) { 48 | try { 49 | JsonParser parser = new JsonParser(); 50 | JsonElement el = parser.parse(taskSummaryJson); 51 | stage.addTaskSummary(stageAttemptId, el.getAsJsonObject()); 52 | 53 | } catch (JsonIOException e) { 54 | e.printStackTrace(); 55 | } catch (JsonSyntaxException e) { 56 | e.printStackTrace(); 57 | } 58 | } 59 | 60 | } 61 | 62 | 63 | /** 64 | * @param attemptFile attempt-0.json 65 | */ 66 | private void parseTasksJson(File attemptFile, Stage stage, int stageAttemptId) { 67 | 68 | String stageTasksJson = JsonFileReader.readFile(attemptFile.getAbsolutePath()); 69 | 70 | try { 71 | JsonParser parser = new JsonParser(); 72 | JsonElement el = parser.parse(stageTasksJson); 73 | JsonObject tasksObject = el.getAsJsonObject().get("tasks").getAsJsonObject(); 74 | 75 | for (Map.Entry taskEntry : tasksObject.entrySet()) { 76 | JsonObject taskObject = taskEntry.getValue().getAsJsonObject(); 77 | stage.addTask(stageAttemptId, taskObject); 78 | } 79 | 80 | } catch (JsonIOException e) { 81 | e.printStackTrace(); 82 | } catch (JsonSyntaxException e) { 83 | e.printStackTrace(); 84 | } 85 | 86 | } 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/parser/JobsJsonParser.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import appinfo.Application; 4 | import appinfo.Job; 5 | 6 | import com.google.gson.*; 7 | import util.HtmlFetcher; 8 | import util.FileTextWriter; 9 | 10 | 11 | import java.io.File; 12 | import java.util.List; 13 | 14 | 15 | public class JobsJsonParser { 16 | 17 | 18 | private 
String appDir; 19 | private String appURL; 20 | 21 | public JobsJsonParser(String appURL, String appDir) { 22 | this.appDir = appDir; 23 | this.appURL = appURL; 24 | } 25 | 26 | public JobsJsonParser() {} 27 | 28 | public void parseJobsJson(String jobsJson, Application app) { 29 | 30 | try { 31 | JsonParser parser = new JsonParser(); 32 | JsonElement el = parser.parse(jobsJson); 33 | JsonArray jobJsonArray = null; 34 | 35 | if (el.isJsonArray()) 36 | jobJsonArray = el.getAsJsonArray(); 37 | else { 38 | System.err.println("Error in parsing the jobs json!"); 39 | System.exit(1); 40 | } 41 | 42 | 43 | /* 44 | jobObject represents 45 | { 46 | "jobId" : 16, 47 | "name" : "aggregate at AreaUnderCurve.scala:45", 48 | "submissionTime" : "2017-05-30T16:25:43.699GMT", 49 | "completionTime" : "2017-05-30T16:25:44.455GMT", 50 | "stageIds" : [ 33, 31, 32 ], 51 | "status" : "SUCCEEDED", 52 | "numTasks" : 160, 53 | "numActiveTasks" : 0, 54 | "numCompletedTasks" : 33, 55 | "numSkippedTasks" : 127, 56 | "numFailedTasks" : 0, 57 | "numActiveStages" : 0, 58 | "numCompletedStages" : 1, 59 | "numSkippedStages" : 2, 60 | "numFailedStages" : 0 61 | } 62 | */ 63 | 64 | for (JsonElement jobElem : jobJsonArray) { 65 | JsonObject jobObject = jobElem.getAsJsonObject(); 66 | Job job = new Job(jobObject); 67 | app.addJob(job); 68 | } 69 | 70 | } catch (JsonIOException e) { 71 | e.printStackTrace(); 72 | } catch (JsonSyntaxException e) { 73 | e.printStackTrace(); 74 | } 75 | 76 | } 77 | 78 | public void saveJobsJson(Application app) { 79 | // "profiles/WordCount-CMS-4-28_app-20170618202557-0295/jobs.json" 80 | String jobsJsonFile = appDir + File.separatorChar + "jobs.json"; 81 | 82 | String jobsJson = HtmlFetcher.fetch(appURL + "/jobs"); 83 | FileTextWriter.write(jobsJsonFile, jobsJson); 84 | 85 | parseJobsJson(jobsJson, app); 86 | } 87 | 88 | 89 | // "profiles/WordCount-CMS-4-28_app-20170618202557-0295/jobId/stageId/" 90 | private void saveStagesJsonPerJob(Job job) { 91 | List stageIds = 
/**
 * Created by xulijie on 17-6-23.
 *
 * Date and time helpers for the timestamps found in the job JSON and the logs.
 */

public class DateParser {

    // e.g., date = "2017-05-30T16:25:43.699GMT"
    // 'GMT' is a quoted literal, so the text is interpreted in the JVM default time zone.
    private static final String JSON_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'";

    // "17/11/20 19:01:15"
    private static final String LOG_TIME_PATTERN = "yy/MM/dd HH:mm:ss";

    /**
     * @param dateString e.g. "2017-05-30T16:25:43.699GMT"
     * @return the parsed instant in epoch milliseconds
     * @throws IllegalArgumentException if the text does not match the pattern
     */
    public static long parseDate(String dateString) {
        return parse(JSON_DATE_PATTERN, dateString).getTime();
    }

    /*
        "submissionTime" : "2017-05-30T16:25:43.699GMT"
        "completionTime" : "2017-05-30T16:25:44.455GMT"
        return duration = completionTime - submissionTime (ms)
    */
    public static long durationMS(String submissionTime, String completionTime) {
        long start = parseDate(submissionTime);
        long end = parseDate(completionTime);

        return end - start;
    }

    /**
     * Formats {@code time} (epoch milliseconds) as HH:mm:ss in the "GMT-8" zone.
     *
     * NOTE(review): together with parseDate reading the GMT text in the default
     * zone, this presumably reproduces UTC+8 wall-clock time on a UTC+8 machine;
     * confirm before changing either zone.
     */
    public static String getDate(long time) {
        SimpleDateFormat sdfChina = new SimpleDateFormat("HH:mm:ss");
        sdfChina.setTimeZone(TimeZone.getTimeZone("GMT-8"));
        Date date = new Date(time);
        return sdfChina.format(date);
    }

    /**
     * Folds colon-separated fields base 60, most significant first.
     *
     * @param hhmmss e.g. "03:02:30"
     * @return e.g. 10950 for "03:02:30"
     */
    public static int getTimeValue(String hhmmss) {
        int value = 0;

        for (String s : hhmmss.split(":")) {
            value = Integer.parseInt(s.trim()) + value * 60;
        }

        return value;
    }

    /**
     * @param startTime e.g. "17/11/20 19:01:15"
     * @return the parsed instant in epoch seconds
     * @throws IllegalArgumentException if the text does not match the pattern
     */
    public static long getTimeStamp(String startTime) {
        return parse(LOG_TIME_PATTERN, startTime).getTime() / 1000;
    }

    /**
     * Parses {@code text} with a fresh formatter. SimpleDateFormat is not safe
     * for concurrent use, so no instance is shared between calls.
     */
    private static Date parse(String pattern, String text) {
        try {
            return new SimpleDateFormat(pattern, Locale.CHINA).parse(text);
        } catch (ParseException e) {
            // Previously the failure was printed and then surfaced as a NullPointerException.
            throw new IllegalArgumentException("Unparseable date: " + text, e);
        }
    }

    public static void main(String[] args) {
        String startTime = "2017-11-20T11:51:06.245GMT";
        long duration = 4677;
        long endMS = DateParser.parseDate(startTime) + duration;

        String endTime = getDate(endMS);
        System.out.println("start = " + getDate(DateParser.parseDate(startTime)));
        System.out.println("end = " + endTime);

        System.out.println("03:02:30 = " + getTimeValue("03:02:30"));
        System.out.println("11:22:30 = " + getTimeValue("11:22:30"));
        System.out.println("23:59:59 = " + getTimeValue("23:59:59"));
        System.out.println("24:00:00 = " + getTimeValue("24:00:00"));
        System.out.println("00:01:59 = " + getTimeValue("00:01:59"));
        System.out.println("00:02:00 = " + getTimeValue("00:02:00"));


        startTime = "17/11/20 19:01:15";
        long timeStamp = getTimeStamp(startTime);
        System.out.println(timeStamp);

    }


}
"""Bar chart "The distribution of shuffled records in 32 reduce tasks"."""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import json
import io

mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally
plt.rc('font', family='Helvetica', size=12)
fig = plt.figure(figsize=(5.1, 2.4))
ax = fig.add_subplot(111)
plt.subplots_adjust(left=0.11, bottom=0.11, right=0.98, top=0.87,
                    wspace=0.03, hspace=0.04)



file_Dir="D:/plot/executors_0/"
jump=4  # distance between two labelled x ticks

# Disabled loader, presumably the source of the hard-coded lists below (confirm):
#file_list=os.listdir(file_Dir)
data_list=[]
name_list=[]
text_list=[]
# for file in file_list:
#     json_dir=file_Dir+file+"/gcMetrics-"+file+".json"
#     json_file=json.load(io.open(json_dir,'r',encoding='utf-8'))
#     data=json_file["jvmHeapSize"]["oldGen"]["peakSize"]
#     type=data.split(" ")
#     if type[1]=="mb":
#         data=(float(type[0])/1024)
#     else:
#         data=(float(type[0]))
#     text_list.append((data,file))
# text_list=sorted(text_list,cmp = lambda x,y: cmp(x[0],y[0]),reverse=True)
# print(text_list)
# Real lists instead of map(): on Python 3 map() returns an iterator, which
# prints as an object and has no append().
data_list=[entry[0] for entry in text_list]
name_list=[entry[1] for entry in text_list]
print(data_list)
print(name_list)
data_list=[3.9, 3.62, 3.5, 3.4, 3.2, 3.1, 2.99, 2.85, 2.66, 2.43, 2.29, 1.98, 1.84, 1.43, 1.34, 1.23, 0.858984375, 0.677841796875, 0.537353515625, 0.394541015625, 0.227578125, 0.204443359375, 0.111806640625, 0.111796875, 0.111767578125, 0.11162109375, 0.1072265625, 0.10693359375, 0.106728515625, 0.0958203125, 0.09580078125, 0.091171875]
name_list=['27', '11', '30', '19', '5', '26', '28', '18', '24', '13', '20', '2', '17', '29', '16', '15', '12', '3', '22', '14', '31', '7', '0', '21', '1', '9', '23', '6', '4', '8', '25', '10']
print(data_list)
print(name_list)
width = 1       # the width of the bars
N = 32 #len(file_list)
ind = np.arange(N)  # the x locations for the groups
xlab_int=np.arange(0,N,jump)

# Tick labels '1', '5', ..., '29' plus a final '32', and their x positions.
xlab=[str(x+1) for x in xlab_int]
xlab.append('32')
xlab_i=[int(x)+width for x in xlab_int]
xlab_i.append(32)


ax.bar(ind+width, data_list, width, color='lightpink', edgecolor='black')#, hatch='xxx')
# Three horizontal reference lines across the whole x range.
ax.hlines(3.29, 0, 33, colors = "black", linestyles = "dashed", linewidth=1)
ax.hlines(3.69, 0, 33, colors = "black", linestyles = ":", linewidth=1)
ax.hlines(3.70, 0, 33, colors = "black", linestyles = "-.", linewidth=1)
ax.set_ylabel('Shuffled records (GB)', color='black')
ax.set_xticks(xlab_i)
ax.set_xticklabels(xlab, color='black' )
# ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'),
#            frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, fontsize=10 )

#ax.legend( (rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'),
#           frameon=False, loc = "upper right", labelspacing=0.2, markerfirst=False, #prop=legend_properties,
#           fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)#, handlelength=0.8)

ax.set_ylim(0, 4.2)  # The ceil

#ax.set_xlim(-0.32, 2.78)  # The ceil

#plt.title("(a) GroupBy-task-execution-time", fontsize=12)
plt.title("The distribution of shuffled records in 32 reduce tasks", fontsize=12)

plt.show()
class BoxPlotStatistics:
    """Box-plot statistics of one metric, grouped by collector and executor type.

    ``self.Parallel``, ``self.CMS`` and ``self.G1`` are dicts holding a
    ``'label'`` plus one dict per executor type (``'E-1'``, ``'E-2'``,
    ``'E-4'``). Each executor dict is filled with the keys ``label``,
    ``fliers``, ``med``, ``q1``, ``q3``, ``whislo`` and ``whishi``.
    """

    # Metric names found in a statistics line -> key stored for the box.
    # 'mean' is deliberately absent: it is parsed but not stored.
    _STAT_KEYS = {
        'median': 'med',
        'min': 'whislo',
        'max': 'whishi',
        'quantile25': 'q1',
        'quantile75': 'q3',
    }

    # (file-name fragment, attribute name), tested in this order.
    _COLLECTORS = (('parallel', 'Parallel'), ('cms', 'CMS'), ('g1', 'G1'))

    # (file-name fragment, executor type), tested in this order.
    _EXECUTOR_TYPES = (('1-6656m', 'E-1'), ('2-13g', 'E-2'), ('4-26g', 'E-4'))

    # metric = ("app.duration", "Time (s)", 1000, "<plot title>")
    def __init__(self, metric):
        """Store name, y label, unit divisor and title, and create empty groups."""
        self.name = metric[0]
        self.ylabel = metric[1]
        self.unit = metric[2]
        self.title = metric[3]

        self.Parallel = self._emptyGroup('Parallel')
        self.CMS = self._emptyGroup('CMS')
        self.G1 = self._emptyGroup('G1')

    @staticmethod
    def _emptyGroup(label):
        """Return a collector group with its label and three empty executor dicts."""
        return {'label': label, 'E-1': {}, 'E-2': {}, 'E-4': {}}

    @staticmethod
    def _firstMatch(lowerName, candidates):
        """Return the value paired with the first fragment contained in lowerName, else None."""
        for fragment, value in candidates:
            if fragment in lowerName:
                return value
        return None

    def addStatistics(self, line, fileName, withMax):
        """Parse one statistics line into the box selected by ``fileName``.

        ``line`` looks like ``"[name] mean=1, median=2, min=0, ..."``; the
        bracketed text replaces ``self.name`` and every value is divided by
        ``self.unit``. The collector and the executor type are taken from
        fragments of the lower-cased ``fileName``. The lower whisker is always
        set to ``q1``; when ``withMax`` is false the upper whisker is set to
        ``q3`` instead of the parsed maximum.

        Raises ``KeyError`` when the file name matches no known collector or
        executor type, or when the line carries no ``quantile25`` (or, with
        ``withMax`` false, no ``quantile75``).
        """
        self.name = line[line.find('[') + 1: line.find(']')]
        metrics = line[line.find(']') + 1:].replace(' ', '').split(',')

        lowerName = fileName.lower()
        groupName = self._firstMatch(lowerName, self._COLLECTORS)
        executorType = self._firstMatch(lowerName, self._EXECUTOR_TYPES)
        if groupName is None or executorType is None:
            # Same exception type as the failed dict lookup this replaces,
            # but with a message that names the offending file.
            raise KeyError('cannot derive collector/executor type from file name %r'
                           % (fileName,))

        box = getattr(self, groupName)[executorType]
        box['label'] = executorType
        box['fliers'] = []

        for metric in metrics:
            parts = metric.split('=')
            if len(parts) < 2:
                # e.g. the empty token left by a trailing comma
                continue
            metricValue = float(parts[1]) / self.unit
            key = self._STAT_KEYS.get(parts[0])
            if key is not None:
                box[key] = metricValue

        # The parsed minimum is discarded: the lower whisker ends at q1.
        box['whislo'] = box['q1']

        if not withMax:
            box['whishi'] = box['q3']

    def checkAndFillNulls(self):
        """Fill every executor box that was never added with NaN statistics."""
        for stat in [self.Parallel, self.CMS, self.G1]:
            for executorType in ['E-1', 'E-2', 'E-4']:
                box = stat[executorType]
                # `in` replaces dict.has_key(), which no longer exists in Python 3.
                if 'label' not in box:
                    box['label'] = executorType
                    box['fliers'] = []

                    for key in ('med', 'whislo', 'whishi', 'q1', 'q3'):
                        box[key] = float('NaN')
"""Bar chart of the shuffled records of 32 reduce tasks, grouped by spill behaviour."""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import json
import io

plt.rc('pdf', fonttype=42)
plt.rc('font', family='Helvetica', size=10)
fig = plt.figure(figsize=(5.1, 2.4))
ax = fig.add_subplot(111)
plt.subplots_adjust(left=0.10, bottom=0.18, right=0.98, top=0.87,
                    wspace=0.03, hspace=0.04)
file_Dir = "D:/plot/executors_0/"
jump = 4  # distance between two labelled x ticks

# NOTE: the values were once read from
# <file_Dir>/<executor>/gcMetrics-<executor>.json (jvmHeapSize.oldGen.peakSize).
# That loader is disabled, so text_list stays empty and the hard-coded
# data_list_1..3 below are what is plotted.
text_list = []
# List comprehensions instead of map(): the results are real lists on both
# Python 2 and Python 3.
data_list = [item[0] for item in text_list]
name_list = [item[1] for item in text_list]
print(data_list)
print(name_list)

data_list_1 = [8.9, 8.8, 8.4, 8.3, 8.0, 7.7, 7.58]
data_list_2 = [7.32, 7.30, 7.28, 7.26, 7.21, 7.15, 7.13, 7.1,
               7.08, 7.06, 6.99, 6.95, 6.95, 6.92, 6.89, 6.85, 6.83,
               6.77, 6.72, 6.68, 6.62]
data_list_3 = [6.46, 6.2, 6.2, 6.0]
name_list = ['27', '11', '30', '19', '5', '26', '28', '18', '24', '13', '20',
             '2', '17', '29', '16', '15', '12', '3', '22', '14', '31', '7',
             '0', '21', '1', '9', '23', '6', '4', '8', '25', '10']
print(data_list)
print(name_list)

width = 1  # the width of the bars
N = 32     # number of tasks
ind_1 = np.arange(len(data_list_1))  # the x locations for the groups
ind_2 = np.arange(len(data_list_2))
ind_3 = np.arange(len(data_list_3))
xlab_int = np.arange(0, N, jump)

# Tick labels 1, 5, ..., 29 plus the last task.  Built as lists so that
# append() works on Python 3 as well (map() returns an iterator there).
xlab = [str(x + 1) for x in xlab_int]
xlab.append('32')
xlab_i = [int(x) + width for x in xlab_int]
xlab_i.append(32)

# The three groups are drawn side by side: each group starts where the
# previous one ended.
offset_2 = width + len(data_list_1)
offset_3 = offset_2 + len(data_list_2)
ax.bar(ind_1 + width, data_list_1, width, color='deepskyblue',
       edgecolor='black', hatch='\\\\\\\\', label="Spill twice with any GC")
ax.bar(ind_2 + offset_2, data_list_2, width, color='lightpink',
       edgecolor='black', hatch='////', label="Spill twice with Parallel GC")
ax.bar(ind_3 + offset_3, data_list_3, width, color='lightpink',
       edgecolor='black', label="Spill only once with any GC")

ax.set_ylabel('Shuffled records (GB)', color='black', fontsize=10)
ax.set_xlabel('Task number', color='black', fontsize=10)
ax.set_xticks(xlab_i)
ax.set_xticklabels(xlab, color='black')
ax.legend(loc="upper right", frameon=False, fontsize=9, borderaxespad=0.1)

ax.set_ylim(0, 13)  # The ceil

plt.show()
6 | import java.net.URL; 7 | import java.net.URLConnection; 8 | 9 | /** 10 | * Created by xulijie on 17-9-1. 11 | */ 12 | public class ExecutorGCLogParserByGCPlot { 13 | 14 | 15 | // from http://www.cnblogs.com/zhuawang/archive/2012/12/08/2809380.html 16 | public static String sendPost(String url, String param) { 17 | PrintWriter out = null; 18 | BufferedReader in = null; 19 | String result = ""; 20 | try { 21 | URL realUrl = new URL(url); 22 | // 打开和URL之间的连接 23 | URLConnection conn = realUrl.openConnection(); 24 | // 设置通用的请求属性 25 | conn.setRequestProperty("accept", "*/*"); 26 | conn.setRequestProperty("connection", "Keep-Alive"); 27 | conn.setRequestProperty("user-agent", 28 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"); 29 | // 发送POST请求必须设置如下两行 30 | conn.setDoOutput(true); 31 | conn.setDoInput(true); 32 | // 获取URLConnection对象对应的输出流 33 | out = new PrintWriter(conn.getOutputStream()); 34 | // 发送请求参数 35 | out.print(param); 36 | // flush输出流的缓冲 37 | out.flush(); 38 | // 定义BufferedReader输入流来读取URL的响应 39 | in = new BufferedReader( 40 | new InputStreamReader(conn.getInputStream())); 41 | String line; 42 | while ((line = in.readLine()) != null) { 43 | result += line; 44 | } 45 | } catch (Exception e) { 46 | System.out.println("发送 POST 请求出现异常!"+e); 47 | e.printStackTrace(); 48 | } 49 | //使用finally块来关闭输出流、输入流 50 | finally{ 51 | try{ 52 | if(out!=null){ 53 | out.close(); 54 | } 55 | if(in!=null){ 56 | in.close(); 57 | } 58 | } 59 | catch(IOException ex){ 60 | ex.printStackTrace(); 61 | } 62 | } 63 | return result; 64 | } 65 | 66 | public static void main(String[] args) { 67 | 68 | String executorFile = "/Users/xulijie/Documents/GCResearch/Experiments/profiles/GroupByRDD-0.5-2/GroupByRDD-Parallel-2-14G-0.5_app-20170721101243-0006/executors"; 69 | 70 | String gcLogFile = executorFile + File.separatorChar + "0" + File.separatorChar + "stdout"; 71 | 72 | System.out.println(gcLogFile); 73 | 74 | String sr = 
"""Bar chart of the shuffled records of 32 reduce tasks (GroupBy), grouped by shuffle-spill behaviour."""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import json
import io

plt.rc('pdf', fonttype=42)
plt.rc('font', family='Helvetica', size=10)
fig = plt.figure(figsize=(5.1, 2.4))
ax = fig.add_subplot(111)
plt.subplots_adjust(left=0.10, bottom=0.18, right=0.98, top=0.87,
                    wspace=0.03, hspace=0.04)
file_Dir = "D:/plot/executors_0/"
jump = 4  # distance between two labelled x ticks

# NOTE: the values were once read from
# <file_Dir>/<executor>/gcMetrics-<executor>.json (jvmHeapSize.oldGen.peakSize).
# That loader is disabled, so text_list stays empty and the hard-coded
# data_list_1..3 below are what is plotted.
text_list = []
# List comprehensions instead of map(): the results are real lists on both
# Python 2 and Python 3.
data_list = [item[0] for item in text_list]
name_list = [item[1] for item in text_list]
print(data_list)
print(name_list)

data_list_1 = [3.9]
data_list_2 = [3.65, 3.5, 3.4]
data_list_3 = [3.2, 3.1, 2.99, 2.85, 2.66, 2.43, 2.29, 1.98, 1.84, 1.43,
               1.34, 1.23, 0.858984375, 0.677841796875, 0.537353515625,
               0.394541015625, 0.227578125, 0.204443359375, 0.111806640625,
               0.111796875, 0.111767578125, 0.11162109375, 0.1072265625,
               0.10693359375, 0.106728515625, 0.0958203125, 0.09580078125,
               0.091171875]
name_list = ['27', '11', '30', '19', '5', '26', '28', '18', '24', '13', '20',
             '2', '17', '29', '16', '15', '12', '3', '22', '14', '31', '7',
             '0', '21', '1', '9', '23', '6', '4', '8', '25', '10']
print(data_list)
print(name_list)

width = 1  # the width of the bars
N = 32     # number of tasks
ind_1 = np.arange(len(data_list_1))  # the x locations for the groups
ind_2 = np.arange(len(data_list_2))
ind_3 = np.arange(len(data_list_3))
xlab_int = np.arange(0, N, jump)

# Tick labels 1, 5, ..., 29 plus the last task.  Built as lists so that
# append() works on Python 3 as well (map() returns an iterator there).
xlab = [str(x + 1) for x in xlab_int]
xlab.append('32')
xlab_i = [int(x) + width for x in xlab_int]
xlab_i.append(32)

# The three groups are drawn side by side: each group starts where the
# previous one ended.
offset_2 = width + len(data_list_1)
offset_3 = offset_2 + len(data_list_2)
ax.bar(ind_1 + width, data_list_1, width, color='lightgreen',
       edgecolor='black', hatch='xxxx',
       label="Trigger shuffle spill with any GC")
ax.bar(ind_2 + offset_2, data_list_2, width, color='deepskyblue',
       edgecolor='black', hatch='\\\\\\\\',
       label="Trigger shuffle spill with Parallel GC")
ax.bar(ind_3 + offset_3, data_list_3, width, color='lightpink',
       edgecolor='black', label="Without shuffle spill")

# Three horizontal reference lines across the whole x range.
ax.hlines(3.29, 0, 33, colors="black", linestyles="dashed", linewidth=1)
ax.hlines(3.69, 0, 33, colors="black", linestyles=":", linewidth=1)
ax.hlines(3.70, 0, 33, colors="black", linestyles="-.", linewidth=1)
ax.set_ylabel('Shuffled records (GB)', color='black', fontsize=10)
ax.set_xlabel('Task number', color='black', fontsize=10)
ax.set_xticks(xlab_i)
ax.set_xticklabels(xlab, color='black')
ax.legend(loc="right", frameon=False, fontsize=9)

ax.set_ylim(0, 4.2)  # The ceil

plt.show()
FAILED (does not finished with failed stages/tasks) 23 | 24 | private String status; 25 | private int numTasks; 26 | private int numActiveTasks; 27 | private int numCompletedTasks; 28 | private int numSkippedTasks; 29 | private int numFailedTasks; 30 | private int numActiveStages; 31 | private int numCompletedStages; 32 | private int numSkippedStages; 33 | private int numFailedStages; 34 | 35 | private long durationMS; 36 | 37 | 38 | public Job(JsonObject jobObject) { 39 | parse(jobObject); 40 | } 41 | 42 | /* 43 | jobObject: 44 | { 45 | "jobId" : 16, 46 | "name" : "aggregate at AreaUnderCurve.scala:45", 47 | "submissionTime" : "2017-05-30T16:25:43.699GMT", 48 | "completionTime" : "2017-05-30T16:25:44.455GMT", 49 | "stageIds" : [ 33, 31, 32 ], 50 | "status" : "SUCCEEDED", 51 | "numTasks" : 160, 52 | "numActiveTasks" : 0, 53 | "numCompletedTasks" : 33, 54 | "numSkippedTasks" : 127, 55 | "numFailedTasks" : 0, 56 | "numActiveStages" : 0, 57 | "numCompletedStages" : 1, 58 | "numSkippedStages" : 2, 59 | "numFailedStages" : 0 60 | } 61 | */ 62 | 63 | private void parse(JsonObject jobObject) { 64 | jobId = jobObject.get("jobId").getAsInt(); 65 | name = jobObject.get("name").getAsString(); 66 | submissionTime = jobObject.get("submissionTime").getAsString(); 67 | if (jobObject.get("completionTime") != null) 68 | completionTime = jobObject.get("completionTime").getAsString(); 69 | status = jobObject.get("status").getAsString(); 70 | numTasks = jobObject.get("numTasks").getAsInt(); 71 | numActiveTasks = jobObject.get("numActiveTasks").getAsInt(); 72 | numCompletedTasks = jobObject.get("numCompletedTasks").getAsInt(); 73 | numSkippedTasks = jobObject.get("numSkippedTasks").getAsInt(); 74 | numFailedTasks = jobObject.get("numFailedTasks").getAsInt(); 75 | numActiveStages = jobObject.get("numActiveStages").getAsInt(); 76 | numCompletedStages = jobObject.get("numCompletedStages").getAsInt(); 77 | numSkippedStages = jobObject.get("numSkippedStages").getAsInt(); 78 | 
package appinfo;

import com.google.gson.JsonObject;

import java.util.Map;
import java.util.TreeMap;


/*
{
  "taskId" : 0,
  "index" : 0,
  "attempt" : 0,
  "launchTime" : "2017-06-18T12:25:59.382GMT",
  "executorId" : "6",
  "host" : "172.26.80.237",
  "taskLocality" : "ANY",
  "speculative" : false,
  "accumulatorUpdates" : [ ],
  "taskMetrics" : {
    "executorDeserializeTime" : 682,
    "executorDeserializeCpuTime" : 66354869,
    "executorRunTime" : 14908,
    "executorCpuTime" : 10840279294,
    "resultSize" : 2572,
    "jvmGcTime" : 572,
    "resultSerializationTime" : 0,
    "memoryBytesSpilled" : 0,
    "diskBytesSpilled" : 0,
    "inputMetrics" : {
      "bytesRead" : 134283264,
      "recordsRead" : 4956261
    },
    "outputMetrics" : {
      "bytesWritten" : 0,
      "recordsWritten" : 0
    },
    "shuffleReadMetrics" : {
      "remoteBlocksFetched" : 0,
      "localBlocksFetched" : 0,
      "fetchWaitTime" : 0,
      "remoteBytesRead" : 0,
      "localBytesRead" : 0,
      "recordsRead" : 0
    },
    "shuffleWriteMetrics" : {
      "bytesWritten" : 2636,
      "writeTime" : 452601,
      "recordsWritten" : 101
    }
  }
}
*/

/**
 * One task of a stage together with all of its attempts, keyed by attempt id.
 * The JSON sample above shows the task object handed to
 * {@link #addTaskAttempt(JsonObject)}.
 */
public class Task {

    private String appId;
    private String appName;
    private int stageId;
    private int taskId;

    // attemptId -> attempt; the TreeMap keeps the attempts sorted by id.
    // NOTE(review): the Integer key type is inferred from the get(0) lookups
    // below; confirm that TaskAttempt.getTaskAttemptId() returns int/Integer.
    private Map<Integer, TaskAttempt> taskAttemptMap = new TreeMap<Integer, TaskAttempt>();

    public Task(String appId, String appName, int stageId, int taskId) {
        this.appId = appId;
        this.appName = appName;
        this.stageId = stageId;
        this.taskId = taskId;
    }

    /** Parses {@code taskObject} into a TaskAttempt and stores it under its attempt id. */
    public void addTaskAttempt(JsonObject taskObject) {
        TaskAttempt taskAttempt = new TaskAttempt(appId, appName, stageId, taskObject);
        // Note that the attemptId may not be consistent with the array index.
        taskAttemptMap.put(taskAttempt.getTaskAttemptId(), taskAttempt);
    }

    /** Returns the attempt with the lowest id that has no error message, or null. */
    public TaskAttempt getCompletedTask() {
        for (TaskAttempt taskAttempt : taskAttemptMap.values()) {
            if (taskAttempt.getErrorMessage() == null)
                return taskAttempt;
        }
        return null;
    }

    /** Returns the attempt with the lowest id that has an error message, or null. */
    public TaskAttempt getFailedTask() {
        for (TaskAttempt taskAttempt : taskAttemptMap.values()) {
            if (taskAttempt.getErrorMessage() != null)
                return taskAttempt;
        }
        return null;
    }

    /** Returns attempt 0 if it exists and has no error message, otherwise null. */
    public TaskAttempt getFirstCompletedTask() {
        TaskAttempt attempt0 = taskAttemptMap.get(0);

        // only consider the taskAttempt with attemptId = 0
        if (attempt0 != null && attempt0.getErrorMessage() == null)
            return attempt0;
        return null;
    }

    /** Returns attempt 0, or null when no attempt with id 0 was added. */
    public TaskAttempt getFirstTaskAttempt() {
        return taskAttemptMap.get(0);
    }

    public int getTaskId() {
        return taskId;
    }

    public int getStageId() {
        return stageId;
    }
}
"""Demo script: box plots of random samples per GC algorithm and executor type."""
import random

import matplotlib.cbook as cbook
import matplotlib.pyplot as plt
import numpy as np

GC_NAMES = ['Parallel', 'CMS', 'G1']
EXECUTOR_TYPES = ['E-1', 'E-2', 'E-4']

n = 5  # number of random samples per box

# data[gc][executor] -> n samples drawn uniformly from [0, upper); one random
# upper bound is drawn per GC algorithm.
data = {}
for name in GC_NAMES:
    upper = random.randint(0, 1000)
    data[name] = {}
    for item in EXECUTOR_TYPES:
        data[name][item] = np.random.uniform(0, upper, size=n)


fig, axes = plt.subplots(ncols=3, sharey=True)
fig.subplots_adjust(wspace=0)

for ax, name in zip(axes, GC_NAMES):
    # boxplot_stats() returns a list with one dict per dataset; each call gets
    # a single 1-D sample, so element [0] is the dict that bxp() needs.
    box_stats = [cbook.boxplot_stats(data[name][item], labels=[item])[0]
                 for item in EXECUTOR_TYPES]
    print(box_stats)
    ax.bxp(box_stats)
    ax.margins(0.05)  # Optional

plt.show()


def _fixed_stats(label, q1):
    """Return a hand-written statistics dict; only the label and q1 vary."""
    return {
        'label': label,
        'q1': q1,
        'med': 3.33,
        'q3': 14.85,
        'whishi': 14.85,  # upper whisker equals q3
        'whislo': q1,     # lower whisker equals q1
        'mean': 13.00,
        'fliers': [],
    }


# Hand-written sample statistics; not used by the plot above.
Parallel = {
    'label': 'Parallel',
    'E-1': _fixed_stats('Parallel_1_7G', 2.35),
    'E-2': _fixed_stats('Parallel_2_14G', 2),
    'E-4': _fixed_stats('Parallel_4_28G', 1.90),
}

CMS = {
    'label': 'CMS',
    'E-1': _fixed_stats('CMS_1_7G', 1.8),
    'E-2': _fixed_stats('CMS_2_14G', 1.7),
    'E-4': _fixed_stats('CMS_4_28G', 1.1),
}

G1 = {
    'label': 'G1',
    'E-1': _fixed_stats('G1_1_7G', 2.35),
    'E-2': _fixed_stats('G1_2_14G', 1.5),
    'E-4': _fixed_stats('G1_4_28G', 1.2),
}
CMS['E-1']['mean'] = 13.00 50 | CMS['E-1']['fliers'] = [] 51 | 52 | CMS['E-2'] = {} 53 | CMS['E-2']['label'] = 'CMS_2_14G' 54 | CMS['E-2']['q1'] = 1.7 55 | CMS['E-2']['med'] = 3.33 56 | CMS['E-2']['q3'] = 14.85 57 | CMS['E-2']['whishi'] = CMS['E-2']['q3'] 58 | CMS['E-2']['whislo'] = CMS['E-2']['q1'] 59 | CMS['E-2']['mean'] = 13.00 60 | CMS['E-2']['fliers'] = [] 61 | 62 | CMS['E-4'] = {} 63 | CMS['E-4']['label'] = 'CMS_4_28G' 64 | CMS['E-4']['q1'] = 1.1 65 | CMS['E-4']['med'] = 3.33 66 | CMS['E-4']['q3'] = 14.85 67 | CMS['E-4']['whishi'] = CMS['E-4']['q3'] 68 | CMS['E-4']['whislo'] = CMS['E-4']['q1'] 69 | CMS['E-4']['mean'] = 13.00 70 | CMS['E-4']['fliers'] = [] 71 | 72 | 73 | G1['label'] = 'G1' 74 | G1['E-1'] = {} 75 | G1['E-1']['label'] = 'G1_1_7G' 76 | G1['E-1']['q1'] = 2.35 77 | G1['E-1']['med'] = 3.33 78 | G1['E-1']['q3'] = 14.85 79 | G1['E-1']['whishi'] = G1['E-1']['q3'] 80 | G1['E-1']['whislo'] = G1['E-1']['q1'] 81 | G1['E-1']['mean'] = 13.00 82 | G1['E-1']['fliers'] = [] 83 | 84 | G1['E-2'] = {} 85 | G1['E-2']['label'] = 'G1_2_14G' 86 | G1['E-2']['q1'] = 1.5 87 | G1['E-2']['med'] = 3.33 88 | G1['E-2']['q3'] = 14.85 89 | G1['E-2']['whishi'] = G1['E-2']['q3'] 90 | G1['E-2']['whislo'] = G1['E-2']['q1'] 91 | G1['E-2']['mean'] = 13.00 92 | G1['E-2']['fliers'] = [] 93 | 94 | G1['E-4'] = {} 95 | G1['E-4']['label'] = 'G1_4_28G' 96 | G1['E-4']['q1'] = 1.2 97 | G1['E-4']['med'] = 3.33 98 | G1['E-4']['q3'] = 14.85 99 | G1['E-4']['whishi'] = 18 100 | G1['E-4']['whislo'] = 0.3 101 | G1['E-4']['mean'] = 13.00 102 | G1['E-4']['fliers'] = [] 103 | 104 | fig, axes = plt.subplots(nrows=2, ncols=3, sharey=False, figsize=(8,9)) 105 | fig.subplots_adjust(wspace=0) 106 | 107 | for ax, stats in zip(axes[0], [Parallel, CMS, G1]): 108 | list = [stats['E-1'], stats['E-2'], stats['E-4']] 109 | ax.bxp(list, showfliers=False, showcaps=False, meanline=False, showmeans=True) 110 | # ax.set(xticklabels=['E1', 'E2', 'E4'], xlabel=stats['label']) 111 | ax.set_xticklabels([]) 112 | 113 | ax2 
package generalGC;

import java.util.ArrayList;
import java.util.List;

/**
 * Created by xulijie on 17-12-22.
 *
 * Collects per-generation heap usage records, one Young and one Old record per
 * call to {@link #addUsage}.
 */
public class HeapUsage {

    private List<Usage> youngGen = new ArrayList<Usage>();
    private List<Usage> oldGen = new ArrayList<Usage>();
    // Printed by display()/toString(), but addUsage() never adds to it.
    private List<Usage> metaGen = new ArrayList<Usage>();

    /** Prints all records to stdout, one section per generation. */
    public void display() {
        printSection("============ Young Generation ============", youngGen);
        printSection("============ Old Generation ============", oldGen);
        printSection("============ Metaspace Generation ============", metaGen);
    }

    /** Prints a section header followed by one line per record. */
    private static void printSection(String header, List<Usage> usages) {
        System.out.println(header);
        for (Usage usage : usages)
            System.out.println(usage);
    }

    /** Returns all records, young then old then metaspace, one per line. */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        appendAll(sb, youngGen);
        appendAll(sb, oldGen);
        appendAll(sb, metaGen);
        return sb.toString();
    }

    /** Appends every record followed by a newline. */
    private static void appendAll(StringBuilder sb, List<Usage> usages) {
        for (Usage usage : usages)
            sb.append(usage).append('\n');
    }

    /**
     * Records one GC event for both the young and the old generation.
     *
     * Example call:
     * usage.addUsage("YGC", offsetTime, yBeforeMB, yAfterMB, youngMB,
     *                oldBeforeMB, oldAfterMB, oldMB, ygcSeconds, gcCause);
     *
     * The old-generation record is added even when oldBeforeMB equals oldAfterMB.
     */
    public void addUsage(String gcType, double offsetTime, double yBeforeMB, double yAfterMB, double youngMB,
                         double oldBeforeMB, double oldAfterMB, double oldMB, double gcSeconds, String gcCause) {

        youngGen.add(new Usage("Young", gcType, offsetTime, yBeforeMB, yAfterMB, youngMB, gcSeconds, gcCause));
        oldGen.add(new Usage("Old", gcType, offsetTime, oldBeforeMB, oldAfterMB, oldMB, gcSeconds, gcCause));
    }
}

/** One usage record of one generation, as passed to HeapUsage.addUsage(). */
class Usage {
    String gen;
    String gcType;
    double offsetTime;
    double beforeGC;
    double afterGC;
    double allocated;
    double gcPauseSec;
    String gcCause;

    public Usage(String gen, String gcType, double offsetTime, double beforeGC, double afterGC,
                 double allocated, double gcPauseSec, String gcCause) {
        this.gen = gen;
        this.gcType = gcType;
        this.offsetTime = offsetTime;
        this.beforeGC = beforeGC;
        this.afterGC = afterGC;
        this.allocated = allocated;
        this.gcPauseSec = gcPauseSec;
        this.gcCause = gcCause;
    }

    @Override
    public String toString() {
        return "[" + gen + "](" + gcType + ") time = " + offsetTime + ", beforeGC = "
                + beforeGC + ", afterGC = " + afterGC + ", allocated = " + allocated
                + ", gcPause = " + gcPauseSec + "s, gcCause = " + gcCause;
    }
}
-------------------------------------------------------------------------------- 1 | import util.RelativeDifference; 2 | import util.Statistics; 3 | 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.List; 7 | 8 | /** 9 | * Created by xulijie on 17-11-13. 10 | */ 11 | public class ComputeStatistics { 12 | 13 | public static void computeMean(String name, double[] values) { 14 | Statistics statistics = new Statistics(values); 15 | System.out.println("[" + name + "] mean = " + 16 | String.format("%.1f", statistics.getMean())); 17 | } 18 | 19 | public static void main(String[] args) { 20 | 21 | double[] values = {3.4, 1.8, 1.7}; 22 | computeMean("GroupBy-Parallel-0.5", values); 23 | 24 | values = new double[]{2.5, 1.6, 1.7}; 25 | computeMean("GroupBy-CMS-0.5", values); 26 | 27 | values = new double[]{2.6, 1.5, 1.3}; 28 | computeMean("GroupBy-G1-0.5", values); 29 | 30 | values = new double[]{6.6, 3.8}; 31 | computeMean("GroupBy-Parallel-1.0", values); 32 | 33 | values = new double[]{2.8, 4.2}; 34 | computeMean("GroupBy-CMS-1.0", values); 35 | 36 | values = new double[]{2.8, 2.7}; 37 | computeMean("GroupBy-G1-1.0", values); 38 | 39 | 40 | values = new double[]{4.7, 4.7, 4.7}; 41 | computeMean("Join-Parallel-0.5", values); 42 | 43 | values = new double[]{3.8, 3.7, 3.7}; 44 | computeMean("Join-CMS-0.5", values); 45 | 46 | values = new double[]{4.3, 4.1, 4.1}; 47 | computeMean("Join-G1-0.5", values); 48 | 49 | values = new double[]{70.7, 14.2, 13.4}; 50 | computeMean("Join-Parallel-1.0", values); 51 | 52 | values = new double[]{10.9, 11.0, 11.8}; 53 | computeMean("Join-CMS-1.0", values); 54 | 55 | values = new double[]{11.7, 11.5, 11.2}; 56 | computeMean("Join-G1-1.0", values); 57 | 58 | 59 | 60 | 61 | values = new double[]{19.1, 20.3, 22.7}; 62 | computeMean("PageRank-Parallel-0.5", values); 63 | 64 | values = new double[]{20.7, 18.9, 24.2}; 65 | computeMean("PageRank-CMS-0.5", values); 66 | 67 | values = new double[]{36.5, 31.8, 38.0}; 68 | 
computeMean("PageRank-G1-0.5", values); 69 | 70 | 71 | values = new double[]{6.2, 5.8, 6.0}; 72 | computeMean("SVM-Parallel-0.5", values); 73 | 74 | values = new double[]{6.0, 5.9, 6.1}; 75 | computeMean("SVM-CMS-0.5", values); 76 | 77 | values = new double[]{6.1, 5.8, 5.7}; 78 | computeMean("SVM-G1-0.5", values); 79 | 80 | values = new double[]{15.2, 14.2, 14.0}; 81 | computeMean("SVM-Parallel-1.0", values); 82 | 83 | values = new double[]{14.7, 14.6, 14.3}; 84 | computeMean("SVM-CMS-1.0", values); 85 | 86 | values = new double[]{13.9, 13.6}; 87 | computeMean("SVM-G1-1.0", values); 88 | 89 | 90 | double[] doubles = new double[]{2.3, 1.9, 1.8}; 91 | compareAppDuration(doubles); 92 | 93 | doubles = new double[]{5.2, 3.5, 2.8}; 94 | compareAppDuration(doubles); 95 | 96 | doubles = new double[]{4.7, 3.7, 4.2}; 97 | compareAppDuration(doubles); 98 | 99 | doubles = new double[]{32.8, 11.2, 11.5}; 100 | compareAppDuration(doubles); 101 | 102 | doubles = new double[]{20.7, 21.3, 35.4}; 103 | compareAppDuration(doubles); 104 | 105 | doubles = new double[]{14.5, 14.5, 13.8}; 106 | compareAppDuration(doubles); 107 | 108 | } 109 | 110 | public static void compareAppDuration(double[] values) { 111 | Arrays.sort(values); 112 | 113 | double initDuration = 0; 114 | StringBuilder sb = new StringBuilder(); 115 | boolean first = true; 116 | 117 | for (double d : values) { 118 | 119 | double relativeDiff = RelativeDifference.getRelativeDifference(initDuration, d) * 100; 120 | String label = ""; 121 | if (relativeDiff > 20) 122 | label = "<<"; 123 | else if (relativeDiff > 10) 124 | label = "<"; 125 | else if (relativeDiff >= 0) 126 | label = "~"; 127 | else 128 | label = "!"; 129 | 130 | initDuration = d; 131 | if (first) { 132 | first = false; 133 | sb.append(d); 134 | } else { 135 | sb.append(label + d + "(" + (int) relativeDiff + ")"); 136 | } 137 | } 138 | 139 | System.out.println("\t" + sb.toString()); 140 | 141 | } 142 | } 143 | 
# /src/python/plotter/GroupBoxPlotter.py
import matplotlib.pyplot as plt


class GroupBoxPlotter:
    """Draws a 2x3 grid of box plots: one row per statistics object, one
    column per GC algorithm (Parallel, CMS, G1), three boxes per subplot
    (keys 'E-1', 'E-2', 'E-4')."""

    @staticmethod
    def _plotRow(rowAxes, statistics, sucessfulAppNum, showBottomLabels):
        """Fill one row of subplots from ``statistics``.

        rowAxes          -- the three axes of the row
        statistics       -- object exposing ``Parallel``, ``CMS`` and ``G1``
                            mappings with keys 'E-1', 'E-2', 'E-4', 'label'
        sucessfulAppNum  -- indexable; element i holds the tick labels drawn
                            on the top axis of column i
        showBottomLabels -- when true the bottom axis gets 'E1'/'E2'/'E4'
                            tick labels and the stats label; otherwise the
                            bottom tick labels are blanked
        """
        colors = ['pink', 'lightblue', 'lightgreen']
        perAlgorithm = [statistics.Parallel, statistics.CMS, statistics.G1]

        for column, (ax, stats) in enumerate(zip(rowAxes, perAlgorithm)):
            boxes = [stats['E-1'], stats['E-2'], stats['E-4']]

            bplot = ax.bxp(boxes, showfliers=False, showmeans=False, patch_artist=True)
            for patch, color in zip(bplot['boxes'], colors):
                patch.set_facecolor(color)

            if showBottomLabels:
                ax.set_xticklabels(['E1', 'E2', 'E4'], fontsize=20)
                ax.set_xlabel(xlabel=stats['label'], fontsize=22)
            else:
                ax.set_xticklabels([])
            ax.tick_params(axis='y', labelsize=20)

            # The twin axis carries the labels shown along the top edge.
            ax2 = ax.twiny()
            ax2.set_xticks(ax.get_xticks())
            ax2.set_xticklabels(sucessfulAppNum[column], fontsize=20, y=0.98)
            ax2.set_xlim(ax.get_xlim())

            ax.margins(0.05)  # Optional

    @staticmethod
    def plotStatisticsByGCAlgo(file, firstSucessfulAppNum, secondSucessfulAppNum,
                               firstStatistics, secondStatistics, sharedy):
        """Plot both statistics objects and save the figure to ``file``.

        The first statistics object fills the top row, the second the bottom
        row. With ``sharedy`` true all six subplots share one y axis;
        otherwise the y axis is shared per row only. The figure title and the
        top-row y label come from ``firstStatistics``; the bottom-row y label
        comes from ``secondStatistics``.
        """
        shareMode = True if sharedy else 'row'
        fig, axes = plt.subplots(nrows=2, ncols=3, sharey=shareMode)
        fig.subplots_adjust(wspace=0)

        GroupBoxPlotter._plotRow(axes[0], firstStatistics, firstSucessfulAppNum, False)
        axes[0][0].set_ylabel(firstStatistics.ylabel, fontsize=20)
        fig.suptitle(firstStatistics.title, fontsize=22, y=1.02)

        GroupBoxPlotter._plotRow(axes[1], secondStatistics, secondSucessfulAppNum, True)
        axes[1][0].set_ylabel(secondStatistics.ylabel, fontsize=20)

        plt.savefig(file, dpi=150, bbox_inches='tight')
// /src/main/java/analyzer/ExecutorAnalyzer.java
package analyzer;

import appinfo.*;
import com.google.gson.*;
import jdk.nashorn.internal.objects.DataPropertyDescriptor;
import profiler.SparkAppProfiler;

import util.DateParser;
import util.FileTextWriter;
import util.JsonFileReader;
import util.RelativeDifference;

import java.io.File;
import java.io.FileWriter;
import java.util.*;

/**
 * Created by xulijie on 17-11-9.
 *
 * Extracts CPU and memory samples from a captured `top` log of one slave
 * and prints them.
 */
public class ExecutorAnalyzer {

    /**
     * Reads the `top` output stored at {@code executorTopMetricsPath} and
     * prints one {@link TopMetrics} entry per "KiB Mem" line, preceded by a
     * {@code [Top Metrics][slave]} header.
     *
     * Recognized line shapes (examples from the original comments):
     * <pre>
     * %Cpu(s): 54.2 us,  2.0 sy,  0.0 ni, 34.6 id,  8.7 wa,  0.0 hi,  0.6 si,  0.0 st
     * KiB Mem : 32947020 total, 28891956 free,  2518352 used,  1536712 buff/cache
     * </pre>
     *
     * @param executorTopMetricsPath path of the captured `top` output
     * @param slave                  slave name used in the printed header
     */
    public void analyzeSlaveTopMetrics(String executorTopMetricsPath, String slave) {
        List<TopMetrics> slaveTopMetrics = new ArrayList<TopMetrics>();
        List<String> topMetricsLines = JsonFileReader.readFileLines(executorTopMetricsPath);

        // Values carried over from the most recent "top" / "%Cpu" lines until
        // the next "KiB Mem" line turns them into a sample.
        String time = "";
        double cpu = 0;

        for (String line : topMetricsLines) {
            if (line.startsWith("top")) {
                // Text between "- " and " up" on the summary line.
                time = line.substring(line.indexOf("-") + 2, line.indexOf("up") - 1);
            }
            if (line.startsWith("%Cpu")) {
                // CPU usage is taken as 100 minus the idle ("id") percentage.
                String idle = line.substring(line.indexOf("ni") + 4, line.indexOf("id") - 1);
                cpu = 100 - Double.parseDouble(idle);
            }
            if (line.startsWith("KiB Mem")) {
                // The number between "free," and "used" is the used memory in KiB.
                String usedKiB = line.substring(line.indexOf("free") + 5, line.indexOf("used") - 1).trim();
                double memory = Double.parseDouble(usedKiB);
                slaveTopMetrics.add(new TopMetrics(time, cpu, memory / 1024 / 1024));
            }
        }

        StringBuilder sb = new StringBuilder();
        sb.append("\n[Top Metrics][").append(slave).append("]\n");
        for (TopMetrics topMetrics : slaveTopMetrics) {
            sb.append(topMetrics).append("\n");
        }

        System.out.println(sb.toString());
    }

    public static void main(String args[]) {

        String appJsonRootDir = "/Users/xulijie/Documents/GCResearch/Experiments-11-17/profiles/";

        // Alternative selections that were present as commented-out code:
        //   RDDJoin-1.0  : rjoin-Parallel-1-6656m-1.0-n1 on aliSlave4
        //                  rjoin-CMS-1-6656m-1.0-n4      on aliSlave6
        //   PageRank-0.5 : PageRank-Parallel-1-6656m-0.5-n4 on aliSlave7
        //                  PageRank-CMS-1-6656m-0.5-n2      on aliSlave7
        //                  PageRank-G1-1-6656m-0.5-n3       on aliSlave5
        String appName = "RDDJoin-1.0";
        String applicationName = "rjoin-G1-1-6656m-1.0-n3";
        String slave = "aliSlave1";

        String slaveTopMetricsFile = appJsonRootDir + appName + "/topMetrics/" + slave + "/" + applicationName + ".txt";

        ExecutorAnalyzer analyzer = new ExecutorAnalyzer();
        analyzer.analyzeSlaveTopMetrics(slaveTopMetricsFile, slave);
    }
}
// /src/main/java/gc/G1GCViewerLogParser.java
package gc;

import generalGC.HeapUsage;
import util.FileTextWriter;
import util.JsonFileReader;

import java.util.List;

/**
 * Created by xulijie on 18-4-5.
 *
 * Parses G1 GC records (lines starting with "[201") from a log file,
 * collecting heap usage samples and summing pause times.
 */
public class G1GCViewerLogParser {
    private HeapUsage usage = new HeapUsage();

    // Pause time sums in seconds, accumulated over every parsed record.
    private double STWPauseTime = 0;
    private double youngGCTime = 0;
    private double fullGCTime = 0;

    /** Parses {@code logFile} and prints the collected heap usage. */
    public void parse(String logFile) {
        parseLines(logFile);
        display();
    }

    /**
     * Parses {@code logFile} and returns the accumulated pause times.
     *
     * @return statistics built from the total, young and full GC time sums
     */
    public GCStatistics parseStatistics(String logFile) {
        parseLines(logFile);
        return new GCStatistics(STWPauseTime, youngGCTime, fullGCTime, 0);
    }

    /** Feeds every trimmed line that starts with "[201" to the record parser. */
    private void parseLines(String logFile) {
        List<String> lines = JsonFileReader.readFileLines(logFile);

        for (String line : lines) {
            String trimmed = line.trim();
            if (trimmed.startsWith("[201"))
                parseGCRecord(trimmed);
        }
    }

    /**
     * Classifies one record and, when it carries an "[Eden" section, stores
     * its heap usage and adds its duration to the time sums.
     */
    private void parseGCRecord(String line) {
        String gcType = "";

        if (line.contains("[GC pause") && line.contains("(young)")) {
            gcType = "YGC";
        }

        // Checked second on purpose: a "(young) (initial-mark)" pause matches
        // the test above as well and must end up as "FGC".
        if (line.contains("[Full GC")
                || line.contains("(young) (initial-mark)")
                || line.contains("[GC cleanup")
                || line.contains("[GC remark")
                || line.contains("[GC concurrent")
                || line.contains("(mixed)")) {
            gcType = "FGC";
        }

        if (!line.contains("[Eden")) {
            return;
        }

        // [2017-11-20T18:54:36.579+0800][9.032] -> 9.032
        int offsetStart = line.indexOf("][") + 2;
        int offsetEnd = line.indexOf(']', offsetStart);
        double offsetTime = Double.parseDouble(line.substring(offsetStart, offsetEnd));

        // e.g. "GC (Allocation Failure)"
        int gcCauseIndex = line.indexOf("] [") + 3;
        String gcCause = line.substring(gcCauseIndex, line.indexOf('[', gcCauseIndex) - 1);

        // e.g. 735138K->257048K(1514496K)
        int edenIndex = line.indexOf("Eden") + 6;
        double[] young = parseUsageTriple(line.substring(edenIndex, line.indexOf(',', edenIndex)));

        // e.g. 129024K->15319K(494592K)
        int heapUsageIndex = line.lastIndexOf("] ") + 2;
        double[] heap = parseUsageTriple(line.substring(heapUsageIndex, line.indexOf(',', heapUsageIndex)));

        // Old generation figures are derived as heap minus Eden.
        double oldBeforeMB = heap[0] - young[0];
        double oldAfterMB = heap[1] - young[1];
        double oldMB = heap[2] - young[2];

        double gcSeconds = Double.parseDouble(
                line.substring(line.lastIndexOf(", ") + 2, line.lastIndexOf(" secs")));

        usage.addUsage(gcType, offsetTime, young[0], young[1], young[2],
                oldBeforeMB, oldAfterMB, oldMB, gcSeconds, gcCause);

        STWPauseTime += gcSeconds;

        if (gcType.equals("FGC"))
            fullGCTime += gcSeconds;
        else
            youngGCTime += gcSeconds;
    }

    /**
     * Splits a {@code beforeK->afterK(totalK)} token into
     * {before, after, total}, each converted to MB.
     */
    private double[] parseUsageTriple(String token) {
        double beforeMB = computeMB(token.substring(0, token.indexOf('K')));
        double afterMB = computeMB(token.substring(token.indexOf('>') + 1, token.indexOf("K(")));
        double totalMB = computeMB(token.substring(token.indexOf('(') + 1, token.indexOf("K)")));
        return new double[]{beforeMB, afterMB, totalMB};
    }

    /*
    Sample record:
    2017-11-22T10:03:14.403+0800: 596.322: [GC pause (G1 Evacuation Pause) (young) 596.322: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 64042, predicted base time: 43.99 ms, remaining time: 156.01 ms, target pause time: 200.00 ms]
     */

    /** Converts a whole number of KB, given as text, to MB. */
    public double computeMB(String KB) {
        return (double) Long.parseLong(KB) / 1024;
    }

    /** Writes the collected heap usage to {@code outputFile}. */
    public void outputUsage(String outputFile) {
        FileTextWriter.write(outputFile, usage.toString());
    }

    /** Prints the collected heap usage to standard output. */
    public void display() {
        System.out.println(usage.toString());
    }

}
// /src/main/java/util/JxlUtil.java
package util;

import java.util.List;
import java.util.Map;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;

import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;

/**
 * Created by Lijie on 2017/2/13.
 * Modified by YE on 2017/3/20.
 *
 * Reads and writes .xls workbooks as a map from sheet name to rows of
 * cell strings.
 */

public class JxlUtil {

    private String filePath;

    public String getPath() {
        return filePath;
    }

    public void setPath(String filePath) {
        this.filePath = filePath;
    }


    /**
     * Reads every sheet of the workbook at the configured path.
     *
     * A missing file or a name not ending in ".xls" is reported on stderr
     * and reading is still attempted; read failures are printed and the
     * sheets collected so far are returned.
     *
     * @return sheet name -> rows (first row of each sheet skipped);
     *         sheets without such rows are omitted
     */
    public Map<String, List<List<String>>> parse() {
        File file = new File(filePath);
        if (!file.exists() || !file.getName().endsWith(".xls")) {
            // Report only; the attempt below decides the outcome.
            new Exception("要解析的路径有问题: " + filePath).printStackTrace();
        }
        Map<String, List<List<String>>> listListMap = new HashMap<String, List<List<String>>>();
        Workbook workBook = null;
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(file);
            workBook = Workbook.getWorkbook(fis);
            Sheet[] sheetArray = workBook.getSheets();
            if (sheetArray != null) {
                for (Sheet sheet : sheetArray) {
                    List<List<String>> rows = parseSheet(sheet);
                    if (rows != null && rows.size() > 0) {
                        listListMap.put(sheet.getName(), rows);
                    }
                }
            }
        } catch (BiffException e) {
            System.out.println("解析文件发生异常: " + e);
        } catch (IOException e) {
            System.out.println("解析文件发生异常: " + e);
        } finally {
            try {
                if (workBook != null) {
                    workBook.close();
                }
                if (fis != null) {
                    fis.close();
                }
            } catch (Exception e) {
                System.out.println("关闭文件流发生异常: " + e);
            }
        }
        return listListMap;
    }

    /** Returns the cell contents of every row except the first (index 0). */
    private List<List<String>> parseSheet(Sheet sheet) {
        List<List<String>> rows = new ArrayList<List<String>>();
        int rowCount = sheet.getRows();
        for (int i = 1; i < rowCount; i++) {
            List<String> row = new ArrayList<String>();
            Cell[] cellArray = sheet.getRow(i);
            if (cellArray != null) {
                for (Cell cell : cellArray) {
                    row.add(cell.getContents());
                }
            }
            rows.add(row);
        }
        return rows;
    }


    /**
     * Writes one sheet per map entry to the configured path; rows are
     * written starting at row 0.
     *
     * @param listListMap sheet name -> rows of cell strings
     * @return true when all sheets were written without an exception,
     *         false when a failure was caught (and printed)
     * @throws WriteException if closing the workbook fails with it
     */
    public boolean write(Map<String, List<List<String>>> listListMap) throws WriteException {
        File file = new File(filePath);
        boolean result = false;
        WritableWorkbook workBook = null;
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(file);
            workBook = Workbook.createWorkbook(fos);
            int sheetNo = 0;
            for (Entry<String, List<List<String>>> entry : listListMap.entrySet()) {
                List<List<String>> rows = entry.getValue();
                WritableSheet sheet = workBook.createSheet(entry.getKey(), sheetNo++);
                for (int i = 0; i < rows.size(); i++) {
                    List<String> row = rows.get(i);
                    for (int j = 0; j < row.size(); j++) {
                        sheet.addCell(new Label(j, i, row.get(j)));
                    }
                }
            }
            workBook.write();
            // Previously never set, so callers always saw false.
            result = true;
            System.out.println("成功写入文件");
        } catch (Exception e) {
            System.out.println("写入文件发生异常: " + e);
        } finally {
            try {
                if (workBook != null) {
                    workBook.close();
                }
                if (fos != null) {
                    fos.close();
                }
            } catch (IOException e) {
                System.out.println("关闭文件流发生异常: " + e);
            }
        }
        return result;
    }
}
# /src/python/plotter/TimeSeriesPlotter.py
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mpl
import os
import sys

from datetime import datetime
from reader import FileReader


def plotExecutorAndWorkerUsage(appName, slowestTasksDir):
    """Plot every ``*.txt`` metrics file found one level below ``slowestTasksDir``.

    Entries whose name starts with ".DS" are skipped. The name of the
    sub-directory (not ``appName``) is passed on as the figure title.
    """
    for entry in os.listdir(slowestTasksDir):
        if entry.startswith(".DS"):
            continue
        for topMetricsFile in os.listdir(os.path.join(slowestTasksDir, entry)):
            if topMetricsFile.endswith(".txt"):
                plotResourceUsage(os.path.join(slowestTasksDir, entry, topMetricsFile),
                                  slowestTasksDir, entry)


def _parseMetricLine(line):
    """Return ``(time, cpu, memory)`` parsed from one metrics line.

    The time is the text between the first '[' and ']' (format %H:%M:%S),
    the CPU value lies between "= " and the first ',', and the memory value
    is everything from 9 characters after "Memory" to the end of the line.
    """
    timeText = line[line.find('[') + 1: line.find(']')]
    cpuText = line[line.find('=') + 2: line.find(',')]
    memoryText = line[line.find('Memory') + 9:]
    return datetime.strptime(timeText, '%H:%M:%S'), float(cpuText), float(memoryText)


def plotResourceUsage(topMetricsFile, slowestTasksDir, appName):
    """Show executor and worker CPU/memory over time for one metrics file.

    Lines following a "[Top Metrics][Executor" header are executor samples;
    lines following a "[Top Metrics][aliSlave" header are worker samples.
    Blank lines and anything before the first header are ignored. The
    directory ``<slowestTasksDir>/topMetricsFigures`` is created if missing;
    the figure is shown interactively (saving is currently disabled).
    """
    fileLines = FileReader.readLines(topMetricsFile)

    executorTime, executorCPU, executorMemory = [], [], []
    slaveTime, slaveCPU, slaveMemory = [], [], []

    # Series that the current section feeds; None until a header is seen.
    target = None

    for line in fileLines:
        if line.startswith("[Top Metrics][Executor"):
            target = (executorTime, executorCPU, executorMemory)
        elif line.startswith("[Top Metrics][aliSlave"):
            target = (slaveTime, slaveCPU, slaveMemory)
        elif target is not None and line.strip() != "":
            sampleTime, cpu, memory = _parseMetricLine(line)
            target[0].append(sampleTime)
            target[1].append(cpu)
            target[2].append(memory)

    fig, axes = plt.subplots(nrows=2, ncols=1, sharey=False, sharex=True)
    xfmt = mdates.DateFormatter('%H:%M:%S')
    axes[0].xaxis.set_major_formatter(xfmt)
    axes[1].xaxis.set_major_formatter(xfmt)
    axes[0].set_ylabel("Executor CPU (%)", color='r')
    axes[0].tick_params('y', colors='r')
    axes[1].set_ylabel("Worker CPU (%)", color='r')
    axes[1].tick_params('y', colors='r')
    axes[0].set_ylim(0, 840)  # The ceil
    axes[1].set_ylim(0, 105)  # The ceil
    fig.autofmt_xdate()

    axes[0].plot_date(executorTime, executorCPU, '-r', label='CPU')
    axes[1].plot_date(slaveTime, slaveCPU, '-r', label='CPU')

    # Memory is drawn on a second y axis of each subplot.
    ax12 = axes[0].twinx()
    ax12.plot_date(executorTime, executorMemory, '-b', label='Memory')
    ax12.set_ylabel('Executor Memory (GB)', color='b')
    ax12.tick_params('y', colors='b')
    ax12.set_ylim(0, 32)  # The ceil

    ax22 = axes[1].twinx()
    ax22.plot_date(slaveTime, slaveMemory, '-b', label='Memory')
    ax22.set_ylabel('Worker Memory (GB)', color='b')
    ax22.tick_params('y', colors='b')
    ax22.set_ylim(0, 32)  # The ceil

    plt.suptitle(appName)

    outputDir = os.path.join(slowestTasksDir, "topMetricsFigures")
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)
    plt.show()
    # To save instead of showing:
    # plt.savefig(os.path.join(outputDir, appName + ".pdf"), dpi=150, bbox_inches='tight')


if __name__ == '__main__':

    rootDir = "/Users/xulijie/Documents/GCResearch/Experiments-11-17/medianProfiles/"
    taskDir = "/SlowestTasks"
    # taskDir = "/failedTasks"

    # Other application names that were present as commented-out calls:
    # GroupByRDD-0.5, GroupByRDD-1.0, RDDJoin-0.5, SVM-0.5, SVM-1.0,
    # PageRank-0.5, PageRank-1.0
    appName = "RDDJoin-1.0"
    plotExecutorAndWorkerUsage(appName, rootDir + appName + taskDir)

# --------------------------------------------------------------------------------
# /src/python/Join/JoinGCTime.py:
"""
Broken axis example, where the y-axis will have a portion cut out.
"""
import matplotlib.pyplot as plt
import numpy as np

import matplotlib as mpl

mpl.rcParams['axes.linewidth'] = 1.5  # set the value globally

plt.rc('pdf', fonttype=42)
plt.rc('font', family='Helvetica', size=12)

N = 3
ind = np.arange(N)  # the x locations for the groups

width = 0.23  # the width of the bars

# One value per x tick (YGC, FGC, ConGC); the legend below maps
# xvals -> Parallel, yvals -> CMS, zvals -> G1.
xvals = [15, 4537, 0]
yvals = [45, 1, 31]
zvals = [26, 4, 227]


def _draw_bars(axis):
    """Draw the three bar series on ``axis`` and return their containers."""
    first = axis.bar(ind, xvals, width, color='lightpink', edgecolor='black')
    second = axis.bar(ind + width, yvals, width, color='lightgreen',
                      edgecolor='black', hatch='xxx')
    third = axis.bar(ind + width * 2, zvals, width, color='deepskyblue',
                     edgecolor='black', hatch='\\\\\\')
    return first, second, third


# If we were to simply plot pts, we'd lose most of the interesting
# details due to the outliers. So let's 'break' or 'cut-out' the y-axis
# into two portions - use the top (ax) for the outliers, and the bottom
# (ax2) for the details of the majority of our data
f, (ax, ax2) = plt.subplots(2, 1, sharex=True, figsize=(3.4, 2.4))
plt.subplots_adjust(left=0.22, bottom=0.11, right=0.97, top=0.87,
                    wspace=0.03, hspace=0.04)

rects1, rects2, rects3 = _draw_bars(ax)

ax.set_xticks(ind + width)

ax.legend((rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'),
          frameon=False, loc="upper right", labelspacing=0.2, markerfirst=False,
          fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)

ax.set_xlim(-0.32, 2.78)  # The ceil

rects4, rects5, rects6 = _draw_bars(ax2)

ax2.set_xticks(ind + width)
ax2.set_xticklabels(('YGC', 'FGC', 'ConGC'), color='black')

ax2.set_xlim(-0.32, 2.78)  # The ceil

# zoom-in / limit the view to different portions of the data
ax.set_ylim(4050, 6000)  # outliers only
ax2.set_ylim(0, 350)  # most of the data

# hide the spines between ax and ax2
ax.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax.xaxis.set_ticks_position('none')
# Must be a real boolean: the string 'off' is truthy and would switch the
# top tick labels on in current matplotlib.
ax.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.xaxis.tick_bottom()

# This looks pretty good, and was fairly painless, but you can get that
# cut-out diagonal lines look with just a bit more work. The important
# thing to know here is that in axes coordinates, which are always
# between 0-1, spine endpoints are at these locations (0,0), (0,1),
# (1,0), and (1,1). Thus, we just need to put the diagonals in the
# appropriate corners of each of our axes, and so long as we use the
# right transform and disable clipping.

d = .015  # how big to make the diagonal lines in axes coordinates
# arguments to pass to plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((-d, +d), (-d, +d), **kwargs)  # top-left diagonal
ax.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # top-right diagonal

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)  # bottom-left diagonal
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # bottom-right diagonal

# What's cool about this is that now if we vary the distance between
# ax and ax2 via f.subplots_adjust(hspace=...)
or plt.subplot_tool(), 105 | # the diagonal lines will move accordingly, and stay right at the tips 106 | # of the spines they are 'breaking' 107 | 108 | ax.set_title("(b) Join-task-GC-time", fontsize=12) 109 | 110 | 111 | 112 | def autolabel(rects, ax): 113 | for rect in rects: 114 | h = rect.get_height() 115 | ax.text(rect.get_x()+rect.get_width()/2., 1.03*h, '%d'%int(h), 116 | ha='center', va='bottom', fontsize=10)#, rotation='vertical')#, rotation='45') 117 | 118 | autolabel(rects1, ax) 119 | autolabel(rects1, ax2) 120 | autolabel(rects2, ax2) 121 | autolabel(rects3, ax2) 122 | 123 | yaxis_label = ax.set_ylabel('GC time (s)', color='black') 124 | yaxis_label.set_position((-0.05, -0.05)) 125 | #f.tight_layout() 126 | plt.show() -------------------------------------------------------------------------------- /src/python/Join/JoinTaskExecutionTime.py: -------------------------------------------------------------------------------- 1 | """ 2 | Broken axis example, where the y-axis will have a portion cut out. 3 | """ 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | import matplotlib as mpl 8 | 9 | mpl.rcParams['axes.linewidth'] = 1.5 #set the value globally 10 | 11 | #plt.rc('font', family='Helvetica') 12 | # font = {'family' : 'Helvetica', 13 | # 'weight' : 'normal', 14 | # 'color' : 'black', 15 | # 'size' : '12'} 16 | 17 | plt.rc('pdf', fonttype=42) 18 | plt.rc('font', family='Helvetica', size=12) 19 | 20 | N = 3 21 | ind = np.arange(N) # the x locations for the groups 22 | 23 | width = 0.23 # the width of the bars 24 | 25 | # fig = plt.figure(figsize=(3.2, 2.4)) 26 | # ax = fig.add_subplot(111) 27 | # plt.subplots_adjust(left=0.19, bottom=0.11, right=0.98, top=0.87, 28 | # wspace=0.03, hspace=0.04) 29 | 30 | xvals = [164, 0, 4552] 31 | yvals = [340, 0, 46] 32 | zvals = [550, 0, 30] 33 | 34 | 35 | # If we were to simply plot pts, we'd lose most of the interesting 36 | # details due to the outliers. 
# So let's 'break' or 'cut-out' the y-axis
# into two portions - use the top (ax) for the outliers, and the bottom
# (ax2) for the details of the majority of our data
f, (ax, ax2) = plt.subplots(2, 1, sharex=True, figsize=(3.4, 2.4))
plt.subplots_adjust(left=0.22, bottom=0.11, right=0.97, top=0.87,
                    wspace=0.03, hspace=0.04)


def _draw_bars(axis):
    """Draw the three bar series on ``axis`` and return their containers."""
    first = axis.bar(ind, xvals, width, color='lightpink', edgecolor='black')
    second = axis.bar(ind + width, yvals, width, color='lightgreen',
                      edgecolor='black', hatch='xxx')
    third = axis.bar(ind + width * 2, zvals, width, color='deepskyblue',
                     edgecolor='black', hatch='\\\\\\')
    return first, second, third


rects1, rects2, rects3 = _draw_bars(ax)

# The legend maps the three series to the collectors.
ax.legend((rects1[0], rects2[0], rects3[0]), ('Parallel', 'CMS', 'G1'),
          frameon=False, loc="upper right", labelspacing=0.2, markerfirst=False,
          fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)

ax2.set_xlim(-0.32, 2.78)  # The ceil

rects4, rects5, rects6 = _draw_bars(ax2)

ax2.set_xticks(ind + width)
ax2.set_xticklabels(('CompTime', 'SpillTime', 'GCTime'), color='black')

ax2.set_xlim(-0.32, 2.78)  # The ceil

# zoom-in / limit the view to different portions of the data
ax.set_ylim(4050, 6000)  # outliers only
ax2.set_ylim(0, 1550)  # most of the data

# hide the spines between ax and ax2
ax.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax.xaxis.set_ticks_position('none')
# Must be a real boolean: the string 'off' is truthy and would switch the
# top tick labels on in current matplotlib.
ax.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.xaxis.tick_bottom()

# This looks pretty good, and was fairly painless, but you can get that
# cut-out diagonal lines look with just a bit more work. The important
# thing to know here is that in axes coordinates, which are always
# between 0-1, spine endpoints are at these locations (0,0), (0,1),
# (1,0), and (1,1). Thus, we just need to put the diagonals in the
# appropriate corners of each of our axes, and so long as we use the
# right transform and disable clipping.

d = .015  # how big to make the diagonal lines in axes coordinates
# arguments to pass to plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((-d, +d), (-d, +d), **kwargs)  # top-left diagonal
ax.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # top-right diagonal

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)  # bottom-left diagonal
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # bottom-right diagonal

# What's cool about this is that now if we vary the distance between
# ax and ax2 via f.subplots_adjust(hspace=...)
// File: src/main/java/parser/ExecutorsJsonParser.java
package parser;

import appinfo.Application;
import appinfo.Executor;
import com.google.gson.JsonElement;
import com.google.gson.JsonIOException;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import gc.CMSGCViewerLogParser;
import gc.G1GCViewerLogParser;
import gc.GCStatistics;
import gc.ParallelGCViewerLogParser;
import util.JsonFileReader;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;


/**
 * Builds {@link Executor} objects from the "allexecutors" JSON array and attaches the
 * per-executor metric files found under an application's executors directory.
 */
public class ExecutorsJsonParser {

    /**
     * Parses a JSON array of executors and registers each one with {@code app}.
     * Malformed JSON is reported on stderr; executors parsed before the failure stay registered.
     *
     * @param allexecutorsJson JSON text whose root element is an array of executor objects
     * @param app              application that receives the parsed executors
     */
    public void parseExecutorsJson(String allexecutorsJson, Application app) {
        try {
            JsonParser parser = new JsonParser();
            JsonElement el = parser.parse(allexecutorsJson);
            for (JsonElement executorElem : el.getAsJsonArray()) {
                Executor executor = new Executor(executorElem.getAsJsonObject());
                app.addExecutor(executor);
            }

        } catch (JsonIOException e) {
            e.printStackTrace();
        } catch (JsonSyntaxException e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the metric files of every executor that is registered in {@code app}.
     * For each sub-directory of {@code executorsDir} whose name equals a registered executor id,
     * the following files are read and handed to the executor:
     * {@code gcMetrics-<id>.csv} (';'-separated), {@code topMetrics.txt}, {@code stderr}
     * (spill metrics) and {@code gcEvent-<id>.txt}.
     *
     * @param executorsDir directory that contains one sub-directory per executor id
     * @param app          application whose executors are enriched
     */
    public void parseExecutorGCSummary(String executorsDir, Application app) {

        Set<String> aliveExecutorIds = new HashSet<String>();
        for (Executor executor : app.getExecutors())
            aliveExecutorIds.add(executor.getId());

        // listFiles() returns null when the path is missing or not a directory.
        File[] executorDirs = new File(executorsDir).listFiles();
        if (executorDirs == null) {
            System.err.println("[ExecutorsJsonParser] Cannot list executors directory: " + executorsDir);
            return;
        }

        for (File executorDir : executorDirs) {
            if (!executorDir.isDirectory())
                continue;

            String executorId = executorDir.getName();
            if (!aliveExecutorIds.contains(executorId))
                continue;

            // Look the executor up once instead of once per metric file.
            Executor executor = app.getExecutor(executorId);
            String prefix = executorDir.getAbsolutePath() + File.separatorChar;

            List<String> lines = JsonFileReader.readFileLines(prefix + "gcMetrics-" + executorId + ".csv");
            for (String line : lines) {
                String[] metrics = line.split(";");
                executor.addGCMetric(metrics);
            }

            List<String> topMetricsLines = JsonFileReader.readFileLines(prefix + "topMetrics.txt");
            executor.addTopMetrics(topMetricsLines);

            // Parse spill metrics
            List<String> stderrLines = JsonFileReader.readFileLines(prefix + "stderr");
            executor.addSpillMetrics(stderrLines);

            List<String> gcEventLines = JsonFileReader.readFileLines(prefix + "gcEvent-" + executorId + ".txt");
            executor.countGCTimeInShuffleSpill(gcEventLines);

            // NOTE(review): two further inputs were disabled in the original and stay disabled here:
            //   - "gcMetrics-<id>.json" via executor.addGCeasyMetric(json)
            //   - "gcPlainEvent-<id>.txt" parsed by Parallel/CMS/G1 GCViewerLogParser (chosen from
            //     app.getName()) and stored with executor.setGCStatistics(stat)
        }
    }


    /**
     * Returns the ids of all executors listed in an executors JSON file.
     *
     * @param executorJsonFile path of a file whose content is a JSON array of executor objects
     * @return ids parsed so far; empty when the JSON is malformed from the start
     */
    public static Set<String> getAliveExecutors(String executorJsonFile) {

        Set<String> executorIds = new HashSet<String>();

        try {
            JsonParser parser = new JsonParser();
            JsonElement el = parser.parse(JsonFileReader.readFile(executorJsonFile));
            for (JsonElement executorElem : el.getAsJsonArray()) {
                Executor executor = new Executor(executorElem.getAsJsonObject());
                executorIds.add(executor.getId());
            }
        } catch (JsonIOException e) {
            e.printStackTrace();
        } catch (JsonSyntaxException e) {
            e.printStackTrace();
        }

        return executorIds;
    }
}
// File: src/main/java/gc/GCViewerParser.java
package gc;

import util.GCViewerNoneGUI;

import java.io.File;
import java.lang.reflect.InvocationTargetException;

/**
 * Created by xulijie on 18-4-6.
 *
 * Drives GCViewer in command-line (no GUI) mode. The invocation form used here is
 *
 *   java -jar gcviewer-1.3x.jar gc.log summary.csv [chart.png] [-t PLAIN|CSV|CSV_TS|SIMPLE|SUMMARY]
 */
public class GCViewerParser {

    public static GCViewerNoneGUI gcViewerNoneGUI = new GCViewerNoneGUI();

    /**
     * Exports a report for {@code gcLogFile} and renders the GC chart.
     *
     * @param gcLogFile     GC log to read
     * @param exportCVSFile report file to write
     * @param chartPNGFile  chart image to write
     */
    public static void parseExecutorGCLog(String gcLogFile, String exportCVSFile, String chartPNGFile) {
        runGCViewer(gcLogFile, exportCVSFile, chartPNGFile);
    }

    /**
     * Exports a report of the given type for {@code gcLogFile} (no chart).
     *
     * @param gcLogFile  GC log to read
     * @param summaryCSV report file to write
     * @param type       value passed after {@code -t}: PLAIN, CSV, CSV_TS, SIMPLE or SUMMARY
     */
    public static void parseExecutorGCLogToSummary(String gcLogFile, String summaryCSV, String type) {
        runGCViewer(gcLogFile, summaryCSV, "-t", type);
    }

    /** Runs GCViewer with {@code args}; {@code args[0]} is the GC log and is echoed to stdout. */
    private static void runGCViewer(String... args) {
        try {
            System.out.println("[GCLogParsing] " + args[0]);
            gcViewerNoneGUI.doMain(args);
        } catch (InvocationTargetException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Keep the interrupt visible to callers instead of silently clearing it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Converts the stdout GC logs of one Parallel, one CMS and one G1 executor into CSV_TS
     * files below {@code <baseDir><appName>/SlowestTask/{Parallel,CMS,G1}/}.
     *
     * @param baseDir            profile root; concatenated as-is, so it must end with a separator
     * @param appName            application directory below {@code baseDir}
     * @param medianParallelApp  run directory of the Parallel collector
     * @param medianCMSApp       run directory of the CMS collector
     * @param medianG1App        run directory of the G1 collector
     * @param ParallelExecutorID executor id inside the Parallel run
     * @param CMSExecutorID      executor id inside the CMS run
     * @param G1ExecutorID       executor id inside the G1 run
     */
    public static void parseExecutorLogByGCViewer(String baseDir, String appName,
                                                  String medianParallelApp, String medianCMSApp, String medianG1App,
                                                  int ParallelExecutorID, int CMSExecutorID, int G1ExecutorID) {

        String appDir = baseDir + appName + File.separatorChar;
        String parallelGCLog = executorStdout(appDir, medianParallelApp, ParallelExecutorID);
        String cmsGCLog = executorStdout(appDir, medianCMSApp, CMSExecutorID);
        String g1GCLog = executorStdout(appDir, medianG1App, G1ExecutorID);

        String outputDir = baseDir + appName + File.separatorChar + "SlowestTask";
        // NOTE(review): only the Parallel file name carries a "1" before ".txt"
        // ("parallel-E<id>1.txt" vs "CMS-E<id>.txt"); kept as-is, confirm it is intentional.
        String parallelParsedLog = outputDir + File.separatorChar + "Parallel"
                + File.separatorChar + "parallel-E" + ParallelExecutorID + "1.txt";
        String cmsParsedLog = outputDir + File.separatorChar + "CMS"
                + File.separatorChar + "CMS-E" + CMSExecutorID + ".txt";
        String g1ParsedLog = outputDir + File.separatorChar + "G1"
                + File.separatorChar + "G1-E" + G1ExecutorID + ".txt";

        ensureParentDir(parallelParsedLog);
        ensureParentDir(cmsParsedLog);
        ensureParentDir(g1ParsedLog);

        parseExecutorGCLogToSummary(parallelGCLog, parallelParsedLog, "CSV_TS");
        parseExecutorGCLogToSummary(cmsGCLog, cmsParsedLog, "CSV_TS");
        parseExecutorGCLogToSummary(g1GCLog, g1ParsedLog, "CSV_TS");
    }

    /** Builds {@code <appDir><runDir>/executors/<executorId>/stdout}. */
    private static String executorStdout(String appDir, String runDir, int executorId) {
        return appDir + runDir + File.separatorChar + "executors"
                + File.separatorChar + executorId + File.separatorChar + "stdout";
    }

    /** Creates the parent directory of {@code path} when it does not exist yet. */
    private static void ensureParentDir(String path) {
        File parent = new File(path).getParentFile();
        if (parent != null && !parent.exists())
            parent.mkdirs();
    }

    public static void main(String[] args) {

        String baseDir = "/Users/xulijie/Documents/GCResearch/PaperExperiments/medianProfiles/";

        String appName = "GroupByRDD-0.5";
        String medianParallelApp = "GroupByRDD-Parallel-1-6656m-0.5-n1_app-20171120185427-0000";
        String medianCMSApp = "GroupByRDD-CMS-1-6656m-0.5-n5_app-20171120195033-0019";
        String medianG1App = "GroupByRDD-G1-1-6656m-0.5-n1_app-20171120201509-0030";
        int parallelExecutorID = 30;
        int cmsExecutorID = 17;
        int g1ExecutorID = 16;

        parseExecutorLogByGCViewer(baseDir, appName, medianParallelApp, medianCMSApp, medianG1App,
                parallelExecutorID, cmsExecutorID, g1ExecutorID);
    }
}
# File: src/python/Join-200G/JoinTaskExecutionTimeComparison.py
"""
Broken axis example, where the y-axis will have a portion cut out.

Draws one bar group per time category (CompTime, SpillTime, GCTime) with one
bar per collector (Parallel, CMS, G1) on two stacked axes that share the
x-axis: the top axes (ax) shows the outlier range, the bottom axes (ax2) the
range holding most of the data.
"""
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

BAR_WIDTH = 0.23  # the width of the bars
GROUP_LABELS = ('CompTime', 'SpillTime', 'GCTime')

# (legend label, [CompTime, SpillTime, GCTime] in seconds, extra bar style)
SERIES = (
    # Duration: 3737s, taskId: 2000, spillTime=126s, GC=2464s, YGC=27s, FGC=2436s (136 times)
    ('Parallel', [1147, 126, 2464], dict(color='lightpink')),
    # Duration: 1902s, taskId: 2012, spillTime=60s, GC=31s, YGC=30s, FGC=1s (11 times), ConGC=64s
    ('CMS', [1811, 60, 31], dict(color='lightgreen', hatch='xxx')),
    # Duration: 2086s, taskId: 2014, spillTime=167s, GC=120s, YGC=80s, FGC=40s (81 times), ConGC=631s
    ('G1', [1838, 128, 120], dict(color='deepskyblue', hatch='\\\\\\')),
)

# NOTE(review): no value in SERIES reaches TOP_YLIM and several exceed
# BOTTOM_YLIM, so the tallest bars are cut at the break -- confirm the limits
# were meant for this data set.
TOP_YLIM = (4050, 6000)   # outliers only
BOTTOM_YLIM = (0, 1550)   # most of the data


def draw_bars(axis, ind):
    """Draw every series on *axis* side by side; return one bar container per series."""
    return [
        axis.bar(ind + i * BAR_WIDTH, values, BAR_WIDTH, edgecolor='black', **style)
        for i, (_, values, style) in enumerate(SERIES)
    ]


def autolabel(rects, ax):
    """Write each bar's integer height just above the bar (3% higher) on *ax*."""
    for rect in rects:
        h = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.03 * h, '%d' % int(h),
                ha='center', va='bottom', fontsize=10)


def draw_break_marks(ax, ax2, d=.015):
    """Draw the diagonal 'cut' marks where the spines of ax and ax2 meet.

    *d* is the mark size in axes coordinates, where spine endpoints sit at
    (0,0), (0,1), (1,0) and (1,1); clipping is disabled so the marks may
    extend past the axes.
    """
    kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
    ax.plot((-d, +d), (-d, +d), **kwargs)          # top-left diagonal
    ax.plot((1 - d, 1 + d), (-d, +d), **kwargs)    # top-right diagonal

    kwargs.update(transform=ax2.transAxes)         # switch to the bottom axes
    ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)        # bottom-left diagonal
    ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # bottom-right diagonal


def main():
    """Build the broken-axis bar chart and show it."""
    mpl.rcParams['axes.linewidth'] = 1.5  # set the value globally
    plt.rc('pdf', fonttype=42)
    plt.rc('font', family='Helvetica', size=12)

    ind = np.arange(len(GROUP_LABELS))  # the x locations for the groups

    f, (ax, ax2) = plt.subplots(2, 1, sharex=True, figsize=(3.4, 2.4))
    plt.subplots_adjust(left=0.22, bottom=0.11, right=0.97, top=0.87,
                        wspace=0.03, hspace=0.04)

    top_bars = draw_bars(ax, ind)
    bottom_bars = draw_bars(ax2, ind)

    ax.legend([bars[0] for bars in top_bars], [label for label, _, _ in SERIES],
              frameon=False, loc="upper right", labelspacing=0.2, markerfirst=False,
              fontsize=10, ncol=3, borderaxespad=0.3, columnspacing=1.2, handletextpad=0.5)

    ax2.set_xticks(ind + BAR_WIDTH)
    ax2.set_xticklabels(GROUP_LABELS, color='black')
    ax2.set_xlim(-0.32, 2.78)

    # zoom-in / limit the view to different portions of the data
    ax.set_ylim(*TOP_YLIM)
    ax2.set_ylim(*BOTTOM_YLIM)

    # hide the spines between ax and ax2
    ax.spines['bottom'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('none')
    # Must be a bool: the string 'off' is truthy and turns the labels ON.
    ax.tick_params(labeltop=False)  # don't put tick labels at the top
    ax2.xaxis.tick_bottom()

    draw_break_marks(ax, ax2)

    ax.set_title("(a) Join-task-execution-time", fontsize=12)

    # Top axes: label only bars tall enough to appear in the outlier range
    # (a label for a shorter bar would be placed far below the axes).
    for bars in top_bars:
        autolabel([r for r in bars if r.get_height() >= TOP_YLIM[0]], ax)
    for bars in bottom_bars:
        autolabel(bars, ax2)

    # ax2 is the current axes after plt.subplots, i.e. what plt.ylabel targeted.
    yaxis_label = ax2.set_ylabel('Time (s)', color='black')
    yaxis_label.set_position((-0.05, 1))
    plt.show()


if __name__ == '__main__':
    main()
# File: src/python/plotter/TimeSeriesPlotter2.py
"""Plot executor and worker CPU / memory usage over time from topMetrics files."""
import os
import sys

from datetime import datetime

EXECUTOR_HEADER = "[Top Metrics][Executor"
SLAVE_HEADER = "[Top Metrics][aliSlave"
TIME_FORMAT = '%H:%M:%S'


def plotExecutorAndWorkerUsage(appName, slowestTasksDir):
    """Plot every ``topMetrics*`` file found one directory level below *slowestTasksDir*.

    *appName* is accepted for call compatibility; the name of each
    sub-directory is what gets passed on as the plot's application name.
    """
    for entry in os.listdir(slowestTasksDir):
        entryPath = os.path.join(slowestTasksDir, entry)
        # Skip macOS ".DS_Store" files and anything else that is not a directory
        # (os.listdir on a plain file would raise).
        if entry.startswith(".DS") or not os.path.isdir(entryPath):
            continue
        for topMetricsFile in os.listdir(entryPath):
            if topMetricsFile.startswith("topMetrics"):
                plotResourceUsage(os.path.join(entryPath, topMetricsFile), slowestTasksDir, entry)


def parseTopMetrics(fileLines):
    """Split top-metrics lines into executor and worker series.

    A line starting with ``[Top Metrics][Executor`` opens the executor
    section, one starting with ``[Top Metrics][aliSlave`` opens the worker
    section; both restart the time origin. Every non-blank line inside a
    section is sliced as ``[HH:MM:SS] <name> = <cpu>, Memory = <memory>``.
    Lines before the first header are ignored.

    Returns ``(executor, slave)`` where each is a tuple of three lists:
    ``(seconds since the section's first sample, cpu, memory)``.
    """
    executor = ([], [], [])
    slave = ([], [], [])
    current = None
    first_time = None

    for line in fileLines:
        if line.startswith(EXECUTOR_HEADER):
            current, first_time = executor, None
        elif line.startswith(SLAVE_HEADER):
            current, first_time = slave, None
        elif current is not None and line.strip() != "":
            stamp = datetime.strptime(line[line.find('[') + 1: line.find(']')], TIME_FORMAT)
            cpu = line[line.find('=') + 2: line.find(',')]
            memory = line[line.find('Memory') + 9:]
            if first_time is None:
                first_time = stamp

            times, cpus, memories = current
            # .seconds (not total_seconds) wraps a negative delta, which keeps
            # the offset positive when the samples cross midnight.
            times.append((stamp - first_time).seconds)
            cpus.append(float(cpu))
            memories.append(float(memory))

    return executor, slave


def plotResourceUsage(topMetricsFile, slowestTasksDir, appName, save=False):
    """Plot CPU (red, left axis) and memory (blue, right axis) for executor and worker.

    The figure is shown interactively; with ``save=True`` it is written to
    ``<slowestTasksDir>/topMetricsFigures/<appName>.pdf`` instead. The output
    directory is created in both cases.
    """
    # Imported lazily so that parseTopMetrics can be used without a plotting backend.
    import matplotlib.pyplot as plt
    from reader import FileReader

    fileLines = FileReader.readLines(topMetricsFile)
    (executorTime, executorCPU, executorMemory), (slaveTime, slaveCPU, slaveMemory) = \
        parseTopMetrics(fileLines)

    fig, axes = plt.subplots(nrows=2, ncols=1, sharey=False, sharex=True)
    axes[0].set_ylabel("Executor CPU (%)", color='r')
    axes[0].tick_params('y', colors='r')
    axes[1].set_ylabel("Worker CPU (%)", color='r')
    axes[1].tick_params('y', colors='r')
    axes[0].set_ylim(0, 840)  # The ceil
    axes[1].set_ylim(0, 105)  # The ceil
    axes[1].set_xlabel("Time (sec)", color=u'#000000')

    axes[0].plot(executorTime, executorCPU, '-r', label='CPU')
    axes[1].plot(slaveTime, slaveCPU, '-r', label='CPU')

    ax12 = axes[0].twinx()
    ax12.plot(executorTime, executorMemory, '-b', label='Memory')
    ax12.set_ylabel('Executor Memory (GB)', color='b')
    ax12.tick_params('y', colors='b')
    ax12.set_ylim(0, 32)  # The ceil

    ax22 = axes[1].twinx()
    ax22.plot(slaveTime, slaveMemory, '-b', label='Memory')
    ax22.set_ylabel('Worker Memory (GB)', color='b')
    ax22.tick_params('y', colors='b')
    ax22.set_ylim(0, 32)  # The ceil

    ax12.set_xlim(left=0)
    ax22.set_xlim(left=0)

    plt.suptitle("(b) Join-1.0-G1-CPU-usage", y=0.95)

    outputDir = os.path.join(slowestTasksDir, "topMetricsFigures")
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)
    if save:
        plt.savefig(os.path.join(outputDir, appName + ".pdf"), dpi=150, bbox_inches='tight')
    else:
        plt.show()


if __name__ == '__main__':

    baseDir = "/Users/xulijie/Documents/GCResearch/PaperExperiments/medianProfiles/"
    taskDir = "/SlowestTask"
    # taskDir = "/failedTasks"

    # Other profiled applications: "GroupByRDD-0.5", "GroupByRDD-1.0", "RDDJoin-0.5",
    # "SVM-0.5", "SVM-1.0", "PageRank-0.5", "PageRank-1.0"
    for appName in ("RDDJoin-1.0",):
        plotExecutorAndWorkerUsage(appName, baseDir + appName + taskDir)
// File: src/main/java/generalGC/G1GCLogParser.java
package generalGC;

import util.FileTextWriter;
import util.JsonFileReader;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by xulijie on 17-12-21.
 *
 * Reads a G1 GC log and derives young/old generation usage (in MB) from its
 * "[Eden: ... Survivors: ... Heap: ...]" records.
 */
public class G1GCLogParser {

    /**
     * Matches the head of an event line such as
     * "2017-11-22T10:03:14.403+0800: 596.322: [GC pause (G1 Evacuation Pause) (young) ..."
     * and captures the uptime in seconds ("596.322"). Any year is accepted.
     */
    private static final Pattern EVENT_HEAD =
            Pattern.compile("^\\d{4}-\\d{2}-\\d{2}T\\S*: (\\d+(?:\\.\\d+)?):");

    private HeapUsage usage = new HeapUsage();

    /**
     * Walks the log line by line, remembering the most recent event timestamp and GC kind,
     * and hands every "[Eden" record to {@link #parseGCRecord}.
     *
     * @param logFile path of the G1 GC log
     */
    public void parse(String logFile) {
        List<String> lines = JsonFileReader.readFileLines(logFile);

        Double timestamp = null;
        String gcCause = null;

        for (String line : lines) {
            line = line.trim();

            Matcher head = EVENT_HEAD.matcher(line);
            if (head.find())
                timestamp = Double.valueOf(head.group(1));

            if (line.contains("[GC pause") && line.contains("(young)")) {
                gcCause = "YGC";
            }

            // Checked after the young test on purpose: "(young) (initial-mark)" counts as FGC.
            if (line.contains("[Full GC")
                    || line.contains("(young) (initial-mark)")
                    || line.contains("[GC cleanup")
                    || line.contains("[GC remark")
                    || line.contains("[GC concurrent")
                    || line.contains("(mixed)")) {
                gcCause = "FGC";
            }

            // A record seen before any timestamp cannot be placed on the time axis; skip it
            // instead of failing on Double.parseDouble("").
            if (line.startsWith("[Eden") && timestamp != null)
                parseGCRecord(timestamp, line, gcCause);
        }
    }

    /**
     * Decomposes one heap record, e.g.
     * "[Eden: 25.0M(25.0M)->0.0B(35.0M) Survivors: 0.0B->4096.0K Heap: 25.0M(504.0M)->5192.0K(504.0M)]",
     * into young/old usage before and after the collection.
     */
    private void parseGCRecord(double timestamp, String line, String gcCause) {

        // 25.0M(25.0M)->0.0B(35.0M)
        String eden = line.substring(line.indexOf(':') + 2, line.indexOf("Survivors") - 1);
        String edenAfter = eden.substring(eden.indexOf('>') + 1);
        double edenBeforeMB = usedMB(eden);
        double edenBeforeTotalMB = capacityMB(eden);
        double edenAfterMB = usedMB(edenAfter);
        double edenAfterTotalMB = capacityMB(edenAfter);

        // 0.0B->4096.0K  ("Survivors: " is 11 characters long)
        String survivors = line.substring(line.indexOf("Survivors") + 11, line.indexOf("Heap") - 1);
        double survivorBeforeMB = computeMB(survivors.substring(0, survivors.indexOf('-')));
        double survivorAfterMB = computeMB(survivors.substring(survivors.indexOf('>') + 1));

        // 25.0M(504.0M)->5192.0K(504.0M)
        String heap = line.substring(line.indexOf("Heap") + 6, line.lastIndexOf(']'));
        String heapAfter = heap.substring(heap.indexOf('>') + 1);
        double heapBeforeMB = usedMB(heap);
        double heapBeforeTotalMB = capacityMB(heap);
        double heapAfterMB = usedMB(heapAfter);
        double heapAfterTotalMB = capacityMB(heapAfter);

        // Young generation = Eden + Survivors; old generation = heap - young.
        double yBeforeMB = edenBeforeMB + survivorBeforeMB;
        double yAfterMB = edenAfterMB + survivorAfterMB;
        double youngBeforeMB = edenBeforeTotalMB + survivorBeforeMB;
        double youngAfterMB = edenAfterTotalMB + survivorAfterMB;

        double oldBeforeMB = heapBeforeMB - yBeforeMB;
        double oldAfterMB = heapAfterMB - yAfterMB;
        double oldBeforeTotalMB = heapBeforeTotalMB - youngBeforeMB;
        double oldAfterTotalMB = heapAfterTotalMB - youngAfterMB;

        // NOTE(review): recording into `usage` was disabled in the original, so outputUsage()
        // currently writes a HeapUsage that never received a sample. Kept disabled; when it is
        // re-enabled, guard against gcCause == null.
        /*
        if (gcCause.equals("YGC")) {
            usage.addYoungUsage(timestamp, yBeforeMB, youngBeforeMB, gcCause);
            usage.addYoungUsage(timestamp, yAfterMB, youngAfterMB, "");

            if (oldAfterMB != oldBeforeMB) {
                usage.addOldUsage(timestamp, oldBeforeMB, oldBeforeTotalMB, gcCause);
                usage.addOldUsage(timestamp, oldAfterMB, oldAfterTotalMB, "");
            }
        } else if (gcCause.equals("FGC")){
            usage.addYoungUsage(timestamp, yBeforeMB, youngBeforeMB, gcCause);
            usage.addYoungUsage(timestamp, yAfterMB, youngAfterMB, "");

            usage.addOldUsage(timestamp, oldBeforeMB, oldBeforeTotalMB, gcCause);
            usage.addOldUsage(timestamp, oldAfterMB, oldAfterTotalMB, "");
        }
        */
    }

    /** "25.0M(504.0M)..." -> 25.0 : the amount in front of the first parenthesis. */
    private double usedMB(String usedAndCapacity) {
        return computeMB(usedAndCapacity.substring(0, usedAndCapacity.indexOf('(')));
    }

    /** "25.0M(504.0M)..." -> 504.0 : the amount inside the first pair of parentheses. */
    private double capacityMB(String usedAndCapacity) {
        return computeMB(usedAndCapacity.substring(usedAndCapacity.indexOf('(') + 1,
                usedAndCapacity.indexOf(')')));
    }

    /**
     * Converts a size with a one-letter unit suffix to megabytes.
     * "B", "K" and "G" are scaled; any other suffix (normally "M") is taken as megabytes.
     *
     * @param size number followed by a unit letter, e.g. "4096.0K"
     * @return the size in MB
     * @throws IllegalArgumentException if {@code size} is null or empty
     * @throws NumberFormatException    if the part before the suffix is not a number
     */
    public double computeMB(String size) {
        if (size == null || size.isEmpty())
            throw new IllegalArgumentException("size must be a number followed by a unit letter");

        double mb = Double.parseDouble(size.substring(0, size.length() - 1));
        if (size.endsWith("K"))
            mb = mb / 1024;
        else if (size.endsWith("B"))
            mb = mb / 1024 / 1024;
        else if (size.endsWith("G"))
            mb = mb * 1024;

        return mb;
    }

    private void outputUsage(String outputFile) {
        FileTextWriter.write(outputFile, usage.toString());
    }

    public static void main(String[] args) {
        String logFile = "src/test/gclogs/SVM-1.0-E1-G1-19.txt";
        String outputFile = "src/test/gclogs/Parsed-SVM-1.0-E1-G1-19.txt";
        G1GCLogParser parser = new G1GCLogParser();
        parser.parse(logFile);
        parser.outputUsage(outputFile);
    }
}
// File: src/main/java/util/GCViewerNoneGUI.java
package util;

import com.tagtraum.perf.gcviewer.GCViewerArgsParser;
import com.tagtraum.perf.gcviewer.GCViewerArgsParserException;
import com.tagtraum.perf.gcviewer.exp.DataWriter;
import com.tagtraum.perf.gcviewer.exp.DataWriterType;
import com.tagtraum.perf.gcviewer.exp.impl.DataWriterFactory;
import com.tagtraum.perf.gcviewer.imp.DataReaderException;
import com.tagtraum.perf.gcviewer.imp.DataReaderFacade;
import com.tagtraum.perf.gcviewer.model.GCModel;
import com.tagtraum.perf.gcviewer.model.GCResource;
import com.tagtraum.perf.gcviewer.view.SimpleChartRenderer;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Command-line-only entry point to GCViewer: parses the arguments, exports a report and
 * optionally renders a chart. Unlike a stand-alone tool it does not call System.exit after
 * a successful export, so a caller can process several logs in one JVM.
 */
public class GCViewerNoneGUI {
    private static final Logger LOGGER = Logger.getLogger(GCViewerNoneGUI.class.getName());
    /** Documented for completeness; a successful export deliberately does not exit. */
    private static final int EXIT_OK = 0;
    private static final int EXIT_EXPORT_FAILED = -1;
    private static final int EXIT_ARGS_PARSE_FAILED = -2;

    private GCViewerArgsParser gcViewerArgsParser;

    public GCViewerNoneGUI() {
        this.gcViewerArgsParser = new GCViewerArgsParser();
    }

    public static void main(String[] args) throws InvocationTargetException, InterruptedException {
        (new GCViewerNoneGUI()).doMain(args);
    }

    /**
     * Runs one export. With 2 or 3 positional arguments (log, report[, chart]) the report is
     * written; with more than 3 the usage text is printed; with fewer than 2 nothing happens.
     * Terminates the JVM with {@code EXIT_ARGS_PARSE_FAILED} on invalid arguments and with
     * {@code EXIT_EXPORT_FAILED} when the export throws.
     */
    public void doMain(String[] args) throws InvocationTargetException, InterruptedException {
        // Fresh parser per call: this object is shared and reused by callers, and the parser
        // presumably keeps option state (e.g. the -t type) between parseArguments calls.
        this.gcViewerArgsParser = new GCViewerArgsParser();
        GCViewerArgsParser argsParser = this.gcViewerArgsParser;

        try {
            argsParser.parseArguments(args);
        } catch (GCViewerArgsParserException e) {
            usage();
            LOGGER.log(Level.SEVERE, e.getMessage(), e);
            System.exit(EXIT_ARGS_PARSE_FAILED);
        }

        if (argsParser.getArgumentCount() > 3) {
            usage();
        } else if (argsParser.getArgumentCount() >= 2) {
            LOGGER.info("GCViewer command line mode");
            GCResource gcResource = argsParser.getGcResource();
            String summaryFilePath = argsParser.getSummaryFilePath();
            String chartFilePath = argsParser.getChartFilePath();
            DataWriterType type = argsParser.getType();

            try {
                this.export(gcResource, summaryFilePath, chartFilePath, type);
                LOGGER.info("export completed successfully");
                // No System.exit(EXIT_OK): callers continue with the next log.
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, "Error during report generation", e);
                System.exit(EXIT_EXPORT_FAILED);
            }
        }
    }

    /**
     * Loads the GC model and writes the report; a failing report is logged and does not
     * prevent the chart from being rendered (best effort, as before).
     */
    private void export(GCResource gcResource, String summaryFilePath, String chartFilePath, DataWriterType type) throws IOException, DataReaderException {
        DataReaderFacade dataReaderFacade = new DataReaderFacade();
        GCModel model = dataReaderFacade.loadModel(gcResource);
        try {
            this.exportType(model, summaryFilePath, type);
        } catch (Throwable throwable) {
            throwable.printStackTrace();
        }
        if (chartFilePath != null) {
            this.renderChart(model, chartFilePath);
        }
    }

    /** Writes {@code model} to {@code summaryFilePath} in the given format and closes the writer. */
    private void exportType(GCModel model, String summaryFilePath, DataWriterType type) throws Throwable {
        try (DataWriter summaryWriter = DataWriterFactory.getDataWriter(new File(summaryFilePath), type)) {
            summaryWriter.write(model);
        }
    }

    /** Renders the chart to {@code chartFilePath}; the stream is closed even when rendering fails. */
    private void renderChart(GCModel model, String chartFilePath) throws IOException {
        SimpleChartRenderer renderer = new SimpleChartRenderer();
        try (FileOutputStream out = new FileOutputStream(new File(chartFilePath))) {
            renderer.render(model, out);
        }
    }

    private static void usage() {
        System.out.println("Welcome to GCViewer with cmdline");
        System.out.println("java -jar gcviewer.jar [<gc-log-file|url>] -> opens gui and loads given file");
        System.out.println("java -jar gcviewer.jar [<gc-log-file|url>];[<gc-log-file|url>];[...] -> opens gui and loads given files as series of rotated logfiles");
        System.out.println("java -jar gcviewer.jar [<gc-log-file>] [<export.csv>] -> cmdline: writes report to <export.csv>");
        System.out.println("java -jar gcviewer.jar [<gc-log-file|url>];[<gc-log-file|url>];[...] [<export.csv>] -> cmdline: loads given files as series of rotated logfiles and writes report to <export.csv>");
        System.out.println("java -jar gcviewer.jar [<gc-log-file>] [<export.csv>] [<chart.png>] -> cmdline: writes report to <export.csv> and renders gc chart to <chart.png>");
        System.out.println("java -jar gcviewer.jar [<gc-log-file|url>];[<gc-log-file|url>];[...] [<export.csv>] [<chart.png>] -> cmdline: loads given files as series of rotated logfiles and writes report to <export.csv> and renders gc chart to <chart.png>");
        System.out.println("java -jar gcviewer.jar [<gc-log-file|url>] [<export.csv>] [<chart.png>] [-t <SUMMARY, CSV, CSV_TS, PLAIN, SIMPLE>]");
        System.out.println("java -jar gcviewer.jar [<gc-log-file|url>];[<gc-log-file|url>];[...] [<export.csv>] [<chart.png>] [-t <SUMMARY, CSV, CSV_TS, PLAIN, SIMPLE>]");
    }
}
"name" : "map at RDDJoinTest.scala:61", 34 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:369)\napplications.sql.rdd.RDDJoinTest$.main(RDDJoinTest.scala:61)\napplications.sql.rdd.RDDJoinTest.main(RDDJoinTest.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:743)\norg.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)\norg.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)", 35 | "schedulingPool" : "default", 36 | "accumulatorUpdates" : [ { 37 | "id" : 10132, 38 | "name" : "internal.metrics.resultSize", 39 | "value" : "14247" 40 | }, { 41 | "id" : 10149, 42 | "name" : "internal.metrics.input.recordsRead", 43 | "value" : "6191346" 44 | }, { 45 | "id" : 10131, 46 | "name" : "internal.metrics.executorCpuTime", 47 | "value" : "50190625794" 48 | }, { 49 | "id" : 10134, 50 | "name" : "internal.metrics.resultSerializationTime", 51 | "value" : "5" 52 | }, { 53 | "id" : 10146, 54 | "name" : "internal.metrics.shuffle.write.recordsWritten", 55 | "value" : "6191346" 56 | }, { 57 | "id" : 10128, 58 | "name" : "internal.metrics.executorDeserializeTime", 59 | "value" : "156" 60 | }, { 61 | "id" : 10145, 62 | "name" : "internal.metrics.shuffle.write.bytesWritten", 63 | "value" : "770542166" 64 | }, { 65 | "id" : 10148, 66 | "name" : "internal.metrics.input.bytesRead", 67 | "value" : "805699584" 68 | }, { 69 | "id" : 10130, 70 | "name" : "internal.metrics.executorRunTime", 71 | "value" : "96944" 72 | }, { 73 | "id" : 10133, 74 | "name" : "internal.metrics.jvmGCTime", 75 | "value" : "2769" 76 | 
}, { 77 | "id" : 10147, 78 | "name" : "internal.metrics.shuffle.write.writeTime", 79 | "value" : "27463776768" 80 | }, { 81 | "id" : 10129, 82 | "name" : "internal.metrics.executorDeserializeCpuTime", 83 | "value" : "71185751" 84 | } ] 85 | }, 86 | */ 87 | 88 | public class Stage { 89 | 90 | private String appId; 91 | private String appName; 92 | private String stageName; 93 | 94 | private int stageId; 95 | private Map stageAttemptMap = new TreeMap(); 96 | private boolean successful = false; 97 | 98 | public Stage(int stageId, String appId, String appName) { 99 | this.stageId = stageId; 100 | this.appId = appId; 101 | this.appName = appName; 102 | } 103 | 104 | public void addStageAttempt(JsonObject stageObject) { 105 | // stageId = stageObject.get("stageId").getAsInt(); 106 | StageAttempt stageAttempt = new StageAttempt(appId, appName, stageId, stageObject); 107 | 108 | if (stageName == null) 109 | stageName = stageAttempt.getName(); 110 | // Note that the attemptId may not be consistent with the array index. 
111 | stageAttemptMap.put(stageAttempt.getAttemptId(), stageAttempt); 112 | 113 | if (stageAttempt.getAttemptId() == 0 && stageAttempt.getStatus().equals("COMPLETE")) 114 | successful = true; 115 | } 116 | 117 | public Set getStageAttemptIds() { 118 | return stageAttemptMap.keySet(); 119 | } 120 | 121 | public String getStageAttemptStatus(int stageAttemptId) { 122 | return stageAttemptMap.get(stageAttemptId).getStatus(); 123 | } 124 | 125 | 126 | public void addTask(int stageAttemptId, JsonObject taskObject) { 127 | StageAttempt stageAttempt = stageAttemptMap.get(stageAttemptId); 128 | stageAttempt.addTask(taskObject); 129 | } 130 | 131 | public void addTaskSummary(int stageAttemptId, JsonObject taskSummaryJsonObject) { 132 | StageAttempt stageAttempt = stageAttemptMap.get(stageAttemptId); 133 | stageAttempt.addTaskSummary(taskSummaryJsonObject); 134 | } 135 | 136 | public StageAttempt getCompletedStage() { 137 | 138 | StageAttempt attempt0 = stageAttemptMap.get(0); 139 | 140 | // only consider the stageAttempt with attemptId = 0 141 | if (attempt0 != null && attempt0.getStatus().equals("COMPLETE")) 142 | return attempt0; 143 | else 144 | return null; 145 | 146 | /* 147 | for (StageAttempt stageAttempt : stageAttemptMap.values()) { 148 | if (stageAttempt.getStatus().equals("COMPLETE")) 149 | return stageAttempt; 150 | } 151 | */ 152 | } 153 | 154 | public StageAttempt getFailedStage() { 155 | for (StageAttempt stageAttempt : stageAttemptMap.values()) { 156 | if (stageAttempt.getStatus().equals("FAILED")) 157 | return stageAttempt; 158 | } 159 | 160 | return null; 161 | } 162 | 163 | public StageAttempt getFirstStage() { 164 | return stageAttemptMap.get(0); 165 | } 166 | 167 | public int getStageId() { 168 | return stageId; 169 | } 170 | 171 | public String getAppId() { 172 | return appId; 173 | } 174 | 175 | public String getAppName() { 176 | return appName; 177 | } 178 | 179 | public String getStageName() { 180 | return stageName; 181 | } 182 | } 183 | 
class HistogramStatistics:
    """Summary statistics of one metric, grouped by GC algorithm and executor size.

    Values are kept twice: per GC algorithm (lists indexed by executor
    configuration: 1-7G, 2-14G, 4-28G) and per executor configuration
    (lists indexed by GC algorithm: Parallel, CMS, G1).

    ``metric`` is an indexable of ``(name, ylabel, unit, legend)``. ``unit``
    is the divisor applied to every value read by ``addHistogramStatistics``.
    """

    # GC algorithms in gcAlgoIndex order, and the attribute prefix of each.
    _GC_ALGOS = ("Parallel", "CMS", "G1")
    _GC_PREFIXES = {"Parallel": "parallel", "CMS": "cms", "G1": "g1"}
    # File-name tags in executorIndex order, and the attribute prefix of each.
    _EXECUTOR_TAGS = ("1-7g", "2-14g", "4-28g")
    _EXECUTOR_PREFIXES = {0: "exec_1_7G", 1: "exec_2_14G", 2: "exec_4_28G"}
    # Metric name as written in the statistics line -> attribute suffix.
    _METRIC_SUFFIXES = {"mean": "means", "stdVar": "stderr", "quantile95": "quantile95"}

    def __init__(self, metric):
        self.name = metric[0]
        self.ylabel = metric[1]
        self.unit = metric[2]
        self.legend = metric[3]

        # Indexed by executor: Executor(1-7G), Executor(2-14G), Executor(4-28G)
        self.parallel_means = [0, 0, 0]
        self.cms_means = [0, 0, 0]
        self.g1_means = [0, 0, 0]
        self.parallel_stderr = [0, 0, 0]
        self.cms_stderr = [0, 0, 0]
        self.g1_stderr = [0, 0, 0]
        self.parallel_quantile95 = [0, 0, 0]
        self.cms_quantile95 = [0, 0, 0]
        self.g1_quantile95 = [0, 0, 0]

        # Indexed by GC algorithm: Parallel, CMS, G1
        self.exec_1_7G_means = [0, 0, 0]
        self.exec_2_14G_means = [0, 0, 0]
        self.exec_4_28G_means = [0, 0, 0]
        self.exec_1_7G_stderr = [0, 0, 0]
        self.exec_2_14G_stderr = [0, 0, 0]
        self.exec_4_28G_stderr = [0, 0, 0]
        self.exec_1_7G_quantile95 = [0, 0, 0]
        self.exec_2_14G_quantile95 = [0, 0, 0]
        self.exec_4_28G_quantile95 = [0, 0, 0]

        self.max = 0

    @staticmethod
    def _parseName(line):
        """Return the text between the first '[' and the first ']' of *line*."""
        return line[line.find('[') + 1: line.find(']')]

    @staticmethod
    def _iterMetrics(line):
        """Yield ``(metricName, value)`` for each ``name=value`` token after ']'.

        Tokens without an '=' (empty remainder, trailing comma, bare newline)
        are skipped instead of raising IndexError.
        """
        for token in line[line.find(']') + 1:].replace(' ', '').split(','):
            metricName, separator, rawValue = token.strip().partition('=')
            if not separator:
                continue
            yield metricName, float(rawValue)

    @staticmethod
    def _firstMatch(text, tags):
        """Return the index of the first tag contained in *text*, or None."""
        for position, tag in enumerate(tags):
            if tag in text:
                return position
        return None

    def _storeGcStatistic(self, gcAlgo, index, metricName, metricValue):
        """Write one value into the per-GC-algorithm list; ignore unknown names."""
        prefix = self._GC_PREFIXES.get(gcAlgo)
        suffix = self._METRIC_SUFFIXES.get(metricName)
        if prefix is None or suffix is None:
            return
        getattr(self, prefix + "_" + suffix)[index] = metricValue

    def parseStatistics(self, line, gcAlgo, index):
        """Parse ``[name] mean=..., stdVar=..., ...`` into the lists of *gcAlgo*.

        Values are stored as read (no division by ``self.unit``) at position
        *index*. Metrics other than mean/stdVar/quantile95 and GC algorithms
        other than Parallel/CMS/G1 are ignored.
        """
        self.name = self._parseName(line)
        for metricName, metricValue in self._iterMetrics(line):
            self._storeGcStatistic(gcAlgo, index, metricName, metricValue)

    def addHistogramStatistics(self, line, fileName):
        """Parse *line* and file it under the GC algorithm/executor named in *fileName*.

        The GC algorithm ("parallel", "cms", "g1") and executor configuration
        ("1-7g", "2-14g", "4-28g") are detected case-insensitively from
        *fileName*. Every value is divided by ``self.unit``. ``self.max``
        tracks the largest "max" value seen.

        A file name without a recognised GC algorithm contributes only to
        ``self.max``; previously its values overwrote the Parallel slot of the
        per-executor lists. A missing executor tag still defaults to the
        first executor slot, as before.
        """
        self.name = self._parseName(line)

        loweredName = fileName.lower()
        gcAlgoIndex = self._firstMatch(
            loweredName, [algo.lower() for algo in self._GC_ALGOS])
        executorIndex = self._firstMatch(loweredName, self._EXECUTOR_TAGS)
        if executorIndex is None:
            executorIndex = 0

        for metricName, rawValue in self._iterMetrics(line):
            metricValue = rawValue / self.unit

            if gcAlgoIndex is not None and metricName in self._METRIC_SUFFIXES:
                self._storeGcStatistic(
                    self._GC_ALGOS[gcAlgoIndex], executorIndex, metricName, metricValue)
                self.addExectuorStatistics(
                    executorIndex, gcAlgoIndex, metricName, metricValue)

            if metricName == "max" and metricValue > self.max:
                self.max = metricValue

    def addExectuorStatistics(self, executorIndex, gcAlgoIndex, metricName, metricValue):
        """Write one value into the per-executor list at position *gcAlgoIndex*.

        Executor indices other than 0, 1, 2 and metric names other than
        mean/stdVar/quantile95 are ignored.
        """
        prefix = self._EXECUTOR_PREFIXES.get(executorIndex)
        suffix = self._METRIC_SUFFIXES.get(metricName)
        if prefix is None or suffix is None:
            return
        getattr(self, prefix + "_" + suffix)[gcAlgoIndex] = metricValue

    # Correctly spelled alias; the original (misspelled) name is kept for callers.
    addExecutorStatistics = addExectuorStatistics
15 | * Created by xulijie on 17-7-3. 16 | */ 17 | public class StageStatistics { 18 | 19 | private Set stageId = new TreeSet(); 20 | 21 | private Statistics duration; 22 | 23 | private Statistics executorRunTime; 24 | private Statistics executorCpuTime; 25 | 26 | private Statistics inputBytes; 27 | private Statistics inputRecords; 28 | private Statistics outputBytes; 29 | private Statistics outputRecords; 30 | private Statistics shuffleReadBytes; 31 | private Statistics shuffleReadRecords; 32 | private Statistics shuffleWriteBytes; 33 | private Statistics shuffleWriteRecords; 34 | private Statistics memoryBytesSpilled; 35 | private Statistics diskBytesSpilled; 36 | 37 | // internal.metrics.* 38 | private Statistics resultSize; 39 | private Statistics resultSerializationTime; 40 | private Statistics executorDeserializeTime; 41 | private Statistics jvmGCTime; 42 | private Statistics shuffle_write_writeTime; 43 | private Statistics executorDeserializeCpuTime; 44 | 45 | 46 | private TaskStatistics taskStatistics; 47 | 48 | // In general, we run each application 5 times, so the length of stageWithSameId is 5 49 | public StageStatistics(List stagesWithSameId) { 50 | List stageAttempts = new ArrayList(); 51 | 52 | for (Stage stage : stagesWithSameId) { 53 | StageAttempt stageAttempt = stage.getCompletedStage(); 54 | if (stageAttempt != null) 55 | stageAttempts.add(stageAttempt); 56 | else 57 | System.err.println("[WARN] Stage " + stage.getStageId() + " in " 58 | + stage.getAppName() + "-" + stage.getAppId() + " is not completed!"); 59 | 60 | stageId.add(stage.getStageId()); 61 | 62 | } 63 | 64 | computeStatistics(stageAttempts); 65 | computeTaskStatistics(stageAttempts); 66 | } 67 | 68 | // tasks in stage x from app1, tasks in stage x from app2, tasks in stage x from app3 69 | private void computeTaskStatistics(List stageAttempts) { 70 | List tasksInSameStage = new ArrayList(); 71 | 72 | for(StageAttempt stageAttempt : stageAttempts) 73 | 
tasksInSameStage.addAll(stageAttempt.getTaskMap().values()); 74 | 75 | taskStatistics = new TaskStatistics(tasksInSameStage, stageId); 76 | } 77 | 78 | private void computeStatistics(List stageAttempts) { 79 | 80 | Object[] stageAttemptObjs = stageAttempts.toArray(); 81 | 82 | duration = new Statistics(stageAttemptObjs, "getDuration"); 83 | 84 | executorRunTime = new Statistics(stageAttemptObjs, "getExecutorRunTime"); 85 | executorCpuTime = new Statistics(stageAttemptObjs, "getExecutorCpuTime"); 86 | 87 | inputBytes = new Statistics(stageAttemptObjs, "getInputBytes"); 88 | inputRecords = new Statistics(stageAttemptObjs, "getInputRecords"); 89 | outputBytes = new Statistics(stageAttemptObjs, "getOutputBytes"); 90 | outputRecords = new Statistics(stageAttemptObjs, "getOutputRecords"); 91 | shuffleReadBytes = new Statistics(stageAttemptObjs, "getShuffleReadBytes"); 92 | shuffleReadRecords = new Statistics(stageAttemptObjs, "getShuffleReadRecords"); 93 | shuffleWriteBytes = new Statistics(stageAttemptObjs, "getShuffleWriteBytes"); 94 | shuffleWriteRecords = new Statistics(stageAttemptObjs, "getShuffleWriteRecords"); 95 | memoryBytesSpilled = new Statistics(stageAttemptObjs, "getMemoryBytesSpilled"); 96 | diskBytesSpilled = new Statistics(stageAttemptObjs, "getDiskBytesSpilled"); 97 | 98 | // internal.metrics.* 99 | resultSize = new Statistics(stageAttemptObjs, "getMetrics_resultSize"); 100 | resultSerializationTime = new Statistics(stageAttemptObjs, "getMetrics_resultSerializationTime"); 101 | executorDeserializeTime = new Statistics(stageAttemptObjs, "getMetrics_executorDeserializeTime"); 102 | jvmGCTime = new Statistics(stageAttemptObjs, "getMetrics_jvmGCTime"); 103 | shuffle_write_writeTime = new Statistics(stageAttemptObjs, "getMetrics_shuffle_write_writeTime"); 104 | executorDeserializeCpuTime = new Statistics(stageAttemptObjs, "getMetrics_executorDeserializeCpuTime"); 105 | } 106 | 107 | @Override 108 | public String toString() { 109 | StringBuilder sb = new 
StringBuilder(); 110 | 111 | String prefix = "stage" + formatSet(stageId); 112 | 113 | sb.append("[" + prefix + ".duration] " + duration + "\n"); 114 | sb.append("[" + prefix + ".inputBytes] " + inputBytes + "\n"); 115 | sb.append("[" + prefix + ".inputRecords] " + inputRecords + "\n"); 116 | sb.append("[" + prefix + ".outputBytes] " + outputBytes + "\n"); 117 | sb.append("[" + prefix + ".outputRecords] " + outputRecords + "\n"); 118 | sb.append("[" + prefix + ".shuffleReadBytes] " + shuffleReadBytes + "\n"); 119 | sb.append("[" + prefix + ".shuffleReadRecords] " + shuffleReadRecords + "\n"); 120 | sb.append("[" + prefix + ".shuffleWriteBytes] " + shuffleWriteBytes + "\n"); 121 | sb.append("[" + prefix + ".shuffleWriteRecords] " + shuffleWriteRecords + "\n"); 122 | sb.append("[" + prefix + ".memoryBytesSpilled] " + memoryBytesSpilled + "\n"); 123 | sb.append("[" + prefix + ".diskBytesSpilled] " + diskBytesSpilled + "\n"); 124 | 125 | sb.append("[" + prefix + ".resultSize] " + resultSize + "\n"); 126 | sb.append("[" + prefix + ".resultSerializationTime] " + resultSerializationTime + "\n"); 127 | sb.append("[" + prefix + ".executorDeserializeTime] " + executorDeserializeTime + "\n"); 128 | sb.append("[" + prefix + ".jvmGCTime] " + jvmGCTime + "\n"); 129 | sb.append("[" + prefix + ".shuffle_write_writeTime] " + shuffle_write_writeTime + "\n"); 130 | sb.append("[" + prefix + ".executorDeserializeCpuTime] " + executorDeserializeCpuTime + "\n"); 131 | 132 | sb.append(taskStatistics); 133 | 134 | return sb.toString(); 135 | } 136 | 137 | private String formatSet(Set stageId) { 138 | StringBuilder sb = new StringBuilder(); 139 | 140 | for (Integer i : stageId) 141 | sb.append(i + "+"); 142 | 143 | sb.delete(sb.length() - 1, sb.length()); 144 | 145 | return sb.toString(); 146 | } 147 | 148 | public Set getStageId() { 149 | return stageId; 150 | } 151 | 152 | 153 | public TaskAttempt getSlowestTask() { 154 | TaskAttempt slowestTask = taskStatistics.getSlowestTask(); 155 | 
return slowestTask; 156 | } 157 | } 158 | --------------------------------------------------------------------------------