├── .gitignore
├── LICENSE.txt
├── README.md
├── pom.xml
└── src
    └── main
        └── java
            ├── CommandDispatcherMain.java
            ├── HdrToCsv.java
            ├── ParseAndRunUtil.java
            ├── SplitHistogramLogs.java
            ├── SummarizeHistogramLogs.java
            ├── UnionHistogramLogs.java
            └── psy
                └── lob
                    └── saw
                        ├── HdrHistogramUtil.java
                        ├── HistogramIterator.java
                        ├── HistogramLogScanner.java
                        ├── HistogramSink.java
                        ├── OrderedHistogramLogReader.java
                        └── UnionHistograms.java


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.class
 2 | 
 3 | # Mobile Tools for Java (J2ME)
 4 | .mtj.tmp/
 5 | 
 6 | # Package Files #
 7 | *.jar
 8 | *.war
 9 | *.ear
10 | 
11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
12 | hs_err_pid*
13 | /target/
14 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The code in this repository code was Written by Nitsan Wakart,
 2 | and released to the public domain, as explained at
 3 | http://creativecommons.org/publicdomain/zero/1.0/
 4 | 
 5 | For users of this code who wish to consume it under the "BSD" license
 6 | rather than under the public domain or CC0 contribution text mentioned
 7 | above, the code found under this directory is *also* provided under the
 8 | following license (commonly referred to as the BSD 2-Clause License). This
 9 | license does not detract from the above stated release of the code into
10 | the public domain, and simply represents an additional license granted by
11 | the Author.
12 | 
13 | -----------------------------------------------------------------------------
14 | ** Beginning of "BSD 2-Clause License" text. **
15 | 
16 |  Copyright (c) 2016 Nitsan Wakart
17 |  All rights reserved.
18 | 
19 |  Redistribution and use in source and binary forms, with or without
20 |  modification, are permitted provided that the following conditions are met:
21 | 
22 |  1. Redistributions of source code must retain the above copyright notice,
23 |     this list of conditions and the following disclaimer.
24 | 
25 |  2. Redistributions in binary form must reproduce the above copyright notice,
26 |     this list of conditions and the following disclaimer in the documentation
27 |     and/or other materials provided with the distribution.
28 | 
29 |  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 |  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 |  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 |  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
33 |  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 |  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 |  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 |  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 |  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
39 |  THE POSSIBILITY OF SUCH DAMAGE.
40 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # HdrLogProcessing
  2 | 
  3 | Utilities for manipulating HDR Histogram logs. This repo currently includes utilities for summarizing, unioning and splitting logs, and for converting them to CSV.
  4 | 
  5 |  - Requires Maven to build and JDK8 to build/run.
  6 |  - Released under BSD licence.
  7 | 
  8 | For brevity in the following examples, let's assume you built the project and added the following alias:
  9 | 
 10 |     alias hodor='java -jar processor.jar'
 11 | 
 12 | ## Summary tool
 13 | Using the above alias run:
 14 | 
 15 |     $ hodor summarize [...]
 16 | 
 17 | SummarizeHistogramLogs supports the following options:
 18 | 
 19 |     -start (-s) N                          : relative log start time in seconds, (default: 0.0)
 20 |     -end (-e) N                            : relative log end time in seconds, (default: MAX_DOUBLE)
 21 |     -ignoreTag (-it)                       : summary should not be split by tag, (default: false)
 22 |     -inputFile (-if) VAL                   : add an input hdr log from input path, also takes regexp
 23 |     -inputFilePath (-ifp) VAL              : add an input file by path relative to working dir or absolute
 24 |     -inputPath (-ip) VAL                   : set path to use for input files, defaults to current folder
 25 |     -outputBucketSize (-obs) N             : csv output bucket size, (default: 100)
 26 |     -outputFile (-of) VAL                  : set an output file destination, default goes to sysout
 27 |     -outputValueUnitRatio (-ovr) N         : output value unit ratio, (default: 1.0)
 28 |     -percentilesOutputTicksPerHalf (-tph) N: ticks per half percentile, used for hgrm output, (default: 5)
 29 |     -summaryType (-st) [CSV | PERCENTILES | HGRM] : summary type: csv, percentiles, hgrm
 30 |     -verbose (-v) : verbose logging, (default: false)
 31 | 
 32 | This is useful when, for example, you are faced with a histogram log you have collected from your application over time and you wish to summarize the percentiles from the full run:
 33 | 
 34 |      $ hodor summarize -if my-awesome-app-latencies.hdr
 35 |      TotalCount=27663673
 36 |      Period(ms)=205823
 37 |      Throughput(ops/sec)=134405.16
 38 |      Min=263
 39 |      Mean=6561.99
 40 |      50.000ptile=5491
 41 |      90.000ptile=8887
 42 |      99.000ptile=49023
 43 |      99.900ptile=72767
 44 |      99.990ptile=92927
 45 |      99.999ptile=116415
 46 |      Max=145151
 47 | 
 48 | Now perhaps the first 200 seconds of this run are an unstable warmup period I wish to exclude from my summary:
 49 | 
 50 |     $ hodor summarize -if my-awesome-app-latencies.hdr -s 200
 51 | 
 52 | Or maybe I got several logs, from several runs and I want an overall summary, excluding the first 60 seconds of the run and saving the output into a file:
 53 | 
 54 |     $ hodor summarize -if run1.hdr -if run2.hdr -if run3.hdr -s 60 -of runs-summary.out
 55 | 
 56 |     -OR you could use a regexp to get all the files-
 57 | 
 58 |     $ hodor summarize -if ^run.*.hdr -s 60 -of runs-summary.out
 59 | 
 60 | The default output is percentiles as shown above. We support HGRM output if you wish to plot the result with the useful plotter in HdrHistogram, and a CSV format to enable statistical analysis with other tools. The HGRM output with an output file will result in a file per tagged summary with the convention of: _outputfile.tag.hgrm_
 61 | 
 62 | The summary tool supports tags, and if your logs contain histograms with different tags they will get summarized separately. You can use the '-it|ignoreTag' option to summarize all tags together.
 63 | 
 64 | ## Union tool
 65 | Using the above alias run:
 66 | 
 67 |     $ hodor union [...]
 68 | 
 69 | UnionHistogramLogs supports the following options:
 70 | 
 71 |     -end (-e) N                 : relative log end time in seconds, (default: MAX_DOUBLE)
 72 |     -inputFile (-if) VAL        : add an input hdr log from input path, also takes regexp
 73 |     -inputPath (-ip) VAL        : set path to use for input files, defaults to current folder
 74 |     -outputFile (-of) VAL       : set an output file destination, default goes to sysout
 75 |     -relative (-r)              : relative timeline merge, (default: true)
 76 |     -start (-s) N               : relative log start time in seconds, (default: 0.0)
 77 |     -taggedInputFile (-tif) VAL : a <tag>=<filename> add an input file, tag all
 78 |                                   histograms from this file with tag. If histograms
 79 |                                   have a tag it will be concatenated to file tag
 80 |                                   <file-tag>::<histogram-tag>.
 81 |     -verbose (-v)               : verbose logging, (default: false)
 82 | 
 83 | Sometimes you have lots of files, and you really wish you could just throw them all into one file. For example, let's say you used 3 separate load-generating clients to measure your server latencies. You can union all the logs into a single log as follows:
 84 | 
 85 |     $ hodor union -if ^load-gen.*.hdr -of union-load-gens.hdr
 86 | 
 87 | The above union will use absolute time so the result will be as if all load generators were logged from a single source (assuming the clocks are reasonably in sync). You may want to collect the timelines of multiple runs into a single union. This is possible using the '-r' option.
 88 | 
 89 | If each load generator represents a different operation you could use tags to differentiate them in the union:
 90 | 
 91 |     $ hodor union -tif READ=load-gen1.hdr -tif READ=load-gen2.hdr -tif WRITE=load-gen3.hdr -of union-load-gens.hdr
 92 | 
 93 | ## Split tool
 94 | Using the above alias run:
 95 | 
 96 |     $ hodor split [...]
 97 | 
 98 | SplitHistogramLogs supports the following options:
 99 | 
100 |     -end (-e) N           : relative log end time in seconds, (default: MAX_DOUBLE)
101 |     -excludeTag (-et) VAL : add a tag to exclude from input, 'default' is a special tag for the null tag.
102 |     -includeTag (-it) VAL : when include tags are used only the explicitly included will be split out, 'default' is a special tag for the null tag.
103 |     -inputFile (-if) VAL  : set the input hdr log from input path
104 |     -inputPath (-ip) VAL  : set path to use for input files, defaults to current folder
105 |     -start (-s) N         : relative log start time in seconds, (default: 0.0)
106 |     -verbose (-v)         : verbose logging, (default: false)
107 | 
108 | Some tools do not support tags yet, so you may want to split a log into several logs for post processing.
109 | 
110 |     $ hodor split -if taggyLog.hdr
111 | 
112 | Will result in the creation of a log file per tag, with the default tag going to the 'default' file. So tags A,B,C will end up in the files A.taggyLog.hdr, B.taggyLog.hdr, C.taggyLog.hdr respectively.
113 | If you need only certain tags A,B:
114 | 
115 |     $ hodor split -if taggyLog.hdr -it A -it B
116 | 
117 | ## HDR to CSV tool
118 | 
119 | Using the above alias, run:
120 | 
121 |     $ hodor to-csv -i INPUT_FILE
122 | 
123 | It will result in a strict transformation of a log file to CSV.  Intervals will
124 | be preserved.  For each interval, important percentiles will be written in
125 | dedicated columns.  The resulting CSV is printed on stdout.
126 | 
127 | Example usage:
128 | 
129 | ```
130 | $ hodor to-csv -i input.hgrm | tee output.csv
131 | #Timestamp,Throughput,Min,Avg,p50,p90,p95,p99,p999,p9999,Max
132 | 1523292112.000,2364,60608,143515,113599,221823,268543,442367,1638399,6348799,6348799
133 | 1523292113.000,192,64672,130923,116287,188031,205823,260351,366591,366591,366591
134 | 1523292114.000,384,67520,144460,118527,213759,264703,414463,1793023,1793023,1793023
135 | ...
136 | ```
137 | 


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 2 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 3 | 	<modelVersion>4.0.0</modelVersion>
 4 | 	<groupId>psy.lob.saw</groupId>
 5 | 	<artifactId>HdrLogProcessing</artifactId>
 6 | 	<packaging>jar</packaging>
 7 | 	<version>1.0-SNAPSHOT</version>
 8 | 	<name>HdrLogProcessing</name>
 9 | 
10 | 	<dependencies>
11 | 		<dependency>
12 | 			<groupId>org.hdrhistogram</groupId>
13 | 			<artifactId>HdrHistogram</artifactId>
14 | 			<version>2.1.10</version>
15 | 		</dependency>
16 | 		<dependency>
17 | 			<groupId>args4j</groupId>
18 | 			<artifactId>args4j</artifactId>
19 | 			<version>2.33</version>
20 | 		</dependency>
21 | 	</dependencies>
22 | 
23 | 	<prerequisites>
24 | 		<maven>3.0</maven>
25 | 	</prerequisites>
26 | 
27 | 	<properties>
28 | 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
29 | 	</properties>
30 | 
31 | 	<build>
32 | 		<plugins>
33 | 			<plugin>
34 | 				<groupId>org.apache.maven.plugins</groupId>
35 | 				<artifactId>maven-compiler-plugin</artifactId>
36 | 				<version>2.3.2</version>
37 | 				<configuration>
38 | 					<compilerVersion>1.8</compilerVersion>
39 | 					<source>1.8</source>
40 | 					<target>1.8</target>
41 | 				</configuration>
42 | 			</plugin>
43 | 			<plugin>
44 | 				<artifactId>maven-assembly-plugin</artifactId>
45 | 				<executions>
46 | 					<execution>
47 | 						<phase>package</phase>
48 | 						<goals>
49 | 							<goal>single</goal>
50 | 						</goals>
51 | 					</execution>
52 | 				</executions>
53 | 				<configuration>
54 | 					<finalName>processor</finalName>
55 | 					<appendAssemblyId>false</appendAssemblyId>
56 | 					<descriptorRefs>
57 | 						<descriptorRef>jar-with-dependencies</descriptorRef>
58 | 					</descriptorRefs>
59 | 					<archive>
60 | 						<manifest>
61 | 							<mainClass>CommandDispatcherMain</mainClass>
62 | 						</manifest>
63 | 					</archive>
64 | 				</configuration>
65 | 			</plugin>
66 | 		</plugins>
67 | 	</build>
68 | </project>
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/src/main/java/CommandDispatcherMain.java:
--------------------------------------------------------------------------------
 1 | import java.util.Arrays;
 2 | import java.util.stream.Stream;
 3 | 
 4 | /**
 5 |  * This class is the main entry point of HdrLogProcessing.  It consumes the
 6 |  * first CLI parameter entered by the user and tries to match it against known
 7 |  * {@link Command}s.  If a match is found, then the {@code main(String[] args}
 8 |  * method of the class responsible for said command is invoked.
 9 |  * <p>
10 |  * This class is mostly a shortcut so that the user does not need to remember
11 |  * the class name to be invoked (like {@code UnionHistogramLogs}).
12 |  */
13 | public class CommandDispatcherMain
14 | {
15 |     private static void usage()
16 |     {
17 |         System.err.println("Usage: hodor COMMAND [options...]");
18 |         System.err.println("");
19 |         System.err.println("Valid commands:");
20 |         for (Command command : Command.values())
21 |         {
22 |             System.err.println("  " + command.niceName());
23 |         }
24 |     }
25 | 
26 |     public static void main(String[] args) throws Exception
27 |     {
28 |         if (args.length < 1)
29 |         {
30 |             System.err.println("Error: missing command");
31 |             usage();
32 |         }
33 |         else if (!Command.isValid(args[0]))
34 |         {
35 |             System.err.println("Error: invalid command '" + args[0] + "'");
36 |             usage();
37 |         }
38 |         else
39 |         {
40 |             // Remove the command name from `args` so that the all the remaining
41 |             // arguments can be passed to the underlying class.
42 |             String[] withoutCommand = Arrays.copyOfRange(args, 1, args.length);
43 |             Command.fromUserInput(args[0])
44 |                 .mainClass
45 |                 .getMethod("main", String[].class)
46 |                 .invoke(null, (Object) withoutCommand);
47 |         }
48 |     }
49 | 
50 |     private enum Command
51 |     {
52 |         TO_CSV(HdrToCsv.class),
53 |         SPLIT(SplitHistogramLogs.class),
54 |         SUMMARIZE(SummarizeHistogramLogs.class),
55 |         UNION(UnionHistogramLogs.class);
56 | 
57 |         private final Class<?> mainClass;
58 | 
59 |         Command(Class<?> mainClass)
60 |         {
61 |             this.mainClass = mainClass;
62 |         }
63 | 
64 |         private static boolean isValid(String command)
65 |         {
66 |             return Stream.of(values())
67 |                 .anyMatch(c -> c.niceName().equals(command));
68 |         }
69 | 
70 |         private static Command fromUserInput(String command)
71 |         {
72 |             return Stream.of(values())
73 |                 .filter(c -> c.niceName().equals(command))
74 |                 .findFirst()
75 |                 .orElseThrow(() -> new IllegalArgumentException(
76 |                     "Invalid command '" + command + "'"));
77 |         }
78 | 
79 |         private static String sanitize(String s)
80 |         {
81 |             return s.replace("_", "-").toLowerCase();
82 |         }
83 | 
84 |         private String niceName()
85 |         {
86 |             return sanitize(name());
87 |         }
88 |     }
89 | }
90 | 


--------------------------------------------------------------------------------
/src/main/java/HdrToCsv.java:
--------------------------------------------------------------------------------
 1 | import org.HdrHistogram.Histogram;
 2 | import org.kohsuke.args4j.Option;
 3 | import psy.lob.saw.OrderedHistogramLogReader;
 4 | 
 5 | import java.io.File;
 6 | import java.io.FileNotFoundException;
 7 | import java.nio.file.Paths;
 8 | import java.util.Locale;
 9 | 
10 | public class HdrToCsv implements Runnable
11 | {
12 |     private File inputFile;
13 | 
14 |     public static void main(String[] args)
15 |     {
16 |         ParseAndRunUtil.parseParamsAndRun(args, new HdrToCsv());
17 |     }
18 | 
19 |     @Option(name = "--input-file",
20 |         aliases = "-i",
21 |         usage = "Relative or absolute path to the input file to read",
22 |         required = true)
23 |     public void setInputFile(String fileName)
24 |     {
25 |         File in = Paths.get(fileName).toFile();
26 |         if (!in.exists())
27 |         {
28 |             throw new IllegalArgumentException(
29 |                 "Input file " + fileName + " does not exist");
30 |         }
31 |         inputFile = in;
32 |     }
33 | 
34 |     @Override
35 |     public void run()
36 |     {
37 |         OrderedHistogramLogReader reader = null;
38 |         try
39 |         {
40 |             reader = new OrderedHistogramLogReader(inputFile);
41 |         }
42 |         catch (FileNotFoundException e)
43 |         {
44 |             throw new RuntimeException(e);
45 |         }
46 | 
47 |         System.out.println(
48 |             "#Absolute timestamp,Relative timestamp,Throughput,Min,Avg,p50,p90,p95,p99,p999,p9999,Max");
49 |         while (reader.hasNext())
50 |         {
51 |             Histogram interval = (Histogram) reader.nextIntervalHistogram();
52 |             System.out.printf(Locale.US,
53 |                 "%.3f,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d%n",
54 |                 interval.getStartTimeStamp() / 1000.0,
55 |                 interval.getStartTimeStamp() / 1000 - (long) reader.getStartTimeSec(),
56 |                 interval.getTotalCount(), interval.getMinValue(),
57 |                 (long) interval.getMean(),
58 |                 interval.getValueAtPercentile(50),
59 |                 interval.getValueAtPercentile(90),
60 |                 interval.getValueAtPercentile(95),
61 |                 interval.getValueAtPercentile(99),
62 |                 interval.getValueAtPercentile(99.9),
63 |                 interval.getValueAtPercentile(99.99),
64 |                 interval.getMaxValue());
65 |         }
66 |     }
67 | }


--------------------------------------------------------------------------------
/src/main/java/ParseAndRunUtil.java:
--------------------------------------------------------------------------------
 1 | import org.kohsuke.args4j.CmdLineException;
 2 | import org.kohsuke.args4j.CmdLineParser;
 3 | 
 4 | class ParseAndRunUtil
 5 | {
 6 |     static void parseParamsAndRun(String[] args, Runnable app)
 7 |     {
 8 |         CmdLineParser parser = new CmdLineParser(app);
 9 |         try
10 |         {
11 |             parser.parseArgument(args);
12 |             app.run();
13 |         }
14 |         catch (CmdLineException | IllegalArgumentException e)
15 |         {
16 |             System.out.println(e.getMessage());
17 |             parser.printUsage(System.out);
18 |         }
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/main/java/SplitHistogramLogs.java:
--------------------------------------------------------------------------------
  1 | import org.HdrHistogram.Histogram;
  2 | import org.HdrHistogram.HistogramLogWriter;
  3 | import org.kohsuke.args4j.Option;
  4 | import psy.lob.saw.HdrHistogramUtil;
  5 | import psy.lob.saw.OrderedHistogramLogReader;
  6 | 
  7 | import java.io.File;
  8 | import java.io.FileNotFoundException;
  9 | import java.util.HashMap;
 10 | import java.util.HashSet;
 11 | import java.util.Map;
 12 | import java.util.Set;
 13 | 
 14 | import static psy.lob.saw.HdrHistogramUtil.logHistogramForVerbose;
 15 | 
 16 | public class SplitHistogramLogs implements Runnable
 17 | {
 18 |     @Option(name = "-start", aliases = "-s", usage = "relative log start time in seconds, (default: 0.0)", required = false)
 19 |     public double start = 0.0;
 20 | 
 21 |     @Option(name = "-end", aliases = "-e", usage = "relative log end time in seconds, (default: MAX_DOUBLE)", required = false)
 22 |     public double end = Double.MAX_VALUE;
 23 | 
 24 |     @Option(name = "-verbose", aliases = "-v", usage = "verbose logging, (default: false)", required = false)
 25 |     public boolean verbose = false;
 26 |     private File inputPath = new File(".");
 27 |     private File inputFile;
 28 |     private Set<String> excludeTags = new HashSet<>();
 29 |     private Set<String> includeTags = new HashSet<>();
 30 | 
 31 |     public static void main(String[] args) throws Exception
 32 |     {
 33 |         ParseAndRunUtil.parseParamsAndRun(args, new SplitHistogramLogs());
 34 |     }
 35 | 
 36 |     @Option(name = "-inputPath", aliases = "-ip", usage = "set path to use for input files, defaults to current folder", required = false)
 37 |     public void setInputPath(String inputFolderName)
 38 |     {
 39 |         inputPath = new File(inputFolderName);
 40 |         if (!inputPath.exists())
 41 |         {
 42 |             throw new IllegalArgumentException("inputPath:" + inputFolderName + " must exist!");
 43 |         }
 44 |         if (!inputPath.isDirectory())
 45 |         {
 46 |             throw new IllegalArgumentException("inputPath:" + inputFolderName + " must be a directory!");
 47 |         }
 48 |     }
 49 | 
 50 |     @Option(name = "-inputFile", aliases = "-if", usage = "set the input hdr log from input path", required = true)
 51 |     public void setInputFile(String inputFileName)
 52 |     {
 53 |         inputFile = new File(inputPath, inputFileName);
 54 |         if (!inputFile.exists())
 55 |         {
 56 | 
 57 |             inputFile = new File(inputFileName);
 58 |             if (!inputFile.exists())
 59 |             {
 60 |                 throw new IllegalArgumentException("inputFile:" + inputFileName + " must exist!");
 61 |             }
 62 |         }
 63 | 
 64 |     }
 65 | 
 66 |     @Option(name = "-excludeTag", aliases = "-et", usage = "add a tag to filter from input, 'default' is a special tag for the null tag.", required = false)
 67 |     public void addExcludeTag(String tag)
 68 |     {
 69 |         excludeTags.add(tag);
 70 |     }
 71 | 
 72 |     @Option(name = "-includeTag", aliases = "-it", usage = "when include tags are used only the explicitly included will be split out, 'default' is a special tag for the null tag.", required = false)
 73 |     public void addIncludeTag(String tag)
 74 |     {
 75 |         includeTags.add(tag);
 76 |     }
 77 | 
 78 |     @Override
 79 |     public void run()
 80 |     {
 81 |         if (verbose)
 82 |         {
 83 |             String absolutePath = inputPath.getAbsolutePath();
 84 |             String name = inputFile.getName();
 85 |             if (end != Double.MAX_VALUE)
 86 |             {
 87 |                 System.out.printf("start:%.2f end:%.2f path:%s file:%s \n", start, end, absolutePath, name);
 88 |             }
 89 |             else
 90 |             {
 91 |                 System.out.printf("start:%.2f end: MAX path:%s file:%s \n", start, absolutePath, name);
 92 |             }
 93 |         }
 94 |         try
 95 |         {
 96 |             split();
 97 |         }
 98 |         catch (Exception e)
 99 |         {
100 |             throw new RuntimeException(e);
101 |         }
102 |     }
103 | 
104 |     private void split() throws FileNotFoundException
105 |     {
106 |         OrderedHistogramLogReader reader = new OrderedHistogramLogReader(
107 |             inputFile,
108 |             start,
109 |             end,
110 |             tag -> shouldSkipTag(tag));
111 |         Map<String, HistogramLogWriter> writerByTag = new HashMap<>();
112 |         Histogram interval;
113 |         int i = 0;
114 |         while (reader.hasNext())
115 |         {
116 |             interval = (Histogram) reader.nextIntervalHistogram();
117 |             if (interval == null)
118 |             {
119 |                 continue;
120 |             }
121 |             String ntag = interval.getTag();
122 |             if (shouldSkipTag(ntag))
123 |             {
124 |                 throw new IllegalStateException("Should be filtered upfront by the reader");
125 |             }
126 |             if (verbose)
127 |             {
128 |                 logHistogramForVerbose(System.out, interval, i++);
129 |             }
130 |             interval.setTag(null);
131 |             HistogramLogWriter writer = writerByTag.computeIfAbsent(ntag, k -> createWriterForTag(reader, k));
132 |             writer.outputIntervalHistogram(interval);
133 | 
134 |         }
135 |     }
136 | 
137 |     private boolean shouldSkipTag(String ntag)
138 |     {
139 |         ntag = (ntag == null) ? "default" : ntag;
140 |         return excludeTags.contains(ntag) || (!includeTags.isEmpty() && !includeTags.contains(ntag));
141 |     }
142 | 
143 |     private HistogramLogWriter createWriterForTag(OrderedHistogramLogReader reader, String tag)
144 |     {
145 |         tag = (tag == null) ? "default" : tag;
146 |         File outputFile = new File(tag + "." + inputFile.getName());
147 |         String comment = "Splitting of:" + inputFile.getName() + " start:" + start + " end:" + end;
148 |         HistogramLogWriter writer = HdrHistogramUtil.createLogWriter(outputFile, comment, reader.getStartTimeSec());
149 |         return writer;
150 |     }
151 | }
152 | 


--------------------------------------------------------------------------------
/src/main/java/SummarizeHistogramLogs.java:
--------------------------------------------------------------------------------
  1 | import org.HdrHistogram.Histogram;
  2 | import org.HdrHistogram.HistogramIterationValue;
  3 | import org.kohsuke.args4j.Option;
  4 | import psy.lob.saw.OrderedHistogramLogReader;
  5 | 
  6 | import java.io.File;
  7 | import java.io.FileNotFoundException;
  8 | import java.io.FileOutputStream;
  9 | import java.io.PrintStream;
 10 | import java.util.*;
 11 | import java.util.function.Predicate;
 12 | import java.util.regex.Pattern;
 13 | 
 14 | import static psy.lob.saw.HdrHistogramUtil.logHistogramForVerbose;
 15 | 
 16 | public class SummarizeHistogramLogs implements Runnable
 17 | {
 18 | 
 19 |     @Option(name = "-ignoreTag", aliases = "-it", usage = "summary should not be split by tag, (default: false)", required = false)
 20 |     public boolean ignoreTag = false;
 21 |     @Option(name = "-ignoreTimeStamps", aliases = "-its", usage = "summary should ignore time stamps for period calculation, use interval length instead, (default: false)", required = false)
 22 |     public boolean ignoreTimeStamps = false;
 23 |     @Option(name = "-start", aliases = "-s", usage = "relative log start time in seconds, (default: 0.0)", required = false)
 24 |     public double start = 0.0;
 25 |     @Option(name = "-end", aliases = "-e", usage = "relative log end time in seconds, (default: MAX_DOUBLE)", required = false)
 26 |     public double end = Double.MAX_VALUE;
 27 |     @Option(name = "-verbose", aliases = "-v", usage = "verbose logging, (default: false)", required = false)
 28 |     public boolean verbose = false;
 29 |     @Option(name = "-summaryType", aliases = "-st", usage = "summary type: percentiles, csv [linear buckets], csve [exponential buckets],  csvr [raw hdr buckets],  hgrm (default: percentiles)", required = false)
 30 |     public SummaryType summaryType = SummaryType.PERCENTILES;
 31 |     @Option(name = "-percentilesOutputTicksPerHalf", aliases = "-tph", usage = "ticks per half percentile, used for hgrm output, (default: 5)", required = false)
 32 |     public int percentilesOutputTicksPerHalf = 5;
 33 |     @Option(name = "-outputValueUnitRatio", aliases = "-ovr", usage = "output value unit ratio, (default: 1.0)", required = false)
 34 |     public double outputValueUnitRatio = 1.0;
 35 |     @Option(name = "-outputBucketSize", aliases = "-obs", usage = "csv output bucket size, (default: 100)", required = false)
 36 |     public long outputBucketSize = 100;
 37 |     @Option(name = "-outputFile", aliases = "-of", usage = "set an output file destination, default goes to sysout", required = false)
 38 |     public String outputFile;
 39 |     @Option(name = "-excludeTag", aliases = "-excT", usage = "add a tag to filter from input, 'default' is a special tag for the null tag.", required = false)
 40 |     public void addExcludeTag(String tag)
 41 |     {
 42 |         excludeTags.add(tag);
 43 |     }
 44 | 
 45 |     @Option(name = "-includeTag", aliases = "-incT", usage = "when include tags are used only the explicitly included will be split out, 'default' is a special tag for the null tag.", required = false)
 46 |     public void addIncludeTag(String tag)
 47 |     {
 48 |         includeTags.add(tag);
 49 |     }
 50 |     private Set<String> excludeTags = new HashSet<>();
 51 |     private Set<String> includeTags = new HashSet<>();
 52 | 
 53 | 
 54 |     private File inputPath = new File(".");
 55 |     private Set<File> inputFiles = new HashSet<>();
 56 | 
 57 |     public static void main(String[] args)
 58 |     {
 59 |         ParseAndRunUtil.parseParamsAndRun(args, new SummarizeHistogramLogs());
 60 |     }
 61 | 
 62 |     @Option(name = "-inputPath", aliases = "-ip", usage = "set path to use for input files, defaults to current folder", required = false)
 63 |     public void setInputPath(String inputFolderName)
 64 |     {
 65 |         inputPath = new File(inputFolderName);
 66 |         if (!inputPath.exists())
 67 |         {
 68 |             throw new IllegalArgumentException("inputPath:" + inputFolderName + " must exist!");
 69 |         }
 70 |         if (!inputPath.isDirectory())
 71 |         {
 72 |             throw new IllegalArgumentException("inputPath:" + inputFolderName + " must be a directory!");
 73 |         }
 74 |     }
 75 | 
 76 |     /**
 77 |      * Adds every file directly under the current input path whose name matches the given regular expression.
 78 |      * <p>
 79 |      * The expression is applied through {@link Pattern#asPredicate()}, so it may match anywhere in the file
 80 |      * name. The input path in effect at the time of the call is the one that gets listed.
 81 |      *
 82 |      * @param inputFile regular expression tested against each file name
 83 |      * @throws IllegalArgumentException if the input path cannot be listed
 84 |      */
 85 |     @Option(name = "-inputFile", aliases = "-if", usage = "add an input hdr log from input path, also takes regexp", required = false)
 86 |     public void addInputFile(String inputFile)
 87 |     {
 88 |         final Predicate<String> predicate = Pattern.compile(inputFile).asPredicate();
 89 |         // File.listFiles yields null (not an empty array) when the path is not a directory or on an I/O error;
 90 |         // report that clearly instead of failing with a NullPointerException inside Arrays.asList
 91 |         final File[] matches = inputPath.listFiles(pathname -> predicate.test(pathname.getName()));
 92 |         if (matches == null)
 93 |         {
 94 |             throw new IllegalArgumentException("inputPath:" + inputPath + " could not be listed!");
 95 |         }
 96 |         inputFiles.addAll(Arrays.asList(matches));
 97 |     }
 89 | 
 90 |     /**
 91 |      * Adds one input file given by a path that is absolute or relative to the working directory.
 92 |      *
 93 |      * @param inputFileName path of an existing file
 94 |      * @throws IllegalArgumentException when nothing exists at that path
 95 |      */
 96 |     @Option(name = "-inputFilePath", aliases = "-ifp", usage = "add an input file by path relative to working dir or absolute", required = false)
 97 |     public void addInputFileAbs(String inputFileName)
 98 |     {
 99 |         final File candidate = new File(inputFileName);
100 |         if (candidate.exists())
101 |         {
102 |             inputFiles.add(candidate);
103 |             return;
104 |         }
105 |         throw new IllegalArgumentException("file:" + inputFileName + " must exist!");
106 |     }
100 | 
101 |     /**
102 |      * Echoes the effective time range and input path when verbose, then summarizes the configured inputs.
103 |      *
104 |      * @throws IllegalArgumentException when no input file was configured
105 |      * @throws RuntimeException wrapping anything thrown while summarizing
106 |      */
107 |     public void run()
108 |     {
109 |         if (verbose)
110 |         {
111 |             final String path = inputPath.getAbsolutePath();
112 |             if (end == Double.MAX_VALUE)
113 |             {
114 |                 // an unset end is shown symbolically rather than as a huge number
115 |                 System.out.printf("start:%.2f end: MAX path:%s%n", start, path);
116 |             }
117 |             else
118 |             {
119 |                 System.out.printf("start:%.2f end:%.2f path:%s%n", start, end, path);
120 |             }
121 |         }
122 |         if (inputFiles.isEmpty())
123 |         {
124 |             throw new IllegalArgumentException("Error: please specify inputs");
125 |         }
126 | 
127 |         try
128 |         {
129 |             summarizeAndPrint();
130 |         }
131 |         catch (Exception failure)
132 |         {
133 |             throw new RuntimeException(failure);
134 |         }
135 |     }
128 | 
129 |     /**
130 |      * Adds up all interval histograms read from the input files, one sum per tag (a single sum when
131 |      * {@code ignoreTag} is set), and prints each sum in the configured {@code summaryType}.
132 |      * <p>
133 |      * The period handed to the percentiles report is, per input file, the largest (end - start) span found on
134 |      * any per-tag sum after reading that file, added up over all files. With {@code ignoreTimeStamps} it is the
135 |      * plain sum of all interval lengths instead.
136 |      *
137 |      * @throws FileNotFoundException if an output file cannot be opened (and, presumably, if the log reader
138 |      *                               cannot open an input file)
139 |      */
140 |     private void summarizeAndPrint() throws FileNotFoundException
141 |     {
142 |         Map<String, Histogram> sumByTag = new HashMap<>();
143 | 
144 |         long period = 0;
145 |         long intervalLengthSum = 0;
146 |         for (File inputFile : inputFiles)
147 |         {
148 |             if (verbose)
149 |             {
150 |                 System.out.println("Summarizing file: " + inputFile.getName());
151 |             }
152 |             OrderedHistogramLogReader reader = new OrderedHistogramLogReader(
153 |                 inputFile,
154 |                 start,
155 |                 end,
156 |                 tag -> shouldSkipTag(tag));
157 |             Histogram interval;
158 |             int i = 0;
159 |             boolean first = true;
160 |             long startTime = 0;
161 | 
162 |             while (reader.hasNext())
163 |             {
164 |                 interval = (Histogram) reader.nextIntervalHistogram();
165 |                 if (interval == null)
166 |                 {
167 |                     continue;
168 |                 }
169 |                 if (first)
170 |                 {
171 |                     first = false;
172 |                     startTime = interval.getStartTimeStamp();
173 |                     if (verbose)
174 |                     {
175 |                         System.out.println("StartTime: " + new Date(startTime));
176 |                     }
177 | 
178 |                 }
179 |                 String ntag = ignoreTag ? null : interval.getTag();
180 |                 final int numberOfSignificantValueDigits = interval.getNumberOfSignificantValueDigits();
181 |                 // the sum for a tag takes its precision from the first interval seen for that tag
182 |                 Histogram sum = sumByTag.computeIfAbsent(ntag, k ->
183 |                 {
184 |                     Histogram h = new Histogram(numberOfSignificantValueDigits);
185 |                     h.setTag(k);
186 |                     return h;
187 |                 });
188 |                 final long intervalLength = interval.getEndTimeStamp() - interval.getStartTimeStamp();
189 |                 intervalLengthSum += intervalLength;
190 |                 sum.add(interval);
191 |                 if (verbose)
192 |                 {
193 |                     logHistogramForVerbose(System.out, interval, i++, outputValueUnitRatio);
194 |                 }
195 |             }
196 |             // calculate period
197 |             long maxPeriod = 0;
198 |             for (Histogram sum : sumByTag.values())
199 |             {
200 |                 long sumPeriod = (sum.getEndTimeStamp() - sum.getStartTimeStamp());
201 |                 if (verbose)
202 |                 {
203 |                     System.out.print(inputFile.getName());
204 |                     System.out.print(", ");
205 |                     logHistogramForVerbose(System.out, sum, i++, outputValueUnitRatio);
206 |                 }
207 |                 // reset the span so the next file starts from scratch; a tag absent from the next file then
208 |                 // yields a negative span which the max below ignores
209 |                 sum.setEndTimeStamp(0);
210 |                 sum.setStartTimeStamp(Long.MAX_VALUE);
211 |                 maxPeriod = Math.max(maxPeriod, sumPeriod);
212 |             }
213 |             period += maxPeriod;
214 |         }
215 |         if (ignoreTimeStamps)
216 |         {
217 |             period = intervalLengthSum;
218 |         }
219 |         for (Histogram sum : sumByTag.values())
220 |         {
221 |             String tag = (sum.getTag() == null) ? "" : "." + sum.getTag();
222 |             PrintStream out = getOut(tag);
223 |             try
224 |             {
225 |                 switch (summaryType)
226 |                 {
227 |                     case PERCENTILES:
228 |                         printPercentiles(out, sum, period);
229 |                         break;
230 |                     case CSV:
231 |                         printCsv(out, sum);
232 |                         break;
233 |                     case CSVE:
234 |                         printCsvE(out, sum);
235 |                         break;
236 |                     case CSVR:
237 |                         printCsvR(out, sum);
238 |                         break;
239 |                     case HGRM:
240 |                         printHgrm(out, sum);
241 |                         break;
242 |                     default:
243 |                         throw new IllegalStateException();
244 |                 }
245 |             }
246 |             finally
247 |             {
248 |                 // getOut opens a fresh file stream per tag when an output file is configured; release it
249 |                 // once the report is written, but never close the shared System.out
250 |                 if (out != System.out)
251 |                 {
252 |                     out.close();
253 |                 }
254 |             }
255 |         }
256 |     }
231 |     
232 |     private boolean shouldSkipTag(String ntag)
233 |     {
234 |         ntag = (ntag == null) ? "default" : ntag;
235 |         return excludeTags.contains(ntag) || (!includeTags.isEmpty() && !includeTags.contains(ntag));
236 |     }
237 |     
238 |     private PrintStream getOut(String tag) throws FileNotFoundException
239 |     {
240 |         PrintStream report = System.out;
241 |         if (outputFile != null)
242 |         {
243 |             report = new PrintStream(new FileOutputStream(outputFile + tag + ".hgrm"));
244 |         }
245 |         return report;
246 |     }
247 | 
248 |     /**
249 |      * Prints the histogram's percentile distribution, using the configured ticks per half distance and
250 |      * output value unit ratio.
251 |      *
252 |      * @param out destination of the report
253 |      * @param sum histogram to print
254 |      */
255 |     private void printHgrm(PrintStream out, Histogram sum)
256 |     {
257 |         sum.outputPercentileDistribution(
258 |             out,
259 |             percentilesOutputTicksPerHalf,
260 |             outputValueUnitRatio);
261 |     }
252 | 
253 |     private void printPercentiles(PrintStream out, Histogram sum, long period)
254 |     {
255 |         double avgThpt = (sum.getTotalCount() * 1000.0) / period;
256 |         String tag = (sum.getTag() == null) ? "" : sum.getTag() + ".";
257 |         out.printf("%sTotalCount=%d%n", tag, sum.getTotalCount());
258 |         out.printf("%sPeriod(ms)=%d%n", tag, period);
259 |         out.printf("%sThroughput(ops/sec)=%.2f%n", tag, avgThpt);
260 |         out.printf("%sMin=%d%n", tag, (long) (sum.getMinValue() / outputValueUnitRatio));
261 |         out.printf("%sMean=%.2f%n", tag, sum.getMean() / outputValueUnitRatio);
262 |         out.printf("%sStdDev=%.2f%n", tag, sum.getStdDeviation() / outputValueUnitRatio);
263 |         out.printf("%s50.000ptile=%d%n", tag, (long) (sum.getValueAtPercentile(50) / outputValueUnitRatio));
264 |         out.printf("%s90.000ptile=%d%n", tag, (long) (sum.getValueAtPercentile(90) / outputValueUnitRatio));
265 |         out.printf("%s99.000ptile=%d%n", tag, (long) (sum.getValueAtPercentile(99) / outputValueUnitRatio));
266 |         out.printf("%s99.900ptile=%d%n", tag, (long) (sum.getValueAtPercentile(99.9) / outputValueUnitRatio));
267 |         out.printf("%s99.990ptile=%d%n", tag, (long) (sum.getValueAtPercentile(99.99) / outputValueUnitRatio));
268 |         out.printf("%s99.999ptile=%d%n", tag, (long) (sum.getValueAtPercentile(99.999) / outputValueUnitRatio));
269 |         out.printf("%sMax=%d%n", tag, (long) (sum.getMaxValue() / outputValueUnitRatio));
270 |     }
271 | 
272 |     private void printCsv(PrintStream out, Histogram sum)
273 |     {
274 |         long min = (long) (sum.getMinValue() / outputValueUnitRatio);
275 |         long max = (long) (sum.getMaxValue() / outputValueUnitRatio);
276 |         long bucketStart = (min / outputBucketSize) * outputBucketSize;
277 |         out.println("BucketStart, Count");
278 |         for (; bucketStart < max; bucketStart += outputBucketSize)
279 |         {
280 |             long s = (long) (bucketStart * outputValueUnitRatio);
281 |             long e = (long) ((bucketStart + outputBucketSize) * outputValueUnitRatio);
282 |             long count = sum.getCountBetweenValues(s, e);
283 |             out.print(bucketStart);
284 |             out.print(",");
285 |             out.println(count);
286 |         }
287 |     }
288 |     
289 |     private void printCsvE(PrintStream out, Histogram sum)
290 |     {
291 |         long max = (long) (sum.getMaxValue() / outputValueUnitRatio);
292 |         long bucketStart = 0;
293 |         out.println("BucketStart, Count");
294 |         while (bucketStart < max)
295 |         {
296 |             long nextBucketStart = (bucketStart == 0) ? outputBucketSize : bucketStart * 2;
297 |             long s = (long) (bucketStart * outputValueUnitRatio);
298 |             long e = (long) (nextBucketStart * outputValueUnitRatio);
299 |             long count = sum.getCountBetweenValues(s, e);
300 |             out.print(bucketStart);
301 |             out.print(",");
302 |             out.println(count);
303 |             bucketStart = nextBucketStart;
304 |         }
305 |     }
306 | 
307 |     private void printCsvR(PrintStream out, Histogram sum)
308 |     {
309 |         out.println("BucketStart, Count");
310 |         long currentValue = -1;
311 |         long currentValueCount = 0;
312 |         for (HistogramIterationValue value : sum.recordedValues())
313 |         {
314 |             long newValue = (long) (value.getValueIteratedTo() / outputValueUnitRatio);
315 |             if (newValue != currentValue)
316 |             {
317 |                 if (currentValueCount != 0)
318 |                 {
319 |                     out.print(currentValue);
320 |                     out.print(",");
321 |                     out.println(currentValueCount);
322 |                     currentValueCount = 0;
323 |                 }
324 |                 currentValue = newValue;
325 |             }
326 |             currentValueCount += value.getCountAtValueIteratedTo();
327 |         }
328 |         // last value
329 |         if (currentValueCount != 0)
330 |         {
331 |             out.print(currentValue);
332 |             out.print(",");
333 |             out.println(currentValueCount);
334 |         }
335 |     }
336 | 
337 |     /**
338 |      * Report formats selectable for the summary output.
339 |      */
340 |     enum SummaryType
341 |     {
342 |         /** bucketStart,count rows with fixed width buckets */
343 |         CSV,
344 |         /** bucketStart,count rows with bucket starts doubling from the bucket size */
345 |         CSVE,
346 |         /** value,count rows for the recorded values */
347 |         CSVR,
348 |         /** key=value lines: count, period, throughput, min/mean/stddev/max and selected percentiles */
349 |         PERCENTILES,
350 |         /** percentile distribution as printed by the histogram itself */
351 |         HGRM
352 |     }
341 | }
342 | 


--------------------------------------------------------------------------------
/src/main/java/UnionHistogramLogs.java:
--------------------------------------------------------------------------------
  1 | import org.HdrHistogram.Histogram;
  2 | import org.HdrHistogram.HistogramLogWriter;
  3 | import org.kohsuke.args4j.Option;
  4 | import psy.lob.saw.HistogramIterator;
  5 | import psy.lob.saw.HistogramSink;
  6 | import psy.lob.saw.OrderedHistogramLogReader;
  7 | import psy.lob.saw.UnionHistograms;
  8 | 
  9 | import java.io.File;
 10 | import java.io.FileOutputStream;
 11 | import java.io.PrintStream;
 12 | import java.util.*;
 13 | import java.util.function.Predicate;
 14 | import java.util.regex.Pattern;
 15 | 
 16 | import static psy.lob.saw.HdrHistogramUtil.createLogWriter;
 17 | 
 18 | public class UnionHistogramLogs implements Runnable
 19 | {
 20 | 
 21 |     @Option(name = "-start", aliases = "-s", usage = "relative log start time in seconds, (default: 0.0)", required = false)
 22 |     public double start = 0.0;
 23 | 
 24 |     @Option(name = "-end", aliases = "-e", usage = "relative log end time in seconds, (default: MAX_DOUBLE)", required = false)
 25 |     public double end = Double.MAX_VALUE;
 26 | 
 27 |     @Option(name = "-verbose", aliases = "-v", usage = "verbose logging, (default: false)", required = false)
 28 |     public boolean verbose = false;
 29 | 
 30 |     @Option(name = "-relative", aliases = "-r", usage = "relative timeline merge, (default: false)", required = false)
 31 |     public boolean relative = false;
 32 | 
 33 |     @Option(name = "-targetUnionSec", aliases = "-tus", usage = "target union interval length in seconds, (default: 0.0, which will use existing interval lengths)", required = false)
 34 |     public double targetUnionSec = 0.0;
 35 | 
 36 |     private File inputPath = new File(".");
 37 |     private Set<File> inputFiles = new HashSet<>();
 38 |     private Map<File, String> inputFilesTags = new HashMap<>();
 39 |     private File outputFile;
 40 | 
 41 |     public static void main(String[] args)
 42 |     {
 43 |         ParseAndRunUtil.parseParamsAndRun(args, new UnionHistogramLogs());
 44 |     }
 45 | 
 46 |     @Option(name = "-inputPath", aliases = "-ip", usage = "set path to use for input files, defaults to current folder", required = false)
 47 |     public void setInputPath(String inputFolderName)
 48 |     {
 49 |         inputPath = new File(inputFolderName);
 50 |         if (!inputPath.exists())
 51 |         {
 52 |             throw new IllegalArgumentException("inputPath:" + inputFolderName + " must exist!");
 53 |         }
 54 |         if (!inputPath.isDirectory())
 55 |         {
 56 |             throw new IllegalArgumentException("inputPath:" + inputFolderName + " must be a directory!");
 57 |         }
 58 |     }
 59 | 
 60 |     @Option(name = "-inputFile", aliases = "-if", usage = "add an input hdr log from input path, also takes regexp", required = false)
 61 |     public void addInputFile(String inputFile)
 62 |     {
 63 |         final Predicate<String> predicate = Pattern.compile(inputFile).asPredicate();
 64 |         inputFiles.addAll(Arrays.asList(inputPath.listFiles(pathname ->
 65 |         {
 66 |             return predicate.test(pathname.getName());
 67 |         })));
 68 |     }
 69 | 
 70 |     @Option(name = "-inputFilePath", aliases = "-ifp", usage = "add an input file by path relative to working dir or absolute", required = false)
 71 |     public void addInputFilePath(String inputFileName)
 72 |     {
 73 |         File in = new File(inputFileName);
 74 |         if (!in.exists())
 75 |         {
 76 |             throw new IllegalArgumentException("file:" + inputFileName + " must exist!");
 77 |         }
 78 |         inputFiles.add(in);
 79 |     }
 80 | 
 81 |     @Option(name = "-taggedInputFile", aliases = "-tif", usage = "a <tag>=<filename> add an input file, tag all histograms from this file with tag. If histograms have a tag it will be conactanated to file tag.", required = false)
 82 |     public void addTaggedInputFile(String inputFileNameAndTag)
 83 |     {
 84 |         String[] args = inputFileNameAndTag.split("=");
 85 |         if (args.length != 2)
 86 |         {
 87 |             throw new IllegalArgumentException("This value:" + inputFileNameAndTag +
 88 |                 " should be a <tag>=<file>, neither tag nor filename allow the '=' char");
 89 |         }
 90 |         String tag = args[0];
 91 |         String inputFileName = args[1];
 92 |         File in = new File(inputFileName);
 93 |         if (!in.exists())
 94 |         {
 95 |             throw new IllegalArgumentException("file:" + inputFileName + " must exist!");
 96 |         }
 97 |         inputFiles.add(in);
 98 |         inputFilesTags.put(in, tag);
 99 |     }
100 | 
101 |     @Option(name = "-outputFile", aliases = "-of", usage = "set an output file destination, default goes to sysout", required = false)
102 |     public void setOutputFile(String outputFileName)
103 |     {
104 |         outputFile = new File(outputFileName);
105 |     }
106 | 
107 |     @Override
108 |     public void run()
109 |     {
110 |         if (verbose)
111 |         {
112 |             if (end != Double.MAX_VALUE)
113 |             {
114 |                 System.out.printf("start:%.2f end:%.2f path:%s\n", start, end, inputPath.getAbsolutePath());
115 |             }
116 |             else
117 |             {
118 |                 System.out.printf("start:%.2f end: MAX path:%s\n", start, inputPath.getAbsolutePath());
119 |             }
120 | 
121 |             if (!inputFiles.isEmpty())
122 |             {
123 |                 System.out.println("Reading files:");
124 |             }
125 |             else
126 |             {
127 |                 System.out.println("No input files!");
128 |             }
129 | 
130 |             for (File inputFile : inputFiles)
131 |             {
132 |                 System.out.println(inputFile.getAbsolutePath());
133 |             }
134 |         }
135 |         if (inputFiles.isEmpty())
136 |         {
137 |             return;
138 |         }
139 | 
140 | 
141 |         try
142 |         {
143 |             final PrintStream report;
144 |             if (outputFile != null)
145 |             {
146 |                 report = new PrintStream(new FileOutputStream(outputFile));
147 |             }
148 |             else
149 |             {
150 |                 report = System.out;
151 |             }
152 |             List<HistogramIterator> ins = new ArrayList<>();
153 |             for (File inputFile : inputFiles)
154 |             {
155 |                 ins.add(new HistogramIterator(
156 |                     new OrderedHistogramLogReader(inputFile, start, end),
157 |                     inputFilesTags.get(inputFile),
158 |                     relative));
159 |             }
160 |             UnionHistograms unionHistograms = new UnionHistograms(verbose, System.out, ins, new HistogramSink()
161 |             {
162 |                 HistogramLogWriter writer;
163 | 
164 |                 @Override
165 |                 public void startTime(double st)
166 |                 {
167 |                     String comment = "Union of:" +
168 |                         inputFiles +
169 |                         " start:" +
170 |                         start +
171 |                         " end:" +
172 |                         end +
173 |                         " relative:" +
174 |                         relative;
175 |                     writer = createLogWriter(report, comment, relative ? 0.0 : st);
176 |                 }
177 | 
178 |                 @Override
179 |                 public void accept(Histogram h)
180 |                 {
181 |                     writer.outputIntervalHistogram(h);
182 |                 }
183 |             }, (long)(targetUnionSec * 1000));
184 |             unionHistograms.run();
185 |         }
186 |         catch (Exception e)
187 |         {
188 |             throw new RuntimeException(e);
189 |         }
190 |     }
191 | 
192 | }
193 | 


--------------------------------------------------------------------------------
/src/main/java/psy/lob/saw/HdrHistogramUtil.java:
--------------------------------------------------------------------------------
 1 | package psy.lob.saw;
 2 | 
 3 | import org.HdrHistogram.Histogram;
 4 | import org.HdrHistogram.HistogramLogWriter;
 5 | 
 6 | import java.io.File;
 7 | import java.io.FileNotFoundException;
 8 | import java.io.OutputStream;
 9 | import java.io.PrintStream;
10 | 
11 | /**
12 |  * Static helpers shared by the command line tools: one-line verbose logging of a histogram and creation of
13 |  * histogram log writers with their header already written.
14 |  */
15 | public class HdrHistogramUtil
16 | {
17 |     /**
18 |      * Logs a one-line summary of the histogram with values reported as recorded (unit ratio of 1).
19 |      *
20 |      * @param verboseOut destination of the line
21 |      * @param interval   histogram to describe
22 |      * @param i          running index printed in front of the summary
23 |      */
24 |     public static void logHistogramForVerbose(PrintStream verboseOut, Histogram interval, int i)
25 |     {
26 |         final double unscaled = 1;
27 |         logHistogramForVerbose(verboseOut, interval, i, unscaled);
28 |     }
29 | 
30 |     /**
31 |      * Logs a one-line summary of the histogram: tag ({@code default} when null), index, start and end in
32 |      * seconds, count, min, max, mean, the 50/99/99.9 percentiles and operations per second.
33 |      *
34 |      * @param verboseOut           destination of the line
35 |      * @param interval             histogram to describe
36 |      * @param i                    running index printed in front of the summary
37 |      * @param outputValueUnitRatio divisor applied to every reported value
38 |      */
39 |     public static void logHistogramForVerbose(
40 |         PrintStream verboseOut,
41 |         Histogram interval,
42 |         int i,
43 |         double outputValueUnitRatio)
44 |     {
45 |         final String label = (interval.getTag() != null) ? interval.getTag() : "default";
46 |         final double startSec = interval.getStartTimeStamp() / 1000.0;
47 |         final double endSec = interval.getEndTimeStamp() / 1000.0;
48 |         final double lengthSec = (interval.getEndTimeStamp() - interval.getStartTimeStamp()) / 1000.0;
49 |         final long count = interval.getTotalCount();
50 |         final long min = (long) (interval.getMinValue() / outputValueUnitRatio);
51 |         final long max = (long) (interval.getMaxValue() / outputValueUnitRatio);
52 |         final double mean = interval.getMean() / outputValueUnitRatio;
53 |         final long p50 = (long) (interval.getValueAtPercentile(50) / outputValueUnitRatio);
54 |         final long p99 = (long) (interval.getValueAtPercentile(99) / outputValueUnitRatio);
55 |         final long p999 = (long) (interval.getValueAtPercentile(99.9) / outputValueUnitRatio);
56 |         verboseOut.printf("%s %5d: (%8.3f to %8.3f) [count=%d,min=%d,max=%d,avg=%.2f,50=%d,99=%d,999=%d,ops/s=%.1f]%n",
57 |             label, i, startSec, endSec, count, min, max, mean, p50, p99, p999, count / lengthSec);
58 |     }
59 | 
60 |     /**
61 |      * Creates a log writer on a file and writes the log header.
62 |      *
63 |      * @param output       file to write the log to
64 |      * @param comment      comment line for the header, skipped when null
65 |      * @param startTimeSec start time in seconds, 0.0 means no base/start time is written
66 |      * @return the writer, ready to take interval histograms
67 |      * @throws RuntimeException wrapping the {@link FileNotFoundException} when the file cannot be opened
68 |      */
69 |     public static HistogramLogWriter createLogWriter(File output, String comment, double startTimeSec)
70 |     {
71 |         final HistogramLogWriter fileWriter;
72 |         try
73 |         {
74 |             fileWriter = new HistogramLogWriter(output);
75 |         }
76 |         catch (FileNotFoundException notFound)
77 |         {
78 |             throw new RuntimeException(notFound);
79 |         }
80 |         return createLogWriter(fileWriter, comment, startTimeSec);
81 |     }
82 | 
83 |     /**
84 |      * Creates a log writer on an output stream and writes the log header.
85 |      *
86 |      * @param output       stream to write the log to
87 |      * @param comment      comment line for the header, skipped when null
88 |      * @param startTimeSec start time in seconds, 0.0 means no base/start time is written
89 |      * @return the writer, ready to take interval histograms
90 |      */
91 |     public static HistogramLogWriter createLogWriter(OutputStream output, String comment, double startTimeSec)
92 |     {
93 |         final HistogramLogWriter streamWriter = new HistogramLogWriter(output);
94 |         return createLogWriter(streamWriter, comment, startTimeSec);
95 |     }
96 | 
97 |     /**
98 |      * Creates a log writer on a print stream and writes the log header.
99 |      *
100 |      * @param output       stream to write the log to
101 |      * @param comment      comment line for the header, skipped when null
102 |      * @param startTimeSec start time in seconds, 0.0 means no base/start time is written
103 |      * @return the writer, ready to take interval histograms
104 |      */
105 |     public static HistogramLogWriter createLogWriter(PrintStream output, String comment, double startTimeSec)
106 |     {
107 |         final HistogramLogWriter printWriter = new HistogramLogWriter(output);
108 |         return createLogWriter(printWriter, comment, startTimeSec);
109 |     }
110 | 
111 |     /**
112 |      * Writes the header through the given writer: format version, optional comment, optional base/start
113 |      * time (converted to milliseconds) and the legend, in that order.
114 |      */
115 |     private static HistogramLogWriter createLogWriter(
116 |         HistogramLogWriter writer,
117 |         String comment,
118 |         double startTimeSec)
119 |     {
120 |         writer.outputLogFormatVersion();
121 |         if (comment != null)
122 |         {
123 |             writer.outputComment(comment);
124 |         }
125 |         final boolean hasStartTime = startTimeSec != 0.0;
126 |         if (hasStartTime)
127 |         {
128 |             final long startMillis = (long) (startTimeSec * 1000);
129 |             writer.setBaseTime(startMillis);
130 |             writer.outputBaseTime(startMillis);
131 |             writer.outputStartTime(startMillis);
132 |         }
133 |         writer.outputLegend();
134 |         return writer;
135 |     }
136 | }
83 | 


--------------------------------------------------------------------------------
/src/main/java/psy/lob/saw/HistogramIterator.java:
--------------------------------------------------------------------------------
  1 | package psy.lob.saw;
  2 | 
  3 | import org.HdrHistogram.Histogram;
  4 | 
  5 | /**
  6 |  * Look-ahead cursor over the histograms of an {@link OrderedHistogramLogReader}, comparable by the start
  7 |  * timestamp of its pending histogram.
  8 |  * <p>
  9 |  * Optionally rewrites the timestamps of each histogram relative to the reader's start time, and applies
 10 |  * a per-source tag (prefixed with {@code ::} to any tag the histogram already has).
 11 |  */
 12 | public class HistogramIterator implements Comparable<HistogramIterator>
 13 | {
 14 |     private final OrderedHistogramLogReader reader;
 15 |     // the histogram returned by the following call to next(), null once the reader is exhausted
 16 |     private Histogram next;
 17 |     private final String tag;
 18 |     private final boolean relative;
 19 | 
 20 |     /**
 21 |      * Creates an untagged iterator, see {@link #HistogramIterator(OrderedHistogramLogReader, String, boolean)}.
 22 |      */
 23 |     public HistogramIterator(OrderedHistogramLogReader reader, boolean relative)
 24 |     {
 25 |         this(reader, null, relative);
 26 |     }
 27 | 
 28 |     /**
 29 |      * Creates the iterator and immediately reads the first histogram from the reader.
 30 |      *
 31 |      * @param reader   source of histograms
 32 |      * @param tag      tag applied to every histogram, null to leave tags untouched
 33 |      * @param relative whether timestamps are rewritten relative to the reader's start time
 34 |      */
 35 |     public HistogramIterator(OrderedHistogramLogReader reader, String tag, boolean relative)
 36 |     {
 37 |         this.reader = reader;
 38 |         this.tag = tag;
 39 |         this.relative = relative;
 40 |         // init the reader
 41 |         read();
 42 |     }
 43 | 
 44 |     /**
 45 |      * Advances {@code next} to the following non-null histogram of the reader (null when there is none) and
 46 |      * applies the relative-time and tag rewrites to it.
 47 |      */
 48 |     private void read()
 49 |     {
 50 |         do
 51 |         {
 52 |             next = (Histogram) reader.nextIntervalHistogram();
 53 |         }
 54 |         while (next == null && reader.hasNext());
 55 |         if (next == null)
 56 |         {
 57 |             return;
 58 |         }
 59 | 
 60 |         // replace start time with a relative one
 61 |         if (relative)
 62 |         {
 63 |             long length = next.getEndTimeStamp() - next.getStartTimeStamp();
 64 |             long nextStartTime = (long) (next.getStartTimeStamp() - reader.getStartTimeSec() * 1000);
 65 |             next.setStartTimeStamp(nextStartTime);
 66 |             next.setEndTimeStamp(next.getStartTimeStamp() + length);
 67 |         }
 68 |         if (tag != null)
 69 |         {
 70 |             String nextTag = next.getTag();
 71 |             if (nextTag == null)
 72 |             {
 73 |                 next.setTag(tag);
 74 |             }
 75 |             else
 76 |             {
 77 |                 next.setTag(tag + "::" + nextTag);
 78 |             }
 79 |         }
 80 |     }
 81 | 
 82 |     /**
 83 |      * @return the pending histogram (null when exhausted); the following one is read before returning
 84 |      */
 85 |     public Histogram next()
 86 |     {
 87 |         Histogram c = next;
 88 |         read();
 89 |         return c;
 90 |     }
 91 | 
 92 |     /**
 93 |      * @return true while a pending histogram is available
 94 |      */
 95 |     public boolean hasNext()
 96 |     {
 97 |         return next != null;
 98 |     }
 99 | 
100 |     /**
101 |      * Orders iterators by the start timestamp of their pending histogram. Exhausted iterators are equal to
102 |      * each other and sort before iterators which still have a histogram.
103 |      */
104 |     @Override
105 |     public int compareTo(HistogramIterator o)
106 |     {
107 |         if (!hasNext() && !o.hasNext())
108 |         {
109 |             return 0;
110 |         }
111 |         if (!hasNext())
112 |         {
113 |             return -1;
114 |         }
115 |         if (!o.hasNext())
116 |         {
117 |             return 1;
118 |         }
119 |         // compare rather than subtract: narrowing a long millisecond difference to int truncates and can
120 |         // flip the sign once the timestamps are more than Integer.MAX_VALUE ms (about 24.8 days) apart
121 |         return Long.compare(next.getStartTimeStamp(), o.next.getStartTimeStamp());
122 |     }
123 | 
124 |     /**
125 |      * @return the start time, in seconds, reported by the underlying reader
126 |      */
127 |     public double getStartTimeSec()
128 |     {
129 |         return reader.getStartTimeSec();
130 |     }
131 | 
132 |     /**
133 |      * @return the source description reported by the underlying reader
134 |      */
135 |     public String source()
136 |     {
137 |         return reader.source();
138 |     }
139 | }
100 | 


--------------------------------------------------------------------------------
/src/main/java/psy/lob/saw/HistogramLogScanner.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Written by Gil Tene of Azul Systems, and released to the public domain,
  3 |  * as explained at http://creativecommons.org/publicdomain/zero/1.0/
  4 |  *
  5 |  * @author Gil Tene
  6 |  */
  7 | package psy.lob.saw;
  8 | 
  9 | import org.HdrHistogram.DoubleHistogram;
 10 | import org.HdrHistogram.EncodableHistogram;
 11 | import org.HdrHistogram.Histogram;
 12 | 
 13 | import java.io.Closeable;
 14 | import java.io.File;
 15 | import java.io.FileNotFoundException;
 16 | import java.io.InputStream;
 17 | import java.nio.ByteBuffer;
 18 | import java.util.Base64;
 19 | import java.util.Locale;
 20 | import java.util.Scanner;
 21 | import java.util.zip.DataFormatException;
 22 | 
 23 | /**
 24 |  * A histogram log reader.
 25 |  * <p>
 26 |  * Histogram logs are used to capture full fidelity, per-time-interval
 27 |  * histograms of a recorded value.
 28 |  * <p>
 29 |  * For example, a histogram log can be used to capture high fidelity
 30 |  * reaction-time logs for some measured system or subsystem component.
 31 |  * Such a log would capture a full reaction time histogram for each
 32 |  * logged interval, and could be used to later reconstruct a full
 33 |  * HdrHistogram of the measured reaction time behavior for any arbitrary
 34 |  * time range within the log, by adding [only] the relevant interval
 35 |  * histograms.
 36 |  * <h3>Histogram log format:</h3>
 37 |  * A histogram log file consists of text lines. Lines beginning with
 38 |  * the "#" character are optional and treated as comments. Lines
 39 |  * containing the legend (starting with "Timestamp") are also optional
 40 |  * and ignored in parsing the histogram log. All other lines must
 41 |  * be valid interval description lines. Text fields are delimited by
 42 |  * commas, spaces.
 43 |  * <p>
 44 |  * A valid interval description line contains an optional Tag=tagString
 45 |  * text field, followed by an interval description.
 46 |  * <p>
 47 |  * A valid interval description must contain exactly four text fields:
 48 |  * <ul>
 49 |  * <li>StartTimestamp: The first field must contain a number parse-able as a Double value,
 50 |  * representing the start timestamp of the interval in seconds.</li>
 51 |  * <li>intervalLength: The second field must contain a number parse-able as a Double value,
 52 |  * representing the length of the interval in seconds.</li>
 53 |  * <li>Interval_Max: The third field must contain a number parse-able as a Double value,
 54 |  * which generally represents the maximum value of the interval histogram.</li>
 55 |  * <li>Interval_Compressed_Histogram: The fourth field must contain a text field
 56 |  * parse-able as a Base64 text representation of a compressed HdrHistogram.</li>
 57 |  * </ul>
 58 |  * The log file may contain an optional indication of a starting time. Starting time
 59 |  * is indicated using a special comment starting with "#[StartTime: " and followed
 60 |  * by a number parse-able as a double, representing the start time (in seconds)
 61 |  * that may be added to timestamps in the file to determine an absolute
 62 |  * timestamp (e.g. since the epoch) for each interval.
 63 |  */
 64 | public class HistogramLogScanner implements Closeable
 65 | {
 66 |     // can't use lambdas, and anyway we need to let the handler take the exception
 67 |     public interface EncodableHistogramSupplier
 68 |     {
 69 |         EncodableHistogram read() throws DataFormatException;
 70 |     }
 71 | 
 72 |     /**
 73 |      * Handles log events, return true to stop processing.
 74 |      */
 75 |     public interface EventHandler
 76 |     {
 77 |         /**
 78 |          * @param comment a non-standard comment observed in the log, e.g. "#Our's is a nice 'ouse, our's is, We've got no rats or mouses"
 79 |          * @return false to keep processing, true to stop
 80 |          */
 81 |         boolean onComment(String comment);
 82 | 
 83 |         /**
 84 |          * @param secondsSinceEpoch observed standard comment tag: "# BaseTime: "
 85 |          * @return false to keep processing, true to stop
 86 |          */
 87 |         boolean onBaseTime(double secondsSinceEpoch);
 88 | 
 89 |         /**
 90 |          * @param secondsSinceEpoch observed standard comment tag: "# StartTime: "
 91 |          * @return false to keep processing, true to stop
 92 |          */
 93 |         boolean onStartTime(double secondsSinceEpoch);
 94 | 
 95 |         /**
 96 |          * A lazy reader is provided to allow fast skipping of bulk of work where tag or timestamp are to be used as
 97 |          * a basis for filtering the {@link EncodableHistogram} anyway. The reader is to be called only once.
 98 |          *
 99 |          * @param tag        histogram tag or null if none exist
100 |          * @param timestamp  logged timestamp
101 |          * @param length     logged interval length
102 |          * @param lazyReader to be called if the histogram needs to be deserialized, given the tag/timestamp etc.
103 |          * @return false to keep processing, true to stop
104 |          */
105 |         boolean onHistogram(String tag, double timestamp, double length, EncodableHistogramSupplier lazyReader);
106 | 
107 |         /**
108 |          * @param t an exception observed while processing the log
109 |          * @return false to keep processing, true to stop
110 |          */
111 |         boolean onException(Throwable t);
112 |     }
113 | 
114 |     private static class LazyHistogramReader implements EncodableHistogramSupplier
115 |     {
116 | 
117 |         private final Scanner scanner;
118 |         private boolean gotIt = true;
119 | 
120 |         private LazyHistogramReader(Scanner scanner)
121 |         {
122 |             this.scanner = scanner;
123 |         }
124 | 
125 |         private void allowGet()
126 |         {
127 |             gotIt = false;
128 |         }
129 | 
130 |         @Override
131 |         public EncodableHistogram read() throws DataFormatException
132 |         {
133 |             // prevent double calls to this method
134 |             if (gotIt)
135 |             {
136 |                 throw new IllegalStateException();
137 |             }
138 |             gotIt = true;
139 | 
140 |             final String compressedPayloadString = scanner.next();
141 |             final ByteBuffer buffer = ByteBuffer.wrap(Base64.getDecoder().decode(compressedPayloadString));
142 | 
143 |             EncodableHistogram histogram = decodeFromCompressedByteBuffer(buffer, 0);
144 | 
145 |             return histogram;
146 |         }
147 |     }
148 | 
149 |     static EncodableHistogram decodeFromCompressedByteBuffer(ByteBuffer buffer, long minBarForHighestTrackableValue)
150 |         throws DataFormatException
151 |     {
152 |         int cookie = buffer.getInt(buffer.position());
153 |         return (EncodableHistogram) (isDoubleHistogramCookie(cookie) ?
154 |             DoubleHistogram.decodeFromCompressedByteBuffer(buffer, minBarForHighestTrackableValue) :
155 |             Histogram
156 |                 .decodeFromCompressedByteBuffer(buffer, minBarForHighestTrackableValue));
157 |     }
158 | 
159 |     static boolean isDoubleHistogramCookie(int cookie)
160 |     {
161 |         return isCompressedDoubleHistogramCookie(cookie) || isNonCompressedDoubleHistogramCookie(cookie);
162 |     }
163 | 
164 |     private static boolean isCompressedDoubleHistogramCookie(int cookie)
165 |     {
166 |         return cookie == 208802383;
167 |     }
168 | 
169 |     private static boolean isNonCompressedDoubleHistogramCookie(int cookie)
170 |     {
171 |         return cookie == 208802382;
172 |     }
173 | 
    // Lazy payload reader bound to the same scanner; re-armed in process() and handed to EventHandler#onHistogram
    private final LazyHistogramReader lazyReader;
    // Description of the input, as supplied by the public constructors and returned by source()
    private final String source;
    // Underlying tokenizer over the log input; its locale and delimiter are configured by initScanner()
    protected final Scanner scanner;
177 | 
    /**
     * Creates a scanner over the named file. The given name is also what {@link #source()} reports.
     *
     * @param inputFileName The name of the file to read from
     * @throws FileNotFoundException when unable to find inputFileName
     */
    public HistogramLogScanner(final String inputFileName) throws FileNotFoundException
    {
        this(new Scanner(new File(inputFileName)), inputFileName);
    }
186 | 
    /**
     * Creates a scanner over the given stream. {@link #source()} reports the stream's {@code toString()} value.
     *
     * @param inputStream The InputStream to read from
     */
    public HistogramLogScanner(final InputStream inputStream)
    {
        this(new Scanner(inputStream), inputStream.toString());
    }
194 | 
    /**
     * Creates a scanner over the given file. {@link #source()} reports {@link File#getName()}, i.e. the file name
     * without its parent path.
     *
     * @param inputFile The File to read from
     * @throws FileNotFoundException when unable to find inputFile
     */
    public HistogramLogScanner(final File inputFile) throws FileNotFoundException
    {
        this(new Scanner(inputFile), inputFile.getName());
    }
203 | 
204 |     private HistogramLogScanner(Scanner scanner, String source)
205 |     {
206 |         this.scanner = scanner;
207 |         this.lazyReader = new LazyHistogramReader(scanner);
208 |         this.source = source;
209 |         initScanner();
210 |     }
211 | 
212 |     private void initScanner()
213 |     {
214 |         scanner.useLocale(Locale.US);
215 |         scanner.useDelimiter("[ ,\\r\\n]");
216 |     }
217 | 
    /**
     * Closes the underlying scanner. Note that if this instance was initialized with an InputStream then that stream
     * is closed as a result.
     */
    @Override
    public void close()
    {
        scanner.close();
    }
226 | 
227 |     /**
228 |      * Reads the log, delivering events to the provided handler until the handler signals to stop or the end of the log.
229 |      * 
230 |      * @param handler to handle s**t
231 |      */
232 |     public void process(EventHandler handler)
233 |     {
234 |         while (scanner.hasNextLine())
235 |         {
236 |             try
237 |             {
238 |                 if (scanner.hasNext("\\#.*"))
239 |                 {
240 |                     // comment line.
241 |                     // Look for explicit start time or base time notes in comments:
242 |                     if (scanner.hasNext("#\\[StartTime:"))
243 |                     {
244 |                         scanner.next("#\\[StartTime:");
245 |                         if (scanner.hasNextDouble())
246 |                         {
247 |                             double startTimeSec = scanner.nextDouble(); // start time represented as seconds since epoch
248 |                             if (handler.onStartTime(startTimeSec))
249 |                             {
250 |                                 return;
251 |                             }
252 |                         }
253 |                     }
254 |                     else if (scanner.hasNext("#\\[BaseTime:"))
255 |                     {
256 |                         scanner.next("#\\[BaseTime:");
257 |                         if (scanner.hasNextDouble())
258 |                         {
259 |                             double baseTimeSec = scanner.nextDouble(); // base time represented as seconds since epoch
260 |                             if (handler.onBaseTime(baseTimeSec))
261 |                             {
262 |                                 return;
263 |                             }
264 |                         }
265 |                     }
266 |                     else if (handler.onComment(scanner.next("\\#.*")))
267 |                     {
268 |                         return;
269 |                     }
270 |                     continue;
271 |                 }
272 | 
273 |                 if (scanner.hasNext("\"StartTimestamp\".*"))
274 |                 {
275 |                     // Legend line
276 |                     continue;
277 |                 }
278 | 
279 |                 String tagString = null;
280 |                 if (scanner.hasNext("Tag\\=.*"))
281 |                 {
282 |                     tagString = scanner.next("Tag\\=.*").substring(4);
283 |                 }
284 | 
285 |                 // Decode: startTimestamp, intervalLength, maxTime, histogramPayload
286 |                 final double logTimeStampInSec = scanner.nextDouble(); // Timestamp is expected to be in seconds
287 |                 final double intervalLengthSec = scanner.nextDouble(); // Timestamp length is expect to be in seconds
288 |                 scanner.nextDouble(); // Skip maxTime field, as max time can be deduced from the histogram.
289 | 
290 |                 lazyReader.allowGet();
291 |                 if (handler.onHistogram(tagString, logTimeStampInSec, intervalLengthSec, lazyReader))
292 |                 {
293 |                     return;
294 |                 }
295 | 
296 |             }
297 |             catch (Throwable ex)
298 |             {
299 |                 if (handler.onException(ex))
300 |                 {
301 |                     return;
302 |                 }
303 |             }
304 |             finally
305 |             {
306 |                 if (scanner.hasNextLine())
307 |                 {
308 |                     scanner.nextLine(); // Move to next line.
309 |                 }
310 |             }
311 |         }
312 |         return;
313 |     }
314 | 
    /**
     * Indicates whether or not additional intervals may exist in the log. This only reports whether the underlying
     * scanner has another line of input; that line is not inspected here.
     *
     * @return true if additional intervals may exist in the log
     */
    public boolean hasNextLine()
    {
        return scanner.hasNextLine();
    }
324 | 
    /**
     * @return the description of the input recorded at construction: the file name as given, the name of the given
     * File, or the {@code toString()} of the given InputStream, depending on the constructor used
     */
    public String source()
    {
        return source;
    }
329 | }
330 | 


--------------------------------------------------------------------------------
/src/main/java/psy/lob/saw/HistogramSink.java:
--------------------------------------------------------------------------------
 1 | package psy.lob.saw;
 2 | 
 3 | import org.HdrHistogram.Histogram;
 4 | 
/**
 * Destination for histograms; used as the output of {@link UnionHistograms}.
 */
public interface HistogramSink
{
    /**
     * Receives a start time. {@link UnionHistograms#run()} calls this once, before any call to
     * {@link #accept(Histogram)}, with the {@code getStartTimeSec()} value of its first input.
     *
     * @param st the start time
     */
    void startTime(double st);

    /**
     * Receives a histogram. {@link UnionHistograms#run()} may reset and reuse the instance it passes here for the
     * next union of the same tag, so an implementation should consume the data before returning rather than keep
     * the reference.
     *
     * @param h the histogram to consume
     */
    void accept(Histogram h);
}
11 | 


--------------------------------------------------------------------------------
/src/main/java/psy/lob/saw/OrderedHistogramLogReader.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Written by Gil Tene of Azul Systems, and released to the public domain,
  3 |  * as explained at http://creativecommons.org/publicdomain/zero/1.0/
  4 |  *
  5 |  * @author Gil Tene
  6 |  */
  7 | 
  8 | package psy.lob.saw;
  9 | 
 10 | import org.HdrHistogram.EncodableHistogram;
 11 | 
 12 | import java.io.Closeable;
 13 | import java.io.File;
 14 | import java.io.FileNotFoundException;
 15 | import java.util.function.Predicate;
 16 | import java.util.zip.DataFormatException;
 17 | 
 18 | /**
 19 |  * Revised {@link org.HdrHistogram.HistogramLogReader} which utilizes the {@link HistogramLogScanner} to support better
 20 |  * iteration functionality.
 21 |  */
 22 | public class OrderedHistogramLogReader implements Closeable
 23 | {
 24 | 
 25 |     private final HistogramLogScanner scanner;
 26 |     private final HistogramLogScanner.EventHandler handler = new HistogramLogScanner.EventHandler()
 27 |     {
 28 |         @Override
 29 |         public boolean onComment(String comment)
 30 |         {
 31 | //            System.out.println(comment);
 32 |             return false;
 33 |         }
 34 | 
 35 |         @Override
 36 |         public boolean onBaseTime(double secondsSinceEpoch)
 37 |         {
 38 |             baseTimeSec = secondsSinceEpoch; // base time represented as seconds since epoch
 39 |             observedBaseTime = true;
 40 |             return false;
 41 |         }
 42 | 
 43 |         @Override
 44 |         public boolean onStartTime(double secondsSinceEpoch)
 45 |         {
 46 |             startTimeSec = secondsSinceEpoch; // start time represented as seconds since epoch
 47 |             observedStartTime = true;
 48 |             return false;
 49 |         }
 50 | 
 51 |         @Override
 52 |         public boolean onHistogram(
 53 |             String tag, double timestamp, double length,
 54 |             HistogramLogScanner.EncodableHistogramSupplier lazyReader)
 55 |         {
 56 |             final double logTimeStampInSec = timestamp; // Timestamp is expected to be in seconds
 57 | 
 58 |             if (!observedStartTime)
 59 |             {
 60 |                 // No explicit start time noted. Use 1st observed time:
 61 |                 startTimeSec = logTimeStampInSec;
 62 |                 observedStartTime = true;
 63 |             }
 64 | 
 65 |             if (!observedBaseTime)
 66 |             {
 67 |                 // No explicit base time noted. Deduce from 1st observed time (compared to start time):
 68 |                 if (logTimeStampInSec < startTimeSec - (365 * 24 * 3600.0))
 69 |                 {
 70 |                     // Criteria Note: if log timestamp is more than a year in the past (compared to
 71 |                     // StartTime), we assume that timestamps in the log are not absolute
 72 |                     baseTimeSec = startTimeSec;
 73 |                 }
 74 |                 else
 75 |                 {
 76 |                     // Timestamps are absolute
 77 |                     baseTimeSec = 0.0;
 78 |                 }
 79 |                 observedBaseTime = true;
 80 |             }
 81 | 
 82 |             final double absoluteStartTimeStampSec = logTimeStampInSec + baseTimeSec;
 83 |             final double offsetStartTimeStampSec = absoluteStartTimeStampSec - startTimeSec;
 84 | 
 85 |             final double intervalLengthSec = length; // Timestamp length is expect to be in seconds
 86 |             final double absoluteEndTimeStampSec = absoluteStartTimeStampSec + intervalLengthSec;
 87 | 
 88 |             final double startTimeStampToCheckRangeOn = absolute ? absoluteStartTimeStampSec : offsetStartTimeStampSec;
 89 | 
 90 |             if (startTimeStampToCheckRangeOn < rangeStartTimeSec)
 91 |             {
 92 |                 return false;
 93 |             }
 94 | 
 95 |             if (startTimeStampToCheckRangeOn > rangeEndTimeSec)
 96 |             {
 97 |                 // trip the inRange so that readers can stop now
 98 |                 inRange = false;
 99 |                 return true;
100 |             }
101 | 
102 |             // skip excluded
103 |             if (shouldExcludeTag.test(tag))
104 |             {
105 |                 return false;
106 |             }
107 | 
108 |             EncodableHistogram histogram;
109 |             try
110 |             {
111 |                 histogram = lazyReader.read();
112 |             }
113 |             catch (DataFormatException e)
114 |             {
115 |                 return true;
116 |             }
117 | 
118 |             histogram.setStartTimeStamp((long) (absoluteStartTimeStampSec * 1000.0));
119 |             histogram.setEndTimeStamp((long) (absoluteEndTimeStampSec * 1000.0));
120 |             histogram.setTag(tag);
121 |             nextHistogram = histogram;
122 |             return true;
123 |         }
124 | 
125 |         @Override
126 |         public boolean onException(Throwable t)
127 |         {
128 |             t.printStackTrace();
129 |             return false;
130 |         }
131 |     };
132 | 
133 |     // scanner handling state
134 |     private double startTimeSec = 0.0;
135 |     private boolean observedStartTime = false;
136 |     private double baseTimeSec = 0.0;
137 |     private boolean observedBaseTime = false;
138 | 
139 |     private final boolean absolute;
140 |     private final double rangeStartTimeSec;
141 |     private final double rangeEndTimeSec;
142 |     private final Predicate<String> shouldExcludeTag;
143 |     private EncodableHistogram nextHistogram;
144 |     private boolean inRange = true;
145 | 
146 |     public OrderedHistogramLogReader(final File inputFile) throws FileNotFoundException
147 |     {
148 |         this(inputFile, 0.0, Long.MAX_VALUE * 1.0, s -> false, false);
149 |     }
150 | 
151 |     public OrderedHistogramLogReader(File inputFile, double start, double end) throws FileNotFoundException
152 |     {
153 |         this(inputFile, start, end, s -> false, false);
154 |     }
155 | 
156 |     public OrderedHistogramLogReader(File inputFile, double start, double end, Predicate<String> shouldExcludeTag)
157 |         throws FileNotFoundException
158 |     {
159 |         this(inputFile, start, end, shouldExcludeTag, false);
160 |     }
161 | 
162 |     /**
163 |      * Constructs a new OrderedHistogramLogReader that produces intervals read from the specified file.
164 |      *
165 |      * @param inputFile         The File to read from
166 |      * @param rangeStartTimeSec
167 |      * @param rangeEndTimeSec
168 |      * @param shouldExcludeTag  predicate returns true is tag should be skipped
169 |      * @param absolute
170 |      * @throws FileNotFoundException when unable to find inputFile
171 |      */
172 |     public OrderedHistogramLogReader(
173 |         final File inputFile,
174 |         double rangeStartTimeSec,
175 |         double rangeEndTimeSec,
176 |         Predicate<String> shouldExcludeTag, boolean absolute) throws FileNotFoundException
177 |     {
178 |         scanner = new HistogramLogScanner(inputFile);
179 |         this.rangeStartTimeSec = rangeStartTimeSec;
180 |         this.rangeEndTimeSec = rangeEndTimeSec;
181 |         this.absolute = absolute;
182 |         this.shouldExcludeTag = shouldExcludeTag;
183 |     }
184 | 
185 |     /**
186 |      * get the latest start time found in the file so far (or 0.0),
187 |      * per the log file format explained above. Assuming the "#[StartTime:" comment
188 |      * line precedes the actual intervals recorded in the file, getStartTimeSec() can
189 |      * be safely used after each interval is read to determine's the offset of that
190 |      * interval's timestamp from the epoch.
191 |      *
192 |      * @return latest Start Time found in the file (or 0.0 if non found)
193 |      */
194 |     public double getStartTimeSec()
195 |     {
196 |         return startTimeSec;
197 |     }
198 | 
199 |     /**
200 |      * Read the next interval histogram from the log. Returns a Histogram object if
201 |      * an interval line was found, or null if not.
202 |      * <p>Upon encountering any unexpected format errors in reading the next interval
203 |      * from the input, this method will return a null. Use {@link #hasNext} to determine
204 |      * whether or not additional intervals may be available for reading in the log input.
205 |      *
206 |      * @return a DecodedInterval, or a null if no appropriately formatted interval was found
207 |      */
208 |     public EncodableHistogram nextIntervalHistogram()
209 |     {
210 | 
211 |         scanner.process(handler);
212 |         EncodableHistogram histogram = this.nextHistogram;
213 |         nextHistogram = null;
214 |         return histogram;
215 |     }
216 | 
217 |     /**
218 |      * Indicates whether or not additional intervals may exist in the log
219 |      *
220 |      * @return true if additional intervals may exist in the log
221 |      */
222 |     public boolean hasNext()
223 |     {
224 |         return scanner.hasNextLine() && inRange;
225 |     }
226 | 
227 |     @Override
228 |     public void close()
229 |     {
230 |         scanner.close();
231 |     }
232 |     
233 |     public String source()
234 |     {
235 |         return scanner.source();
236 |     }
237 | }
238 | 


--------------------------------------------------------------------------------
/src/main/java/psy/lob/saw/UnionHistograms.java:
--------------------------------------------------------------------------------
  1 | package psy.lob.saw;
  2 | 
import org.HdrHistogram.Histogram;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
 10 | 
 11 | public class UnionHistograms implements Runnable
 12 | {
 13 | 
 14 |     private static class UnionHistogram
 15 |     {
 16 |         final Histogram h;
 17 |         int index;
 18 | 
 19 |         private UnionHistogram(int numberOfSignificantValueDigits)
 20 |         {
 21 |             this.h = new Histogram(numberOfSignificantValueDigits);
 22 |         }
 23 |     }
 24 |     private final boolean verbose;
 25 |     private final PrintStream verboseOut;
 26 |     private final List<HistogramIterator> inputs;
 27 |     private final HistogramSink output;
 28 |     private final long targetUnionMs;
 29 | 
 30 |     public UnionHistograms(
 31 |         boolean verbose,
 32 |         PrintStream verboseOut,
 33 |         List<HistogramIterator> inputs,
 34 |         HistogramSink output)
 35 |     {
 36 |         this(verbose,verboseOut, inputs, output, 0);
 37 |     }
 38 |     
 39 |     public UnionHistograms(
 40 |         boolean verbose,
 41 |         PrintStream verboseOut,
 42 |         List<HistogramIterator> inputs,
 43 |         HistogramSink output,
 44 |         long targetUnionMs)
 45 |     {
 46 |         this.verbose = verbose;
 47 |         this.verboseOut = verboseOut;
 48 |         this.inputs = inputs;
 49 |         this.output = output;
 50 |         this.targetUnionMs = targetUnionMs;
 51 |     }
 52 | 
 53 |     @Override
 54 |     public void run()
 55 |     {
 56 |         List<HistogramIterator> ins = inputs;
 57 |         ins.removeIf(e -> !e.hasNext());
 58 |         Collections.sort(ins);
 59 | 
 60 |         if (ins.isEmpty())
 61 |         {
 62 |             if (verbose)
 63 |             {
 64 |                 verboseOut.println("Input files do not contain range");
 65 |             }
 66 |             return;
 67 |         }
 68 | 
 69 |         output.startTime(ins.get(0).getStartTimeSec());
 70 | 
 71 |         Map<String, UnionHistogram> unionedByTag = new HashMap<>();
 72 |         while (!ins.isEmpty())
 73 |         {
 74 |             HistogramIterator input = ins.get(0);
 75 |             Histogram next = input.next();
 76 | 
 77 |             UnionHistogram union = unionedByTag.computeIfAbsent(next.getTag(), k ->
 78 |             {
 79 |                 UnionHistogram u = new UnionHistogram(next.getNumberOfSignificantValueDigits());
 80 |                 u.h.setEndTimeStamp(0L);
 81 |                 u.h.setStartTimeStamp(Long.MAX_VALUE);
 82 |                 u.h.setTag(k);
 83 |                 return u;
 84 |             });
 85 |             Histogram unionHgrm = union.h;
 86 |             final int unionIndex = union.index;
 87 | 
 88 |             long nextStart = next.getStartTimeStamp();
 89 |             long nextEnd = next.getEndTimeStamp();
 90 | 
 91 |             long unionStart = unionHgrm.getStartTimeStamp();
 92 |             long unionEnd = unionHgrm.getEndTimeStamp();
 93 |             // iterators are sorted, so we know nextStart >= unionStart
 94 |             boolean rollover = false;
 95 | 
 96 |             // new union
 97 |             if (unionStart == Long.MAX_VALUE)
 98 |             {
 99 |                 addNext(input.source(), unionIndex, next, unionHgrm);
100 |                 // expand union length to allow more intervals to fall into the same union
101 |                 if (unionHgrm.getEndTimeStamp() - unionHgrm.getStartTimeStamp() < targetUnionMs)
102 |                 {
103 |                     unionHgrm.setEndTimeStamp(unionHgrm.getStartTimeStamp()  + targetUnionMs);
104 |                 }
105 |             }
106 |             // next interval is inside union interval
107 |             else if (nextStart < unionEnd && nextEnd <= unionEnd)
108 |             {
109 |                 addNext(input.source(), unionIndex, next, unionHgrm);
110 |             }
111 |             // next interval starts before the end of this interval, but is not contained by it
112 |             else if (nextStart < unionEnd)
113 |             {
114 |                 double nextIntervalLength = nextEnd - nextStart;
115 |                 double overlap = (unionEnd - nextStart) / nextIntervalLength;
116 |                 // 80% or more of next is in fact in the current union 
117 |                 if (overlap > 0.8)
118 |                 {
119 |                     addNext(input.source(), unionIndex, next, unionHgrm);
120 |                     // prevent an ever expanding union
121 |                     unionHgrm.setStartTimeStamp(unionStart);
122 |                     unionHgrm.setEndTimeStamp(unionEnd);
123 |                 }
124 |                 else
125 |                 {
126 |                     rollover = true;
127 |                 }
128 |             }
129 |             else
130 |             {
131 |                 rollover = true;
132 |             }
133 |             if (rollover)
134 |             {
135 |                 outputUnion(unionIndex, unionHgrm);
136 |                 final int unionIndexNext = ++union.index;
137 |                 unionHgrm.reset();
138 |                 unionHgrm.setEndTimeStamp(0L);
139 |                 unionHgrm.setStartTimeStamp(Long.MAX_VALUE);
140 |                 unionHgrm.setTag(next.getTag());
141 |                 
142 |                 addNext(input.source(), unionIndexNext, next, unionHgrm);
143 |                 // expand union length to allow more intervals to fall into the same union
144 |                 if (unionHgrm.getEndTimeStamp() - unionHgrm.getStartTimeStamp() < targetUnionMs)
145 |                 {
146 |                     unionHgrm.setEndTimeStamp(unionHgrm.getStartTimeStamp()  + targetUnionMs);
147 |                 }
148 |             }
149 |             // trim and sort
150 |             ins.removeIf(e -> !e.hasNext());
151 |             Collections.sort(ins);
152 |         }
153 |         // write last hgrms
154 |         for (UnionHistogram u : unionedByTag.values())
155 |         {
156 |             outputUnion(u.index, u.h);
157 |         }
158 |     }
159 | 
160 |     private void outputUnion(int i, Histogram union)
161 |     {
162 |         if (verbose)
163 |         {
164 |             verboseOut.print("union, ");
165 |             HdrHistogramUtil.logHistogramForVerbose(verboseOut, union, i);
166 |         }
167 |         output.accept(union);
168 |     }
169 | 
170 |     private void addNext(String source, int i, Histogram next, Histogram union)
171 |     {
172 |         union.add(next);
173 |         if (verbose)
174 |         {
175 |             verboseOut.print(source);
176 |             verboseOut.print(", ");
177 |             HdrHistogramUtil.logHistogramForVerbose(verboseOut, next, i);
178 |         }
179 |     }
180 | 
181 | }
182 | 


--------------------------------------------------------------------------------