├── .classpath
├── .gitignore
├── .project
├── .settings
└── org.eclipse.core.resources.prefs
├── README.md
├── commons-lang3-3.4
├── CONTRIBUTING.md
├── LICENSE.txt
├── NOTICE.txt
├── README.md
├── RELEASE-NOTES.txt
├── commons-lang3-3.4-javadoc.jar
└── commons-lang3-3.4.jar
├── dict.txt
└── src
└── iplom
├── IPLoM.java
├── Pair.java
├── ReadSelectedLine.java
└── Test.java
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /bin/
2 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | iplom-java
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/iplom/IPLoM.java=UTF-8
3 | encoding/README.md=UTF-8
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # iplom-java (work in progress)
2 |
3 | # IPLoM (Iterative Partitioning Log Mining)
4 |
5 | Based on the log mining algorithms published in the following papers:
6 |
7 | [1] [Adetokunbo AO Makanju, A Nur Zincir-Heywood, and Evangelos E Milios. Clustering event logs using iterative partitioning. In Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining, pages 1255–1264. ACM, 2009.](https://web.cs.dal.ca/~makanju/publications/paper/kdd09.pdf)
8 |
9 | [2] [Adetokunbo Makanju, A Nur Zincir-Heywood, and Evangelos E Milios. A lightweight algorithm for message type extraction in system application logs. Knowledge and Data Engineering, IEEE Transactions on, 24(11):1921–1936, 2012.](http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5936060&tag=1)
10 |
11 |
--------------------------------------------------------------------------------
/commons-lang3-3.4/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 |
17 |
41 | Contributing to Apache Commons Lang
42 | ======================
43 |
44 | You have found a bug or you have an idea for a cool new feature? Contributing code is a great way to give something back to
45 | the open source community. Before you dig right into the code there are a few guidelines that we need contributors to
46 | follow so that we can have a chance of keeping on top of things.
47 |
48 | Getting Started
49 | ---------------
50 |
51 | + Make sure you have a [JIRA account](https://issues.apache.org/jira/).
52 | + Make sure you have a [GitHub account](https://github.com/signup/free).
53 | + If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons Lang's scope.
54 | + Submit a ticket for your issue, assuming one does not already exist.
55 | + Clearly describe the issue including steps to reproduce when it is a bug.
56 | + Make sure you fill in the earliest version that you know has the issue.
57 | + Fork the repository on GitHub.
58 |
59 | Making Changes
60 | --------------
61 |
62 | + Create a topic branch from where you want to base your work (this is usually the master/trunk branch).
63 | + Make commits of logical units.
64 | + Respect the original code style:
65 | + Only use spaces for indentation.
66 | + Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change.
67 | + Check for unnecessary whitespace with git diff --check before committing.
68 | + Make sure your commit messages are in the proper format. Your commit message should contain the key of the JIRA issue.
69 | + Make sure you have added the necessary tests for your changes.
70 | + Run all the tests with `mvn clean verify` to assure nothing else was accidentally broken.
71 |
72 | Making Trivial Changes
73 | ----------------------
74 |
75 | For changes of a trivial nature to comments and documentation, it is not always necessary to create a new ticket in JIRA.
76 | In this case, it is appropriate to start the first line of a commit with '(doc)' instead of a ticket number.
77 |
78 | Submitting Changes
79 | ------------------
80 |
81 | + Sign the [Contributor License Agreement][cla] if you haven't already.
82 | + Push your changes to a topic branch in your fork of the repository.
83 | + Submit a pull request to the repository in the apache organization.
84 | + Update your JIRA ticket and include a link to the pull request in the ticket.
85 |
86 | Additional Resources
87 | --------------------
88 |
89 | + [Contributing patches](https://commons.apache.org/patches.html)
90 | + [Apache Commons Lang JIRA project page](https://issues.apache.org/jira/browse/LANG)
91 | + [Contributor License Agreement][cla]
92 | + [General GitHub documentation](https://help.github.com/)
93 | + [GitHub pull request documentation](https://help.github.com/send-pull-requests/)
94 | + [Apache Commons Twitter Account](https://twitter.com/ApacheCommons)
95 | + #apachecommons IRC channel on freenode.org
96 |
97 | [cla]:https://www.apache.org/licenses/#clas
98 |
--------------------------------------------------------------------------------
/commons-lang3-3.4/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/commons-lang3-3.4/NOTICE.txt:
--------------------------------------------------------------------------------
1 | Apache Commons Lang
2 | Copyright 2001-2015 The Apache Software Foundation
3 |
4 | This product includes software developed at
5 | The Apache Software Foundation (http://www.apache.org/).
6 |
7 | This product includes software from the Spring Framework,
8 | under the Apache License 2.0 (see: StringUtils.containsWhitespace())
9 |
--------------------------------------------------------------------------------
/commons-lang3-3.4/README.md:
--------------------------------------------------------------------------------
1 |
17 |
43 | Apache Commons Lang
44 | ===================
45 |
46 | Apache Commons Lang, a package of Java utility classes for the
47 | classes that are in java.lang's hierarchy, or are considered to be so
48 | standard as to justify existence in java.lang.
49 |
50 | Documentation
51 | -------------
52 |
53 | More information can be found on the [homepage](https://commons.apache.org/proper/commons-lang3).
54 | The [JavaDoc](https://commons.apache.org/proper/commons-lang3/javadocs/api-release) can be browsed.
55 | Questions related to the usage of Apache Commons Lang should be posted to the [user mailing list][ml].
56 |
57 | Where can I get the latest release?
58 | -----------------------------------
59 | You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-lang3/download_lang3.cgi).
60 |
61 | Alternatively you can pull it from the central Maven repositories:
62 |
63 | ```xml
64 |
65 | org.apache.commons
66 | commons-lang3
67 | 3.3.2
68 |
69 | ```
70 |
71 | Contributing
72 | ------------
73 |
74 | We accept PRs via github. The [developer mailing list][ml] is the main channel of communication for contributors.
75 | There are some guidelines which will make applying PRs easier for us:
76 | + No tabs! Please use spaces for indentation.
77 | + Respect the code style.
78 | + Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change.
79 | + Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn clean test```.
80 |
81 | If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas).
82 | You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md).
83 |
84 | License
85 | -------
86 | Code is under the [Apache Licence v2](https://www.apache.org/licenses/LICENSE-2.0.txt).
87 |
88 | Donations
89 | ---------
90 | You like Apache Commons Lang? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support the development.
91 |
92 | Additional Resources
93 | --------------------
94 |
95 | + [Apache Commons Homepage](https://commons.apache.org/)
96 | + [Apache Bugtracker (JIRA)](https://issues.apache.org/jira/)
97 | + [Apache Commons Twitter Account](https://twitter.com/ApacheCommons)
98 | + #apachecommons IRC channel on freenode.org
99 |
100 | [ml]:https://commons.apache.org/mail-lists.html
101 |
--------------------------------------------------------------------------------
/commons-lang3-3.4/RELEASE-NOTES.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluency03/iplom-java/df93cc3eb9ed594236e50b7977204d341fa393d4/commons-lang3-3.4/RELEASE-NOTES.txt
--------------------------------------------------------------------------------
/commons-lang3-3.4/commons-lang3-3.4-javadoc.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluency03/iplom-java/df93cc3eb9ed594236e50b7977204d341fa393d4/commons-lang3-3.4/commons-lang3-3.4-javadoc.jar
--------------------------------------------------------------------------------
/commons-lang3-3.4/commons-lang3-3.4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluency03/iplom-java/df93cc3eb9ed594236e50b7977204d341fa393d4/commons-lang3-3.4/commons-lang3-3.4.jar
--------------------------------------------------------------------------------
/dict.txt:
--------------------------------------------------------------------------------
1 | adetokunbo
2 | makanju
3 | nur
4 | zincir
5 | heywood
6 | evangelos
7 | milios
8 | edghklj
9 | chang
10 | liu
11 | str
12 | int
13 | outlier
14 | kimura
15 | tatsuaki
16 | al
17 | et
18 | proactive
19 |
--------------------------------------------------------------------------------
/src/iplom/IPLoM.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Class: IPLoM (Iterative Partitioning Log Mining)
3 | *
4 | * Based on the log mining algorithms published in the following papers:
5 | *
6 | * [1] Adetokunbo AO Makanju, A Nur Zincir-Heywood, and Evangelos E Milios. Clustering event logs using iterative partitioning.
7 | * In Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining, pages 1255–1264. ACM, 2009.
8 | *
9 | * [2] Adetokunbo Makanju, A Nur Zincir-Heywood, and Evangelos E Milios. A lightweight algorithm for message type extraction in
10 | * system application logs. Knowledge and Data Engineering, IEEE Transactions on, 24(11):1921–1936, 2012.
11 | *
12 | * @author ERICSSON/edghklj (Chang Liu)
13 | *
14 | * Initially Created: 2016-02-22
15 | *
16 | */
17 |
18 | package iplom;
19 |
20 | import static java.lang.System.out;
21 | import java.io.*;
22 | import java.util.*;
23 |
24 |
25 |
26 | public class IPLoM {
27 |
28 | /* ------------------------------------------------------------------------------------ */
29 | /* Elements */
30 | /* ------------------------------------------------------------------------------------ */
31 |
32 | /**
33 | * Characters that separate a log message into tokens (handed to StringTokenizer, so every character is a delimiter on its own)
34 | * Default: " []=:()/|\'\""
35 | */
36 | private String delimiter = " []=:()/|\'\"";
37 |
38 | /**
39 | * Define the partition support threshold
40 | * Default: 0.00
41 | */
42 | private double partitionSupportThreshold = 0.00;
43 |
44 | /**
45 | * Define the cluster goodness threshold
46 | * Default: 0.34
47 | */
48 | private double clusterGoodnessThreshold = 0.34;
49 |
50 | /**
51 | * Define the upper bound (>0.5) and lower bound (<0.5)
52 | * Default: upperBound = 0.9 | lowerBound = 0.1
53 | */
54 | private double upperBound = 0.9;
55 | private double lowerBound = 0.1;
56 |
57 | /**
58 | * The source log file (path) to analyze; null until given to the constructor or to setFile
59 | */
60 | private File sourceFile = null;
61 |
62 |
63 | /* ------------------------------------------------------------------------------------ */
64 | /* Constructors */
65 | /* ------------------------------------------------------------------------------------ */
66 | /** Create an instance with no source file; sourceFile stays null until setFile is called. */
67 | public IPLoM () { }
68 | /** Create an instance whose source file is the given name (path), wrapped in a File. */
69 | public IPLoM (String fileName) {
70 | this.sourceFile = new File(fileName);
71 | }
72 |
73 |
74 | /* ------------------------------------------------------------------------------------ */
75 | /* Methods */
76 | /* ------------------------------------------------------------------------------------ */
77 |
78 | /**
79 | * Set the source log file: wraps the given name (path) in a new File, replacing any file set earlier
80 | */
81 | public void setFile(String fileName) {
82 | this.sourceFile = new File(fileName);
83 | }
84 |
85 | /**
86 | * Return the source log file (null if none has been set); nothing is printed
87 | */
88 | public File returnFile () {
89 | return this.sourceFile;
90 | }
91 |
92 | /**
93 | * Set the delimiter characters used to split a log message into tokens (stored as given, not validated)
94 | */
95 | public void setDelimiter(String delimiter) {
96 | this.delimiter = delimiter;
97 | }
98 |
99 | /**
100 | * Set the partition support threshold (stored as given; no range check is done)
101 | */
102 | public void setPartitionSupportThreshold(double support) {
103 | this.partitionSupportThreshold = support;
104 | }
105 |
106 | /**
107 | * Set the cluster goodness threshold (stored as given; no range check is done)
108 | */
109 | public void setClusterGoodnessThreshold(double goodness) {
110 | this.clusterGoodnessThreshold = goodness;
111 | }
112 |
113 | /**
114 | * Set the upper bound (stored as given; it is not compared against the lower bound)
115 | */
116 | public void setUpperBound(double upperBound) {
117 | this.upperBound = upperBound;
118 | }
119 |
120 | /**
121 | * Set the lower bound (stored as given; it is not compared against the upper bound)
122 | */
123 | public void setLowerBound(double lowerBound) {
124 | this.lowerBound = lowerBound;
125 | }
126 |
127 | /**
128 | * Set both the lower and upper bounds in one call (stored as given; lowerBound < upperBound is not checked)
129 | */
130 | public void setBounds(double lowerBound, double upperBound) {
131 | this.lowerBound = lowerBound;
132 | this.upperBound = upperBound;
133 | }
134 |
135 | /**
136 | * Check the token type. NOTE: not implemented yet - the token is never inspected and 0 is returned for every input
137 | */
138 | public Integer checkTokenType(String token) {
139 | Integer tokenType = 0;
140 |
141 | // TODO: only the letters-only pattern is filled in; the other four patterns are still empty strings
142 | /* ------------------- Five types of tokens ------------------- */
143 | String regexOnlySymbols = "";
144 | String regexOnlyLetters = "^[A-Za-z]+$";
145 | String regexSymbolsLetters = "";
146 | String regexNumbersLetters = "";
147 | String regexNumbersSymbols = "";
148 | /* ------------------- Five types of tokens ------------------- */
149 |
150 |
151 | // TODO: presumably match the token against the patterns above and set tokenType - none of them is used yet
152 |
153 |
154 |
155 |
156 |
157 |
158 | return tokenType;
159 | }
160 |
161 |
162 | /* ----------------------------------------------------------------------------------- */
163 |
164 | /**
165 | * Read the source log file line by line and print every line together with its token count.
166 | * Takes no parameters: the file that is read is this.sourceFile.
167 | * An IOException (while opening, reading or closing) is reported with printStackTrace() and ends the read.
168 | */
169 | public void readByLines() {
170 |
171 |     out.println("\nRead the file by lines.");
172 |
173 |     // try-with-resources closes the reader exactly once on every path; a failing close is
174 |     // reported by the catch below instead of being silently swallowed
175 |     try (BufferedReader reader = new BufferedReader(new FileReader(this.sourceFile))) {
176 |         String tempString;
177 |         int currentLine = 1;
178 |         while ((tempString = reader.readLine()) != null) {
179 |             singleLinePrint(tempString, currentLine);
180 |             currentLine++;
181 |         }
182 |     } catch (IOException e) {
183 |         e.printStackTrace();
184 |     }
185 |
186 | }
197 |
198 |
199 | /**
200 | * Print one log line behind its line number, then the number of tokens it contains
201 | * @param str input string
202 | * @param currentLine current line number
203 | */
204 | private void singleLinePrint(String str, int currentLine) {
205 |     out.println("LINE " + currentLine + ": " + str);
206 |     int tokenCount = tokenSizeOfString(str);
207 |     out.println("#Tokens: " + tokenCount);
208 | }
209 |
210 |
211 | /**
212 | * Count how many tokens the string splits into when it is cut at the characters of this.delimiter
213 | * @param
214 | * String str: input string
215 | */
216 | private int tokenSizeOfString(String str) {
217 |     return new StringTokenizer(str, this.delimiter).countTokens();
218 | }
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 | /* ------------------------------------------------------------------------------------ */
229 | /* Step 1 - Partition by token size */
230 | /* ------------------------------------------------------------------------------------ */
231 |
232 | /**
233 | * Partition the log messages based on the #tokens
234 | * @param
235 | *
236 | */
237 | public Map> partitionByTokenSize() {
238 |
239 | out.println("\nPartition by token size.");
240 |
241 | BufferedReader reader = null;
242 | Map> partitionsBySize = new HashMap<>();
243 | String timeRegex = "^((Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (0?[0-9]|[12][0-9]|3[01]) (([0-1][0-9]|2[0-4]):[0-5][0-9]:[0-5][0-9]) )";
244 |
245 | try {
246 | out.println("Partition by token size.");
247 | reader = new BufferedReader(new FileReader(this.sourceFile));
248 | String currString = reader.readLine();
249 | String addedString = currString;
250 | //int currentLine = 1;
251 | do {
252 |
253 | /*
254 | * Check whether it is a line without time-stamp
255 | * And merge the logs without time-stamp with their nearest previous one with time-stamp
256 | */
257 | if (currString.length() < 16) {
258 | addedString = addedString + " " + currString;
259 | continue;
260 | } else if (!currString.substring(0, 16).matches(timeRegex)) {
261 | addedString = addedString + " " + currString;
262 | continue;
263 | } else {
264 | addedString = addedString.substring(16, addedString.length());
265 |
266 | Integer tokenSize = tokenSizeOfString(addedString);
267 | if (partitionsBySize.containsKey(tokenSize)) {
268 | partitionsBySize.get(tokenSize).add(addedString);
269 | } else {
270 | ArrayList tempList = new ArrayList<>();
271 | tempList.add(addedString);
272 | partitionsBySize.put(tokenSize, tempList);
273 | }
274 | //currentLine ++;
275 | addedString = currString;
276 | }
277 |
278 | } while ((currString = reader.readLine()) != null) ;
279 | reader.close();
280 |
281 | } catch (IOException e) {
282 | e.printStackTrace();
283 | } finally {
284 | if (reader != null) {
285 | try {
286 | reader.close();
287 | } catch (Exception e1) {
288 | }
289 | }
290 | }
291 | /* -------------------- For debugging ---------------------- */
292 | //printSizePartition(partitionsBySize);
293 | /* -------------------- For debugging ---------------------- */
294 |
295 | return partitionsBySize;
296 |
297 | }
298 |
299 |
300 | /**
301 | * Print the partitions based on token size: per partition one line with its key and its number of logs, then each log on its own line
302 | * Used for debugging
303 | */
304 | private void printSizePartition(Map> partitionsBySize) {
305 | //Map> partitionsBySize = partitionByTokenSize();
306 | for (Map.Entry> entry: partitionsBySize.entrySet()) {
307 | // out.println(entry.getKey() + " " + entry.getValue().size() + " " + entry.getValue());
308 | out.println(entry.getKey() + " " + entry.getValue().size());
309 | for (String oneLog: entry.getValue()) {
310 | out.println(oneLog);
311 | }
312 | }
313 | }
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 | /* ------------------------------------------------------------------------------------ */
323 | /* Step 2 - Partition by token position */
324 | /* ------------------------------------------------------------------------------------ */
325 |
326 | /**
327 | * partitionByTokenPosition
328 | * @return
329 | * Map, ArrayList>> partitionByPosition
330 | */
331 | public Map, ArrayList>> partitionByTokenPosition() {
332 |
333 | Map> partitionsBySize = partitionByTokenSize();
334 | Map>> matirxBySize = new HashMap<>();
335 | Map, ArrayList>> partitionByPosition = new HashMap<>();
336 |
337 | out.println("\nPartition by token position.");
338 |
339 | /*
340 | * For each of the partition divided based on token size
341 | */
342 | for (Map.Entry> partitionEntry: partitionsBySize.entrySet()) {
343 |
344 | //out.println(partitionEntry.getKey() + " " + partitionEntry.getValue().size() + " " + partitionEntry.getValue());
345 | Integer tempSize = partitionEntry.getKey();
346 | matirxBySize.put(tempSize, new ArrayList>());
347 | List> tokenCollection = new ArrayList<>(tempSize);
348 |
349 | while(tokenCollection.size() < tempSize) {
350 | tokenCollection.add(new HashMap());
351 | }
352 |
353 | for (String oneLog: partitionEntry.getValue()) {
354 | StringTokenizer oneLogTokens = new StringTokenizer(oneLog, this.delimiter);
355 | ArrayList logArray = new ArrayList<>(oneLogTokens.countTokens());
356 |
357 | for (int i = 0; i < tempSize; i++) {
358 | String oneToken = oneLogTokens.nextToken();
359 | logArray.add(oneToken);
360 | HashMap logEntry = tokenCollection.get(i);
361 | logEntry.put(oneToken, logEntry.containsKey(oneToken) ? (logEntry.get(oneToken) + 1) : 1);
362 | }
363 | matirxBySize.get(tempSize).add(logArray);
364 | }
365 | /* -------------------- For debugging ---------------------- */
366 | // printTokenCollection(tokenCollection);
367 | /* -------------------- For debugging ---------------------- */
368 |
369 |
370 | /*
371 | * Calculate the partitioning position:
372 | * Reason for putting it here instead of merging it with the above for-loop:
373 | * Merging with above for-loop adding lots of computation, when loop is rolling
374 | */
375 | int chosenPosition = positionCardinality(tokenCollection).getLeft();
376 | //out.println("Position with lowest cardinality: " + choosenPosition);
377 |
378 | //out.println(tempSize);
379 | for (ArrayList logMatrix: matirxBySize.get(tempSize)) {
380 | String key = logMatrix.get(chosenPosition);
381 | ArrayList