├── .classpath
├── .gitignore
├── .project
├── .settings
    └── org.eclipse.jdt.core.prefs
├── LICENSE
├── README.md
├── pom.xml
└── src
    ├── main
        └── java
        │   └── codemining
        │       ├── cpp
        │           └── codeutils
        │           │   ├── AbstractCdtASTAnnotatedTokenizer.java
        │           │   ├── AbstractCdtAstExtractor.java
        │           │   ├── CASTAnnotatedTokenizer.java
        │           │   ├── CAstExtractor.java
        │           │   ├── CDTTokenizer.java
        │           │   ├── CppASTAnnotatedTokenizer.java
        │           │   ├── CppASTExtractor.java
        │           │   ├── CppTokenTypeTokenizer.java
        │           │   ├── CppWhitespaceTokenizer.java
        │           │   └── package-info.java
        │       ├── java
        │           ├── codedata
        │           │   ├── MethodRetriever.java
        │           │   ├── PackageInfoExtractor.java
        │           │   ├── metrics
        │           │   │   ├── CyclomaticCalculator.java
        │           │   │   ├── IFileMetricRetriever.java
        │           │   │   ├── JavaMethodClassCounter.java
        │           │   │   ├── LinesOfCodeMetric.java
        │           │   │   └── package-info.java
        │           │   └── package-info.java
        │           ├── codeutils
        │           │   ├── IdentifierPerType.java
        │           │   ├── JavaASTExtractor.java
        │           │   ├── JavaApproximateTypeInferencer.java
        │           │   ├── JavaTypeHierarchyExtractor.java
        │           │   ├── MethodExtractor.java
        │           │   ├── MethodUtils.java
        │           │   ├── MethodsInClass.java
        │           │   ├── ProjectTypeInformation.java
        │           │   ├── UsagePointExtractor.java
        │           │   ├── binding
        │           │   │   ├── AbstractJavaNameBindingsExtractor.java
        │           │   │   ├── JavaApproximateVariableBindingExtractor.java
        │           │   │   ├── JavaExactVariableBindingsExtractor.java
        │           │   │   ├── JavaFeatureExtractor.java
        │           │   │   ├── JavaMethodDeclarationBindingExtractor.java
        │           │   │   ├── JavaMethodInvocationBindingExtractor.java
        │           │   │   ├── JavaTypeDeclarationBindingExtractor.java
        │           │   │   ├── JavaVariableFeatureExtractor.java
        │           │   │   └── tui
        │           │   │   │   ├── JavaBindingsPerFeatureTypeToJson.java
        │           │   │   │   └── JavaBindingsToJson.java
        │           │   ├── package-info.java
        │           │   └── scopes
        │           │   │   ├── AllScopeExtractor.java
        │           │   │   ├── MethodScopeExtractor.java
        │           │   │   ├── ScopedIdentifierRenaming.java
        │           │   │   ├── ScopesTUI.java
        │           │   │   ├── TypenameScopeExtractor.java
        │           │   │   ├── VariableScopeExtractor.java
        │           │   │   └── package-info.java
        │           └── tokenizers
        │           │   ├── JavaASTAnnotatedTokenizer.java
        │           │   ├── JavaIdentifierAnnotatedTokenizer.java
        │           │   ├── JavaTokenTypeTokenizer.java
        │           │   ├── JavaTokenizer.java
        │           │   ├── JavaTokenizerSomeTokens.java
        │           │   ├── JavaTypeTokenizer.java
        │           │   ├── JavaWhitespaceTokenizer.java
        │           │   └── JavaWidthAnnotatedWhitespaceTokenizer.java
        │       ├── js
        │           ├── codedata
        │           │   └── metrics
        │           │   │   ├── IJavascriptFileMetricRetriever.java
        │           │   │   ├── JavascriptCyclomaticCalculator.java
        │           │   │   └── JavascriptFunctionCounter.java
        │           └── codeutils
        │           │   ├── FunctionRetriever.java
        │           │   ├── JavascriptASTExtractor.java
        │           │   ├── JavascriptTokenizer.java
        │           │   ├── NodeFinder.java
        │           │   ├── binding
        │           │       ├── AbstractJavascriptNameBindingsExtractor.java
        │           │       ├── JavascriptApproximateVariableBindingExtractor.java
        │           │       └── JavascriptExactVariableBindingsExtractor.java
        │           │   └── package-info.java
        │       ├── langs
        │           └── codeutils
        │           │   ├── AbstractJygmentsTokenizer.java
        │           │   ├── CodeTokenizer.java
        │           │   ├── TokenTypeTokenizer.java
        │           │   └── package-info.java
        │       ├── languagetools
        │           ├── ClassHierarchy.java
        │           ├── CodePrinter.java
        │           ├── ColoredToken.java
        │           ├── FormattingTokenizer.java
        │           ├── IAstAnnotatedTokenizer.java
        │           ├── IFormattingTokenizer.java
        │           ├── IScopeExtractor.java
        │           ├── ITokenizer.java
        │           ├── ParseType.java
        │           ├── Scope.java
        │           ├── TokenizerUtils.java
        │           ├── bindings
        │           │   ├── AbstractNameBindingsExtractor.java
        │           │   ├── ResolvedSourceCode.java
        │           │   └── TokenNameBinding.java
        │           ├── package-info.java
        │           ├── tokenizers
        │           │   └── whitespace
        │           │   │   ├── WhitespaceToTokenConverter.java
        │           │   │   └── WhitespaceTokenConverter.java
        │           └── tui
        │           │   ├── DistinctTokenCount.java
        │           │   ├── TokenCounter.java
        │           │   ├── TokenizerTUI.java
        │           │   └── package-info.java
        │       └── python
        │           └── codeutils
        │               ├── AbstractPythonTokenizer.java
        │               ├── Python27Tokenizer.java
        │               └── Python30Tokenizer.java
    └── test
        ├── java
            └── codemining
            │   ├── java
            │       └── codeutils
            │       │   ├── JavaApproximateTypeInferencerTest.java
            │       │   ├── JavaAstExtractorTest.java
            │       │   ├── JavaWhitespaceTokenizerTest.java
            │       │   ├── TokenizeJavaCodeTest.java
            │       │   └── binding
            │       │       ├── BindingTester.java
            │       │       ├── JavaApproximateVariableBindingExtractorTest.java
            │       │       ├── JavaExactVariableBindingsExtractorTest.java
            │       │       ├── JavaMethodBindingExtractorTest.java
            │       │       └── JavaTypeBindingExtractorTest.java
            │   ├── js
            │       └── codeutils
            │       │   ├── JavascriptASTExtractorTest.java
            │       │   ├── TokenizeJavascriptCodeTest.java
            │       │   └── binding
            │       │       ├── JavascriptApproximateVariableBindingExtractorTest.java
            │       │       └── JavascriptExactVariableBindingsExtractorTest.java
            │   └── languagetools
            │       └── TokenizerUtilsTest.java
        └── resources
            ├── SampleClass.txt
            ├── SampleClass2.txt
            ├── SampleClass2WhitespaceTokens.txt
            ├── SampleClass3.txt
            ├── SampleJavascript.txt
            ├── SampleJavascript2.txt
            ├── SampleJavascript3.txt
            └── SampleMethod.txt


/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
16 | 		<attributes>
17 | 			<attribute name="maven.pomderived" value="true"/>
18 | 		</attributes>
19 | 	</classpathentry>
20 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
21 | 		<attributes>
22 | 			<attribute name="maven.pomderived" value="true"/>
23 | 		</attributes>
24 | 	</classpathentry>
25 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
26 | 		<attributes>
27 | 			<attribute name="maven.pomderived" value="true"/>
28 | 		</attributes>
29 | 	</classpathentry>
30 | 	<classpathentry kind="output" path="target/classes"/>
31 | </classpath>
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 | /target
3 | /target/
4 | /target/
5 | /target/
6 | 


--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>codemining-core</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
 1 | eclipse.preferences.version=1
 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
 5 | org.eclipse.jdt.core.compiler.compliance=1.8
 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
12 | org.eclipse.jdt.core.compiler.source=1.8
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, School of Informatics, University of Edinburgh
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of [project] nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | codemining-core
2 | ===============
3 | A set of tools for extracting tokens and ASTs from code.
4 | 
5 | codemining-core depends on the [codemining-utils](https://github.com/mast-group/codemining-utils) package.
6 | 
7 | codemining-core is released under a BSD license.
8 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/cpp/codeutils/AbstractCdtAstExtractor.java:
--------------------------------------------------------------------------------
 1 | package codemining.cpp.codeutils;
 2 | 
 3 | import java.util.Map;
 4 | 
 5 | import org.eclipse.cdt.core.dom.ast.IASTTranslationUnit;
 6 | import org.eclipse.cdt.core.index.IIndex;
 7 | import org.eclipse.cdt.core.model.ILanguage;
 8 | import org.eclipse.cdt.core.parser.DefaultLogService;
 9 | import org.eclipse.cdt.core.parser.FileContent;
10 | import org.eclipse.cdt.core.parser.IParserLogService;
11 | import org.eclipse.cdt.core.parser.IScannerInfo;
12 | import org.eclipse.cdt.core.parser.IncludeFileContentProvider;
13 | import org.eclipse.cdt.core.parser.ScannerInfo;
14 | import org.eclipse.core.runtime.CoreException;
15 | 
16 | import com.google.common.collect.Maps;
17 | 
18 | /**
19 |  * Base class for all classes that are able to retrieve a CDT-compatible AST.
20 |  * Macros and inclusions are not resolved, unless in the same file.
21 |  * Subclasses select the concrete language by implementing getAstForLanguage.
22 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
23 |  * 
24 |  */
25 | public abstract class AbstractCdtAstExtractor {
26 | 
27 | 	/**
28 | 	 * Return an AST for the given CDT-compatible code.
29 | 	 * @param code the source code to parse
30 | 	 * @param baseIncludePath passed, together with the code, to FileContent.create
31 | 	 * @return the translation unit returned by getAstForLanguage
32 | 	 * @throws CoreException if getAstForLanguage throws it
33 | 	 */
34 | 	public final IASTTranslationUnit getAST(final char[] code,
35 | 			final String baseIncludePath) throws CoreException {
36 | 		final FileContent fc = FileContent.create(baseIncludePath, code);
37 | 		final Map<String, String> macroDefinitions = Maps.newHashMap(); // no predefined macros
38 | 		final String[] includeSearchPaths = new String[0]; // no include search paths
39 | 		final IScannerInfo si = new ScannerInfo(macroDefinitions,
40 | 				includeSearchPaths);
41 | 		final IncludeFileContentProvider ifcp = IncludeFileContentProvider
42 | 				.getEmptyFilesProvider(); // NOTE(review): presumably yields empty content for includes - confirm
43 | 		final IIndex idx = null; // no index is handed to the parser
44 | 		final int options = ILanguage.OPTION_IS_SOURCE_UNIT;
45 | 		final IParserLogService log = new DefaultLogService();
46 | 		return getAstForLanguage(fc, si, ifcp, idx, options, log);
47 | 	}
48 | 
49 | 	/**
50 | 	 * To be overridden for each language: parse the content into a translation unit.
51 | 	 * 
52 | 	 * @param fc the file content to parse
53 | 	 * @param si the scanner info (getAST supplies empty macros and include paths)
54 | 	 * @param ifcp the provider used for the content of included files
55 | 	 * @param idx the index to use; getAST passes null
56 | 	 * @param options parser option flags; getAST passes ILanguage.OPTION_IS_SOURCE_UNIT
57 | 	 * @param log the parser log service
58 | 	 * @return the translation unit for the given content
59 | 	 * @throws CoreException if the language-specific implementation throws it
60 | 	 */
61 | 	protected abstract IASTTranslationUnit getAstForLanguage(FileContent fc,
62 | 			IScannerInfo si, IncludeFileContentProvider ifcp, IIndex idx,
63 | 			int options, IParserLogService log) throws CoreException;
64 | 
65 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/cpp/codeutils/CASTAnnotatedTokenizer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.cpp.codeutils;
 5 | 
 6 | import codemining.languagetools.ITokenizer;
 7 | 
 8 | /**
 9 |  * A C AST annotated tokenizer: AbstractCdtASTAnnotatedTokenizer bound to CAstExtractor.
10 |  * 
11 |  * @author Miltos Allamanis<m.allamanis@ed.ac.uk>
12 |  * 
13 |  */
14 | public class CASTAnnotatedTokenizer extends AbstractCdtASTAnnotatedTokenizer {
15 | 
16 | 	private static final long serialVersionUID = 6395574519739472995L;
17 | 
18 | 	/**
19 | 	 * Create a tokenizer that uses CAstExtractor and an empty code base path.
20 | 	 */
21 | 	public CASTAnnotatedTokenizer() {
22 | 		super(CAstExtractor.class, "");
23 | 	}
24 | 
25 | 	/**
26 | 	 * Create a tokenizer that uses CAstExtractor and an empty code base path.
27 | 	 * @param base the tokenizer forwarded to the superclass constructor
28 | 	 */
29 | 	public CASTAnnotatedTokenizer(final ITokenizer base) {
30 | 		super(base, CAstExtractor.class, "");
31 | 	}
32 | 
33 | 	public CASTAnnotatedTokenizer(final ITokenizer base,
34 | 			final String codeBasePath) {
35 | 		super(base, CAstExtractor.class, codeBasePath); // as above, but with an explicit code base path
36 | 	}
37 | 
38 | 	public CASTAnnotatedTokenizer(final String codeBasePath) {
39 | 		super(CAstExtractor.class, codeBasePath); // no base tokenizer is supplied; explicit code base path
40 | 	}
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/cpp/codeutils/CAstExtractor.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.cpp.codeutils;
 5 | 
 6 | import org.eclipse.cdt.core.dom.ast.IASTTranslationUnit;
 7 | import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage;
 8 | import org.eclipse.cdt.core.index.IIndex;
 9 | import org.eclipse.cdt.core.parser.FileContent;
10 | import org.eclipse.cdt.core.parser.IParserLogService;
11 | import org.eclipse.cdt.core.parser.IScannerInfo;
12 | import org.eclipse.cdt.core.parser.IncludeFileContentProvider;
13 | import org.eclipse.core.runtime.CoreException;
14 | 
15 | /**
16 |  * A C AST extractor.
17 |  * Parsing is delegated to the default GCCLanguage instance.
18 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
19 |  * 
20 |  */
21 | public class CAstExtractor extends AbstractCdtAstExtractor {
22 | 
23 | 	@Override
24 | 	protected IASTTranslationUnit getAstForLanguage(final FileContent fc,
25 | 			final IScannerInfo si, final IncludeFileContentProvider ifcp,
26 | 			final IIndex idx, final int options, final IParserLogService log)
27 | 			throws CoreException { // every argument is forwarded unchanged
28 | 		return GCCLanguage.getDefault().getASTTranslationUnit(fc, si, ifcp,
29 | 				idx, options, log);
30 | 	}
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/cpp/codeutils/CDTTokenizer.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.cpp.codeutils;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.Collection;
  9 | import java.util.List;
 10 | import java.util.SortedMap;
 11 | 
 12 | import org.apache.commons.io.FileUtils;
 13 | import org.apache.commons.io.filefilter.AbstractFileFilter;
 14 | import org.apache.commons.io.filefilter.RegexFileFilter;
 15 | import org.apache.commons.lang.NotImplementedException;
 16 | import org.eclipse.cdt.internal.formatter.scanner.Scanner;
 17 | import org.eclipse.cdt.internal.formatter.scanner.Token;
 18 | 
 19 | import codemining.languagetools.ITokenizer;
 20 | 
 21 | import com.google.common.collect.Lists;
 22 | import com.google.common.collect.Maps;
 23 | 
 24 | /**
 25 |  * A C/C++ tokenizer provided by the Eclipse CDT (uses the CDT formatter Scanner).
 26 |  * Whitespace tokens are skipped; a token's type is the scanner's int token id as a string.
 27 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 28 |  *
 29 |  */
 30 | public class CDTTokenizer implements ITokenizer {
 31 | 
 32 | 	private static final long serialVersionUID = 3954406410244227404L;
 33 | 
 34 | 	/**
 35 | 	 * A filter for the files being tokenized: names ending in .c, .cc, .cpp or .h.
 36 | 	 */
 37 | 	public static final RegexFileFilter C_CODE_TOKENIZER = new RegexFileFilter(
 38 | 			".*\\.(c|cc|cpp|h)$");
 39 | 
 40 | 	/*
 41 | 	 * Tokenize code into a sorted map from scanner position to FullToken (text + type id).
 42 | 	 * NOTE(review): keys come from scanner.getCurrentPosition(); confirm start vs. end offset.
 43 | 	 * @see codemining.languagetools.ITokenizer#fullTokenListWithPos(char[])
 44 | 	 */
 45 | 	@Override
 46 | 	public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
 47 | 		final SortedMap<Integer, FullToken> tokens = Maps.newTreeMap();
 48 | 		tokens.put(-1, new FullToken(SENTENCE_START, SENTENCE_START)); // start sentinel at key -1
 49 | 		tokens.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END, SENTENCE_END)); // end sentinel at the largest key
 50 | 
 51 | 		final Scanner scanner = new Scanner();
 52 | 		scanner.setSource(code);
 53 | 		do {
 54 | 			final int token = scanner.getNextToken();
 55 | 			if (token == Token.tWHITESPACE) {
 56 | 				continue; // skip whitespace; in a do-while this proceeds to the atEnd() check
 57 | 			}
 58 | 			final String nxtToken = new String(scanner.getCurrentTokenSource());
 59 | 			tokens.put(scanner.getCurrentPosition(), new FullToken(nxtToken,
 60 | 					Integer.toString(token)));
 61 | 		} while (!scanner.atEnd());
 62 | 		return tokens;
 63 | 	}
 64 | 
 65 | 	/*
 66 | 	 * Return the shared C_CODE_TOKENIZER file filter.
 67 | 	 *
 68 | 	 * @see codemining.languagetools.ITokenizer#getFileFilter()
 69 | 	 */
 70 | 	@Override
 71 | 	public AbstractFileFilter getFileFilter() {
 72 | 		return C_CODE_TOKENIZER;
 73 | 	}
 74 | 
 75 | 	/*
 76 | 	 * Return the identifier token type: Token.tIDENTIFIER rendered as a decimal string.
 77 | 	 *
 78 | 	 * @see codemining.languagetools.ITokenizer#getIdentifierType()
 79 | 	 */
 80 | 	@Override
 81 | 	public String getIdentifierType() {
 82 | 		return Integer.toString(Token.tIDENTIFIER);
 83 | 	}
 84 | 
 85 | 	@Override
 86 | 	public Collection<String> getKeywordTypes() {
 87 | 		throw new NotImplementedException(); // keyword types are not provided by this tokenizer
 88 | 	}
 89 | 
 90 | 	@Override
 91 | 	public Collection<String> getLiteralTypes() {
 92 | 		throw new NotImplementedException(); // literal types are not provided by this tokenizer
 93 | 	}
 94 | 
 95 | 	/*
 96 | 	 * Convert a single token string to a FullToken. The two sentence sentinels map to
 97 | 	 * themselves; any other string is tokenized and its first token is returned.
 98 | 	 * @see
 99 | 	 * codemining.languagetools.ITokenizer#getTokenFromString(java.lang.String)
100 | 	 */
101 | 	@Override
102 | 	public FullToken getTokenFromString(final String token) {
103 | 		if (token.equals(ITokenizer.SENTENCE_START)) {
104 | 			return new FullToken(ITokenizer.SENTENCE_START,
105 | 					ITokenizer.SENTENCE_START);
106 | 		}
107 | 
108 | 		if (token.equals(ITokenizer.SENTENCE_END)) {
109 | 			return new FullToken(ITokenizer.SENTENCE_END,
110 | 					ITokenizer.SENTENCE_END);
111 | 		}
112 | 		return getTokenListFromCode(token.toCharArray()).get(1); // index 0 is SENTENCE_START, so 1 is the first scanned token
113 | 	}
114 | 
115 | 	/*
116 | 	 * Tokenize code into a list of FullTokens in scan order, wrapped in
117 | 	 * SENTENCE_START / SENTENCE_END; whitespace tokens are skipped.
118 | 	 * @see codemining.languagetools.ITokenizer#getTokenListFromCode(char[])
119 | 	 */
120 | 	@Override
121 | 	public List<FullToken> getTokenListFromCode(final char[] code) {
122 | 		final List<FullToken> tokens = Lists.newArrayList();
123 | 		tokens.add(new FullToken(SENTENCE_START, SENTENCE_START));
124 | 
125 | 		final Scanner scanner = new Scanner();
126 | 		scanner.setSource(code);
127 | 
128 | 		do {
129 | 			final int token = scanner.getNextToken();
130 | 			if (token == Token.tWHITESPACE) {
131 | 				continue; // skip whitespace tokens
132 | 			}
133 | 			final String nxtToken = new String(scanner.getCurrentTokenSource());
134 | 			tokens.add(new FullToken(nxtToken, Integer.toString(token)));
135 | 		} while (!scanner.atEnd());
136 | 
137 | 		tokens.add(new FullToken(SENTENCE_END, SENTENCE_END));
138 | 		return tokens;
139 | 	}
140 | 
141 | 	@Override
142 | 	public List<FullToken> getTokenListFromCode(final File codeFile)
143 | 			throws IOException {
144 | 		return getTokenListFromCode(FileUtils.readFileToString(codeFile) // file is read without an explicit charset argument
145 | 				.toCharArray());
146 | 	}
147 | 
148 | 	/*
149 | 	 * Tokenize code into a list of token strings in scan order, wrapped in
150 | 	 * SENTENCE_START / SENTENCE_END; whitespace tokens are skipped.
151 | 	 * @see codemining.languagetools.ITokenizer#tokenListFromCode(char[])
152 | 	 */
153 | 	@Override
154 | 	public List<String> tokenListFromCode(final char[] code) {
155 | 		final List<String> tokens = Lists.newArrayList();
156 | 		tokens.add(SENTENCE_START);
157 | 
158 | 		final Scanner scanner = new Scanner();
159 | 		scanner.setSource(code);
160 | 
161 | 		do {
162 | 			final int token = scanner.getNextToken();
163 | 			if (token == Token.tWHITESPACE) {
164 | 				continue; // skip whitespace tokens
165 | 			}
166 | 			final String nxtToken = new String(scanner.getCurrentTokenSource());
167 | 			tokens.add(nxtToken);
168 | 		} while (!scanner.atEnd());
169 | 
170 | 		tokens.add(SENTENCE_END);
171 | 		return tokens;
172 | 	}
173 | 
174 | 	@Override
175 | 	public List<String> tokenListFromCode(final File codeFile)
176 | 			throws IOException {
177 | 		return tokenListFromCode(FileUtils.readFileToString(codeFile) // file is read without an explicit charset argument
178 | 				.toCharArray());
179 | 	}
180 | 
181 | 	/*
182 | 	 * Tokenize code into a sorted map from scanner position to token string, with
183 | 	 * SENTENCE_START at key -1 and SENTENCE_END at key Integer.MAX_VALUE.
184 | 	 * @see codemining.languagetools.ITokenizer#tokenListWithPos(char[])
185 | 	 */
186 | 	@Override
187 | 	public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
188 | 		final SortedMap<Integer, String> tokens = Maps.newTreeMap();
189 | 		tokens.put(-1, SENTENCE_START);
190 | 		tokens.put(Integer.MAX_VALUE, SENTENCE_END);
191 | 
192 | 		final Scanner scanner = new Scanner();
193 | 		scanner.setSource(code);
194 | 		do {
195 | 			final int token = scanner.getNextToken();
196 | 			if (token == Token.tWHITESPACE) {
197 | 				continue; // skip whitespace tokens
198 | 			}
199 | 			final String nxtToken = new String(scanner.getCurrentTokenSource());
200 | 			tokens.put(scanner.getCurrentPosition(), nxtToken);
201 | 		} while (!scanner.atEnd());
202 | 		return tokens;
203 | 	}
204 | 
205 | 	@Override
206 | 	public SortedMap<Integer, FullToken> tokenListWithPos(final File file) // unlike the char[] overload, values are FullTokens
207 | 			throws IOException {
208 | 		return fullTokenListWithPos(FileUtils.readFileToString(file) // delegates to fullTokenListWithPos on the file's text
209 | 				.toCharArray());
210 | 	}
211 | 
212 | }
213 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/cpp/codeutils/CppASTAnnotatedTokenizer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.cpp.codeutils;
 5 | 
 6 | import codemining.languagetools.ITokenizer;
 7 | 
 8 | /**
 9 |  * A C++ AST annotated tokenizer: AbstractCdtASTAnnotatedTokenizer bound to CppASTExtractor.
10 |  * 
11 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
12 |  * 
13 |  */
14 | public class CppASTAnnotatedTokenizer extends AbstractCdtASTAnnotatedTokenizer {
15 | 
16 | 	private static final long serialVersionUID = -8016456170070671980L;
17 | 
18 | 	/**
19 | 	 * Create a tokenizer that uses CppASTExtractor and an empty code base path.
20 | 	 */
21 | 	public CppASTAnnotatedTokenizer() {
22 | 		super(CppASTExtractor.class, "");
23 | 	}
24 | 
25 | 	/**
26 | 	 * @param base the tokenizer forwarded to the superclass; the code base path is empty
27 | 	 */
28 | 	public CppASTAnnotatedTokenizer(final ITokenizer base) {
29 | 		super(base, CppASTExtractor.class, "");
30 | 	}
31 | 
32 | 	public CppASTAnnotatedTokenizer(final ITokenizer base,
33 | 			final String codeBasePath) {
34 | 		super(base, CppASTExtractor.class, codeBasePath); // as above, but with an explicit code base path
35 | 	}
36 | 
37 | 	public CppASTAnnotatedTokenizer(final String codeBasePath) {
38 | 		super(CppASTExtractor.class, codeBasePath); // no base tokenizer is supplied; explicit code base path
39 | 	}
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/cpp/codeutils/CppASTExtractor.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.cpp.codeutils;
 5 | 
 6 | import org.eclipse.cdt.core.dom.ast.IASTTranslationUnit;
 7 | import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage;
 8 | import org.eclipse.cdt.core.index.IIndex;
 9 | import org.eclipse.cdt.core.parser.FileContent;
10 | import org.eclipse.cdt.core.parser.IParserLogService;
11 | import org.eclipse.cdt.core.parser.IScannerInfo;
12 | import org.eclipse.cdt.core.parser.IncludeFileContentProvider;
13 | import org.eclipse.core.runtime.CoreException;
14 | 
15 | /**
16 |  * A C++ AST Extractor. Parsing is delegated to the default GPPLanguage instance.
17 |  * 
18 |  * For more information see (URL rejoined from a line-wrapped comment; link not verified):
19 |  * http://www.inf.unibz.it/~gsucci/publications/full%20text/full%20text/OSS12.pdf
20 |  * 
21 |  * 
22 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
23 |  * 
24 |  */
25 | public class CppASTExtractor extends AbstractCdtAstExtractor {
26 | 
27 | 	@Override
28 | 	protected IASTTranslationUnit getAstForLanguage(final FileContent fc,
29 | 			final IScannerInfo si, final IncludeFileContentProvider ifcp,
30 | 			final IIndex idx, final int options, final IParserLogService log)
31 | 			throws CoreException { // every argument is forwarded unchanged
32 | 		return GPPLanguage.getDefault().getASTTranslationUnit(fc, si, ifcp,
33 | 				idx, options, log);
34 | 	}
35 | }
36 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/cpp/codeutils/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Code utilities related to C/C++
3 |  */
4 | package codemining.cpp.codeutils;


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/MethodRetriever.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.codedata;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.Map;
 9 | 
10 | import org.eclipse.jdt.core.dom.ASTNode;
11 | import org.eclipse.jdt.core.dom.ASTVisitor;
12 | import org.eclipse.jdt.core.dom.CompilationUnit;
13 | import org.eclipse.jdt.core.dom.MethodDeclaration;
14 | 
15 | import codemining.java.codeutils.JavaASTExtractor;
16 | 
17 | import com.google.common.collect.Maps;
18 | 
19 | /**
20 |  * A utility class that retrieves the methods (as AST Nodes) of a file.
21 |  * Results are keyed by method name in a sorted map, so same-named methods share one entry.
22 |  * @author Miltos Allamanis
23 |  * 
24 |  */
25 | public final class MethodRetriever extends ASTVisitor {
26 | 
27 | 	public static Map<String, MethodDeclaration> getMethodNodes(final File file) // parses the file, then collects its methods
28 | 			throws IOException {
29 | 		final JavaASTExtractor astExtractor = new JavaASTExtractor(false);
30 | 		final MethodRetriever m = new MethodRetriever();
31 | 		final CompilationUnit cu = astExtractor.getAST(file);
32 | 		cu.accept(m);
33 | 		return m.methods; // the visitor's internal map is returned directly, not a copy
34 | 	}
35 | 
36 | 	public static Map<String, MethodDeclaration> getMethodNodes(
37 | 			final String file) throws Exception { // NOTE(review): presumably source text rather than a path - confirm against getBestEffortAstNode
38 | 		final JavaASTExtractor astExtractor = new JavaASTExtractor(false);
39 | 		final MethodRetriever m = new MethodRetriever();
40 | 		final ASTNode cu = astExtractor.getBestEffortAstNode(file);
41 | 		cu.accept(m);
42 | 		return m.methods; // the visitor's internal map is returned directly, not a copy
43 | 	}
44 | 
45 | 	private final Map<String, MethodDeclaration> methods = Maps.newTreeMap(); // method name -> declaration
46 | 
47 | 	private MethodRetriever() {
48 | 
49 | 	}
50 | 
51 | 	@Override
52 | 	public boolean visit(final MethodDeclaration node) {
53 | 		methods.put(node.getName().toString(), node); // a method visited later replaces an earlier one with the same name
54 | 		return super.visit(node);
55 | 	}
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/PackageInfoExtractor.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.codedata;
 5 | 
 6 | import java.util.Collections;
 7 | import java.util.List;
 8 | 
 9 | import org.eclipse.jdt.core.dom.ASTVisitor;
10 | import org.eclipse.jdt.core.dom.CompilationUnit;
11 | import org.eclipse.jdt.core.dom.ImportDeclaration;
12 | import org.eclipse.jdt.core.dom.PackageDeclaration;
13 | 
14 | import com.google.common.collect.Lists;
15 | 
16 | /**
17 |  * Get package information (package name and imports) from Java source code.
18 |  * The compilation unit is traversed once, inside the constructor.
19 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
20 |  * 
21 |  */
22 | public class PackageInfoExtractor extends ASTVisitor {
23 | 
24 | 	private String packageName; // stays null unless a PackageDeclaration is visited
25 | 	private final List<String> packageImports;
26 | 
27 | 	private final CompilationUnit cu; // stored by the constructor; not read elsewhere in this class
28 | 
29 | 	public PackageInfoExtractor(final CompilationUnit cu) {
30 | 		this.cu = cu;
31 | 		packageImports = Lists.newArrayList();
32 | 		cu.accept(this); // runs the visit methods below, filling packageName and packageImports
33 | 	}
34 | 
35 | 	public List<String> getImports() {
36 | 		return Collections.unmodifiableList(packageImports); // read-only view of the collected import names
37 | 	}
38 | 
39 | 	public String getPackageName() {
40 | 		return packageName;
41 | 	}
42 | 
43 | 	@Override
44 | 	public boolean visit(ImportDeclaration node) {
45 | 		packageImports.add(node.getName().getFullyQualifiedName());
46 | 		return false; // do not descend into the import's children
47 | 	}
48 | 
49 | 	@Override
50 | 	public boolean visit(PackageDeclaration node) {
51 | 		packageName = node.getName().toString();
52 | 		return false; // do not descend into the declaration's children
53 | 	}
54 | 
55 | }
56 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/metrics/CyclomaticCalculator.java:
--------------------------------------------------------------------------------
  1 | package codemining.java.codedata.metrics;
  2 | 
  3 | import java.io.File;
  4 | import java.io.IOException;
  5 | import java.util.logging.Logger;
  6 | 
  7 | import org.eclipse.jdt.core.dom.ASTNode;
  8 | import org.eclipse.jdt.core.dom.ASTVisitor;
  9 | import org.eclipse.jdt.core.dom.CatchClause;
 10 | import org.eclipse.jdt.core.dom.ConditionalExpression;
 11 | import org.eclipse.jdt.core.dom.DoStatement;
 12 | import org.eclipse.jdt.core.dom.EnhancedForStatement;
 13 | import org.eclipse.jdt.core.dom.ForStatement;
 14 | import org.eclipse.jdt.core.dom.IfStatement;
 15 | import org.eclipse.jdt.core.dom.MethodDeclaration;
 16 | import org.eclipse.jdt.core.dom.SwitchCase;
 17 | import org.eclipse.jdt.core.dom.WhileStatement;
 18 | 
 19 | import codemining.java.codeutils.JavaASTExtractor;
 20 | 
 21 | /**
 22 |  * Compute McCabe's Cyclomatic Complexity.
 23 |  * 
 24 |  * @author Miltos Allamanis
 25 |  * 
 26 |  */
 27 | public class CyclomaticCalculator implements IFileMetricRetriever {
 28 | 
 29 | 	/**
 30 | 	 * Visit all "junctions" in an AST and increment complexity.
 31 | 	 * 
 32 | 	 */
 33 | 	private static class JunctionVisitor extends ASTVisitor {
 34 | 		int complexity = 0;
 35 | 
 36 | 		@Override
 37 | 		public boolean visit(final CatchClause arg0) {
 38 | 			complexity++;
 39 | 			return super.visit(arg0);
 40 | 		}
 41 | 
 42 | 		@Override
 43 | 		public boolean visit(final ConditionalExpression arg0) {
 44 | 			complexity++;
 45 | 			return super.visit(arg0);
 46 | 		}
 47 | 
 48 | 		@Override
 49 | 		public boolean visit(final DoStatement arg0) {
 50 | 			complexity++;
 51 | 			return super.visit(arg0);
 52 | 		}
 53 | 
 54 | 		@Override
 55 | 		public boolean visit(final EnhancedForStatement arg0) {
 56 | 			complexity++;
 57 | 			return super.visit(arg0);
 58 | 		}
 59 | 
 60 | 		@Override
 61 | 		public boolean visit(final ForStatement arg0) {
 62 | 			complexity++;
 63 | 			return super.visit(arg0);
 64 | 		}
 65 | 
 66 | 		@Override
 67 | 		public boolean visit(final IfStatement arg0) {
 68 | 			complexity++;
 69 | 			return super.visit(arg0);
 70 | 		}
 71 | 
 72 | 		@Override
 73 | 		public boolean visit(final MethodDeclaration arg0) {
 74 | 			/*
 75 | 			 * if (isConcrete(arg0)) { complexity.startMethod(); return
 76 | 			 * super.visit(arg0); } return false;
 77 | 			 */
 78 | 			complexity++; // TODO: Not exactly true, but we'll use that
 79 | 			return super.visit(arg0);
 80 | 		}
 81 | 
 82 | 		@Override
 83 | 		public boolean visit(final SwitchCase arg0) {
 84 | 			complexity++;
 85 | 			return super.visit(arg0);
 86 | 		}
 87 | 
 88 | 		@Override
 89 | 		public boolean visit(final WhileStatement arg0) {
 90 | 			complexity++;
 91 | 			return super.visit(arg0);
 92 | 		}
 93 | 	}
 94 | 
 95 | 	private static final Logger LOGGER = Logger
 96 | 			.getLogger(CyclomaticCalculator.class.getName());
 97 | 
 98 | 	public int getComplexity(final File file) throws IOException {
 99 | 		final JavaASTExtractor ast = new JavaASTExtractor(false);
100 | 		final JunctionVisitor visitor = new JunctionVisitor();
101 | 		ast.getAST(file).accept(visitor);
102 | 		return visitor.complexity;
103 | 	}
104 | 
105 | 	@Override
106 | 	public double getMetricForASTNode(final ASTNode node) {
107 | 		final JunctionVisitor visitor = new JunctionVisitor();
108 | 		node.accept(visitor);
109 | 		return visitor.complexity;
110 | 	}
111 | 
112 | 	@Override
113 | 	public double getMetricForFile(final File file) throws IOException {
114 | 		return getComplexity(file);
115 | 	}
116 | }
117 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/metrics/IFileMetricRetriever.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.codedata.metrics;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | 
 9 | import org.eclipse.jdt.core.dom.ASTNode;
10 | 
11 | /**
12 |  * An interface for all the classes that can return a metric
13 |  * 
14 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
15 |  * 
16 |  */
17 | public interface IFileMetricRetriever {
18 | 	double getMetricForASTNode(final ASTNode node);
19 | 
20 | 	double getMetricForFile(final File file) throws IOException;
21 | }
22 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/metrics/JavaMethodClassCounter.java:
--------------------------------------------------------------------------------
 1 | package codemining.java.codedata.metrics;
 2 | 
 3 | import java.io.File;
 4 | import java.io.IOException;
 5 | import java.util.List;
 6 | 
 7 | import org.apache.commons.io.FileUtils;
 8 | import org.eclipse.jdt.core.dom.ASTNode;
 9 | import org.eclipse.jdt.core.dom.ASTVisitor;
10 | import org.eclipse.jdt.core.dom.CompilationUnit;
11 | 
12 | import codemining.java.codeutils.JavaASTExtractor;
13 | 
14 | public final class JavaMethodClassCounter {
15 | 
16 | 	public static class MethodClassCountVisitor extends ASTVisitor {
17 | 
18 | 		public int noMethods = 0;
19 | 		public int noClasses = 0;
20 | 
21 | 		@Override
22 | 		public void postVisit(final ASTNode node) {
23 | 
24 | 			if (node.getNodeType() == ASTNode.METHOD_DECLARATION)
25 | 				noMethods++;
26 | 
27 | 			if (node.getNodeType() == ASTNode.TYPE_DECLARATION
28 | 					|| node.getNodeType() == ASTNode.ENUM_DECLARATION)
29 | 				noClasses++;
30 | 		}
31 | 
32 | 	}
33 | 
34 | 	public static void main(final String[] args) throws IOException {
35 | 		if (args.length != 1) {
36 | 			System.err.println("Usage <inputDirectory>");
37 | 			System.exit(-1);
38 | 		}
39 | 		final File directory = new File(args[0]);
40 | 		countMethodsClasses(directory);
41 | 	}
42 | 
43 | 	public static void countMethodsClasses(final File projectDir)
44 | 			throws IOException {
45 | 
46 | 		System.out.println("\n===== Project " + projectDir);
47 | 		final MethodClassCountVisitor mccv = new MethodClassCountVisitor();
48 | 		final JavaASTExtractor astExtractor = new JavaASTExtractor(false);
49 | 
50 | 		final List<File> files = (List<File>) FileUtils.listFiles(projectDir,
51 | 				new String[] { "java" }, true);
52 | 
53 | 		int count = 0;
54 | 		for (final File file : files) {
55 | 
56 | 			final CompilationUnit cu = astExtractor.getAST(file);
57 | 			cu.accept(mccv);
58 | 
59 | 			if (count % 1000 == 0)
60 | 				System.out.println("At file " + count + " of " + files.size());
61 | 			count++;
62 | 		}
63 | 
64 | 		System.out.println("Project " + projectDir);
65 | 		System.out.println("No. *.java files " + files.size());
66 | 		System.out.println("No. Methods: " + mccv.noMethods);
67 | 		System.out.println("No. Classes: " + mccv.noClasses);
68 | 	}
69 | 
70 | 	private JavaMethodClassCounter() {
71 | 
72 | 	}
73 | 
74 | }
75 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/metrics/LinesOfCodeMetric.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.codedata.metrics;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | 
 9 | import org.apache.commons.io.FileUtils;
10 | import org.eclipse.jdt.core.dom.ASTNode;
11 | 
12 | /**
13 |  * Find how many lines of code there are in the given file.
14 |  * 
15 |  * Note that if you give a file it returns the length including the contents,
16 |  * while giving an AST Node ignores them.
17 |  * 
18 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
19 |  * 
20 |  */
21 | public class LinesOfCodeMetric implements IFileMetricRetriever {
22 | 
23 | 	/*
24 | 	 * (non-Javadoc)
25 | 	 * 
26 | 	 * @see
27 | 	 * uk.ac.ed.inf.codedataextractors.IFileMetricRetriever#getMetricForASTNode
28 | 	 * (org.eclipse.jdt.core.dom.ASTNode)
29 | 	 */
30 | 	@Override
31 | 	public double getMetricForASTNode(final ASTNode node) {
32 | 		return node.toString().split(System.getProperty("line.separator")).length;
33 | 	}
34 | 
35 | 	/*
36 | 	 * (non-Javadoc)
37 | 	 * 
38 | 	 * @see
39 | 	 * uk.ac.ed.inf.codedataextractors.IFileMetricRetriever#getMetricForFile
40 | 	 * (java.io.File)
41 | 	 */
42 | 	@Override
43 | 	public double getMetricForFile(File file) throws IOException {
44 | 		final String fileContents = FileUtils.readFileToString(file);
45 | 		// This returns the real lines, while the other returns without the
46 | 		// comments.
47 | 		return fileContents.split(System.getProperty("line.separator")).length;
48 | 	}
49 | 
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/metrics/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Java Code Metrics
3 |  */
4 | package codemining.java.codedata.metrics;


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codedata/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Package providing information and datasets from Java files.
3 |  */
4 | package codemining.java.codedata;
5 | 
6 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/MethodExtractor.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.java.codeutils;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.ArrayList;
  9 | import java.util.List;
 10 | import java.util.Stack;
 11 | 
 12 | import org.apache.commons.lang.exception.ExceptionUtils;
 13 | import org.eclipse.jdt.core.dom.ASTVisitor;
 14 | import org.eclipse.jdt.core.dom.CompilationUnit;
 15 | import org.eclipse.jdt.core.dom.ImportDeclaration;
 16 | import org.eclipse.jdt.core.dom.MethodDeclaration;
 17 | import org.eclipse.jdt.core.dom.TypeDeclaration;
 18 | 
 19 | import com.google.common.collect.Lists;
 20 | 
 21 | /**
 22 |  * Extract all methods in a class
 23 |  *
 24 |  * @author Miltos Allamanis<m.allamanis@ed.ac.uk>
 25 |  *
 26 |  */
 27 | public class MethodExtractor {
 28 | 
 29 | 	private static class MethodVisitor extends ASTVisitor {
 30 | 
 31 | 		final List<MethodDeclaration> allMethods = Lists.newArrayList();
 32 | 
 33 | 		Stack<String> className = new Stack<String>();
 34 | 
 35 | 		private String currentPackageName;
 36 | 
 37 | 		private final ProjectTypeInformation pti;
 38 | 
 39 | 		public MethodVisitor(final ProjectTypeInformation pti) {
 40 | 			this.pti = pti;
 41 | 		}
 42 | 
 43 | 		@Override
 44 | 		public void endVisit(final TypeDeclaration node) {
 45 | 			className.pop();
 46 | 			super.endVisit(node);
 47 | 		}
 48 | 
 49 | 		/**
 50 | 		 * @param node
 51 | 		 * @return
 52 | 		 */
 53 | 		public boolean isOverride(final MethodDeclaration node) {
 54 | 			try {
 55 | 				final boolean hasAnnotation = MethodUtils.hasOverrideAnnotation(node);
 56 | 				if (pti == null || hasAnnotation) {
 57 | 					return hasAnnotation;
 58 | 				}
 59 | 
 60 | 				final boolean isOverride = pti.isMethodOverride(className.peek(), node);
 61 | 				return hasAnnotation || isOverride;
 62 | 			} catch (final Throwable e) {
 63 | 				System.err.println(e + ":" + node.toString());
 64 | 				return false;
 65 | 			}
 66 | 		}
 67 | 
 68 | 		@Override
 69 | 		public boolean visit(final CompilationUnit node) {
 70 | 			if (node.getPackage() != null) {
 71 | 				currentPackageName = node.getPackage().getName().getFullyQualifiedName();
 72 | 			} else {
 73 | 				currentPackageName = "";
 74 | 			}
 75 | 			return super.visit(node);
 76 | 		}
 77 | 
 78 | 		@Override
 79 | 		public boolean visit(final ImportDeclaration node) {
 80 | 			// Don't visit. It's boring
 81 | 			return false;
 82 | 		}
 83 | 
 84 | 		@Override
 85 | 		public boolean visit(final MethodDeclaration node) {
 86 | 			if (node.isConstructor()) {
 87 | 				return super.visit(node);
 88 | 			} else if (isOverride(node)) {
 89 | 				return super.visit(node);
 90 | 			}
 91 | 			allMethods.add(node);
 92 | 			return super.visit(node);
 93 | 		}
 94 | 
 95 | 		@Override
 96 | 		public boolean visit(final TypeDeclaration node) {
 97 | 			if (className.isEmpty()) {
 98 | 				className.push(currentPackageName + "." + node.getName().getIdentifier());
 99 | 			} else {
100 | 				className.push(className.peek() + "." + node.getName().getIdentifier());
101 | 			}
102 | 			return super.visit(node);
103 | 		}
104 | 
105 | 	}
106 | 
107 | 	public static List<MethodDeclaration> getMethods(final File file) throws IOException {
108 | 		return getMethods(file, null);
109 | 	}
110 | 
111 | 	public static List<MethodDeclaration> getMethods(final File file, final ProjectTypeInformation pti)
112 | 			throws IOException {
113 | 		try {
114 | 			final JavaASTExtractor ex = new JavaASTExtractor(false);
115 | 			final MethodVisitor mv = new MethodVisitor(pti);
116 | 			final CompilationUnit cu = ex.getAST(file);
117 | 			cu.accept(mv);
118 | 			return mv.allMethods;
119 | 		} catch (Exception e) {
120 | 			System.err.println(ExceptionUtils.getFullStackTrace(e));
121 | 		}
122 | 		return new ArrayList<>();
123 | 	}
124 | 
125 | 	private MethodExtractor() {
126 | 
127 | 	}
128 | 
129 | }
130 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/MethodUtils.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  */
 4 | package codemining.java.codeutils;
 5 | 
 6 | import java.util.List;
 7 | 
 8 | import org.eclipse.jdt.core.dom.Annotation;
 9 | import org.eclipse.jdt.core.dom.IExtendedModifier;
10 | import org.eclipse.jdt.core.dom.MethodDeclaration;
11 | import org.eclipse.jdt.core.dom.SingleVariableDeclaration;
12 | 
13 | /**
14 |  * A set of utility methods for Java Methods.
15 |  *
16 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
17 |  *
18 |  */
19 | public final class MethodUtils {
20 | 
21 | 	/**
22 | 	 * @param node
23 | 	 * @return
24 | 	 */
25 | 	public static String getMethodType(final MethodDeclaration node) {
26 | 		final StringBuffer typeSb = new StringBuffer();
27 | 		if (node.getReturnType2() != null) {
28 | 			typeSb.append(node.getReturnType2().toString()).append("(");
29 | 		} else if (node.isConstructor()) {
30 | 			typeSb.append("constructor(");
31 | 		} else {
32 | 			typeSb.append("void(");
33 | 		}
34 | 		for (final Object svd : node.parameters()) {
35 | 			final SingleVariableDeclaration decl = (SingleVariableDeclaration) svd;
36 | 			typeSb.append(decl.getType().toString());
37 | 			typeSb.append(",");
38 | 		}
39 | 		typeSb.append(")");
40 | 
41 | 		final String methodType = typeSb.toString();
42 | 		return methodType;
43 | 	}
44 | 
45 | 	public static boolean hasOverrideAnnotation(final MethodDeclaration node) {
46 | 		final List modifiers = node.modifiers();
47 | 		for (final Object mod : modifiers) {
48 | 			final IExtendedModifier modifier = (IExtendedModifier) mod;
49 | 			if (modifier.isAnnotation()) {
50 | 				final Annotation annotation = (Annotation) modifier;
51 | 				if (annotation.getTypeName().toString().equals("Override")) {
52 | 					return true;
53 | 				}
54 | 			}
55 | 		}
56 | 		return false;
57 | 	}
58 | 
59 | 	private MethodUtils() {
60 | 	}
61 | 
62 | }
63 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/MethodsInClass.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.java.codeutils;
  5 | 
  6 | import java.io.File;
  7 | import java.util.Collection;
  8 | import java.util.Stack;
  9 | import java.util.logging.Logger;
 10 | 
 11 | import org.apache.commons.io.FileUtils;
 12 | import org.apache.commons.io.filefilter.DirectoryFileFilter;
 13 | import org.eclipse.jdt.core.dom.ASTVisitor;
 14 | import org.eclipse.jdt.core.dom.CompilationUnit;
 15 | import org.eclipse.jdt.core.dom.EnumDeclaration;
 16 | import org.eclipse.jdt.core.dom.ImportDeclaration;
 17 | import org.eclipse.jdt.core.dom.MethodDeclaration;
 18 | import org.eclipse.jdt.core.dom.TypeDeclaration;
 19 | 
 20 | import codemining.java.tokenizers.JavaTokenizer;
 21 | 
 22 | import com.google.common.collect.HashMultimap;
 23 | import com.google.common.collect.Multimap;
 24 | 
 25 | /**
 26 |  * Retrieve all the methods contained in a given class.
 27 |  *
 28 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 29 |  *
 30 |  */
 31 | public class MethodsInClass {
 32 | 
 33 | 	private class MethodExtractor extends ASTVisitor {
 34 | 
 35 | 		Stack<String> className = new Stack<String>();
 36 | 
 37 | 		private String currentPackageName;
 38 | 
 39 | 		@Override
 40 | 		public void endVisit(final EnumDeclaration node) {
 41 | 			className.pop();
 42 | 			super.endVisit(node);
 43 | 		}
 44 | 
 45 | 		/*
 46 | 		 * (non-Javadoc)
 47 | 		 * 
 48 | 		 * @see
 49 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#endVisit(org.eclipse.jdt.core
 50 | 		 * .dom.TypeDeclaration)
 51 | 		 */
 52 | 		@Override
 53 | 		public void endVisit(final TypeDeclaration node) {
 54 | 			className.pop();
 55 | 			super.endVisit(node);
 56 | 		}
 57 | 
 58 | 		/*
 59 | 		 * (non-Javadoc)
 60 | 		 * 
 61 | 		 * @see
 62 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
 63 | 		 * .CompilationUnit)
 64 | 		 */
 65 | 		@Override
 66 | 		public boolean visit(final CompilationUnit node) {
 67 | 			if (node.getPackage() != null) {
 68 | 				currentPackageName = node.getPackage().getName()
 69 | 						.getFullyQualifiedName();
 70 | 			}
 71 | 			return super.visit(node);
 72 | 		}
 73 | 
 74 | 		@Override
 75 | 		public boolean visit(final EnumDeclaration node) {
 76 | 			if (className.isEmpty()) {
 77 | 				className.push(currentPackageName + "."
 78 | 						+ node.getName().getIdentifier());
 79 | 			} else {
 80 | 				className.push(className.peek() + "."
 81 | 						+ node.getName().getIdentifier());
 82 | 			}
 83 | 			return super.visit(node);
 84 | 		}
 85 | 
 86 | 		/*
 87 | 		 * (non-Javadoc)
 88 | 		 * 
 89 | 		 * @see
 90 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
 91 | 		 * .ImportDeclaration)
 92 | 		 */
 93 | 		@Override
 94 | 		public boolean visit(final ImportDeclaration node) {
 95 | 			// Don't visit. It's boring
 96 | 			return false;
 97 | 		}
 98 | 
 99 | 		/*
100 | 		 * (non-Javadoc)
101 | 		 * 
102 | 		 * @see
103 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
104 | 		 * .MethodDeclaration)
105 | 		 */
106 | 		@Override
107 | 		public boolean visit(final MethodDeclaration node) {
108 | 			final String methodType = MethodUtils.getMethodType(node);
109 | 			methodsForClasses.put(className.peek(), node.getName()
110 | 					.getIdentifier() + ":" + methodType);
111 | 			return false;
112 | 		}
113 | 
114 | 		/*
115 | 		 * (non-Javadoc)
116 | 		 * 
117 | 		 * @see
118 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
119 | 		 * .TypeDeclaration)
120 | 		 */
121 | 		@Override
122 | 		public boolean visit(final TypeDeclaration node) {
123 | 			if (className.isEmpty()) {
124 | 				className.push(currentPackageName + "."
125 | 						+ node.getName().getIdentifier());
126 | 			} else {
127 | 				className.push(className.peek() + "."
128 | 						+ node.getName().getIdentifier());
129 | 			}
130 | 			return super.visit(node);
131 | 		}
132 | 
133 | 	}
134 | 
135 | 	public static void main(final String[] args) {
136 | 		if (args.length != 1) {
137 | 			System.err.println("Usage <projectDir>");
138 | 			System.exit(-1);
139 | 		}
140 | 
141 | 		final MethodsInClass mic = new MethodsInClass();
142 | 		mic.scan(FileUtils
143 | 				.listFiles(new File(args[0]), JavaTokenizer.javaCodeFileFilter,
144 | 						DirectoryFileFilter.DIRECTORY));
145 | 		System.out.println(mic);
146 | 	}
147 | 
148 | 	/**
149 | 	 * Class -> MethodName
150 | 	 */
151 | 	private final Multimap<String, String> methodsForClasses = HashMultimap
152 | 			.create();
153 | 
154 | 	private static final Logger LOGGER = Logger.getLogger(MethodsInClass.class
155 | 			.getName());
156 | 
157 | 	public MethodsInClass() {
158 | 		methodsForClasses.put("java.lang.Object", "toString:String()");
159 | 		methodsForClasses.put("java.lang.Object", "equals:boolean(Object,)");
160 | 		methodsForClasses.put("java.lang.Object", "hashCode:int()");
161 | 		methodsForClasses.put("java.lang.Runnable", "run:void()");
162 | 	}
163 | 
164 | 	public Collection<String> getMethodsForClass(final String classname) {
165 | 		return methodsForClasses.get(classname);
166 | 	}
167 | 
168 | 	public void scan(final Collection<File> files) {
169 | 		final MethodExtractor me = new MethodExtractor();
170 | 		final JavaASTExtractor jEx = new JavaASTExtractor(false);
171 | 		for (final File f : files) {
172 | 			try {
173 | 				final CompilationUnit cu = jEx.getAST(f);
174 | 				cu.accept(me);
175 | 			} catch (final Throwable e) {
176 | 				LOGGER.warning("Failed to get methods from " + f);
177 | 			}
178 | 		}
179 | 	}
180 | 
181 | 	/*
182 | 	 * (non-Javadoc)
183 | 	 * 
184 | 	 * @see java.lang.Object#toString()
185 | 	 */
186 | 	@Override
187 | 	public String toString() {
188 | 		return methodsForClasses.toString();
189 | 	}
190 | 
191 | }
192 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/ProjectTypeInformation.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  */
 4 | package codemining.java.codeutils;
 5 | 
 6 | import java.io.File;
 7 | import java.util.Collection;
 8 | 
 9 | import org.apache.commons.io.FileUtils;
10 | import org.apache.commons.io.filefilter.DirectoryFileFilter;
11 | import org.eclipse.jdt.core.dom.MethodDeclaration;
12 | 
13 | import codemining.java.tokenizers.JavaTokenizer;
14 | import codemining.languagetools.ClassHierarchy;
15 | import codemining.languagetools.ClassHierarchy.Type;
16 | 
17 | import com.google.common.base.Optional;
18 | 
19 | /**
20 |  * Collect information about classes and their implementing methods.
21 |  *
22 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
23 |  *
24 |  */
25 | public class ProjectTypeInformation {
26 | 
27 | 	private final File projectDirectory;
28 | 	private final MethodsInClass methodInformation = new MethodsInClass();
29 | 	private ClassHierarchy hierarchy = null;
30 | 
31 | 	public ProjectTypeInformation(final File projectDirectory) {
32 | 		this.projectDirectory = projectDirectory;
33 | 	}
34 | 
35 | 	public void collect() {
36 | 		final Collection<File> allFiles = FileUtils
37 | 				.listFiles(projectDirectory, JavaTokenizer.javaCodeFileFilter,
38 | 						DirectoryFileFilter.DIRECTORY);
39 | 		methodInformation.scan(allFiles);
40 | 		final JavaTypeHierarchyExtractor hierarchyExtractor = new JavaTypeHierarchyExtractor();
41 | 		hierarchyExtractor.addFilesToCorpus(allFiles);
42 | 		hierarchy = hierarchyExtractor.getHierarchy();
43 | 	}
44 | 
45 | 	public boolean isMethodOverride(final String fullyQualifiedNameOfClass,
46 | 			final MethodDeclaration method) {
47 | 		final String methodSignature = method.getName().getIdentifier() + ":"
48 | 				+ MethodUtils.getMethodType(method);
49 | 		if (!methodInformation.getMethodsForClass(fullyQualifiedNameOfClass)
50 | 				.contains(methodSignature)) {
51 | 			return false;
52 | 		}
53 | 		final Optional<Type> type = hierarchy
54 | 				.getTypeForName(fullyQualifiedNameOfClass);
55 | 		if (!type.isPresent()) {
56 | 			return false;
57 | 		}
58 | 		for (final Type implementor : type.get().getImplementingTypesClosure()) {
59 | 			if (methodInformation.getMethodsForClass(
60 | 					implementor.fullQualifiedName).contains(methodSignature)) {
61 | 				return true;
62 | 			}
63 | 		}
64 | 		return false;
65 | 	}
66 | 
67 | }
68 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/UsagePointExtractor.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.java.codeutils;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.List;
  9 | import java.util.Set;
 10 | 
 11 | import org.apache.commons.io.FileUtils;
 12 | import org.apache.commons.io.filefilter.DirectoryFileFilter;
 13 | import org.eclipse.jdt.core.dom.ASTNode;
 14 | import org.eclipse.jdt.core.dom.ASTVisitor;
 15 | import org.eclipse.jdt.core.dom.FieldDeclaration;
 16 | import org.eclipse.jdt.core.dom.ImportDeclaration;
 17 | import org.eclipse.jdt.core.dom.SingleVariableDeclaration;
 18 | import org.eclipse.jdt.core.dom.VariableDeclarationExpression;
 19 | import org.eclipse.jdt.core.dom.VariableDeclarationStatement;
 20 | 
 21 | import codemining.java.tokenizers.JavaTokenizer;
 22 | 
 23 | import com.google.common.collect.Lists;
 24 | import com.google.common.collect.Sets;
 25 | 
 26 | /**
 27 |  * Given a Java file and a fully qualified name of a class, find those blocks
 28 |  * that use the class in question.
 29 |  * 
 30 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 31 |  * 
 32 |  */
 33 | public class UsagePointExtractor {
 34 | 
 35 | 	private static final class UsageExtractor extends ASTVisitor {
 36 | 		/**
 37 | 		 * Return the imported class.
 38 | 		 * 
 39 | 		 * @param qName
 40 | 		 * @return
 41 | 		 */
 42 | 		private static String getImportedClass(final String qName) {
 43 | 			return qName.substring(qName.lastIndexOf('.') + 1);
 44 | 		}
 45 | 
 46 | 		final List<ASTNode> interestingNodes = Lists.newArrayList();
 47 | 
 48 | 		final String fullyQualifiedName;
 49 | 		final Set<String> className = Sets.newTreeSet();
 50 | 
 51 | 		/*
 52 | 		 * (non-Javadoc)
 53 | 		 * 
 54 | 		 * @see
 55 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
 56 | 		 * .ImportDeclaration)
 57 | 		 */
 58 | 		UsageExtractor(final String fullyQualifiedName) {
 59 | 			this.fullyQualifiedName = fullyQualifiedName;
 60 | 			// Add the fully qualified name in the rare case where
 61 | 			// no import is needed (i.e. in java.lang.)
 62 | 			className.add(fullyQualifiedName);
 63 | 		}
 64 | 
 65 | 		/*
 66 | 		 * (non-Javadoc)
 67 | 		 * 
 68 | 		 * @see
 69 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#preVisit2(org.eclipse.jdt.core
 70 | 		 * .dom.ASTNode)
 71 | 		 */
 72 | 		@Override
 73 | 		public boolean preVisit2(final ASTNode node) {
 74 | 			return !interestingNodes.contains(node);
 75 | 		}
 76 | 
 77 | 		/*
 78 | 		 * (non-Javadoc)
 79 | 		 * 
 80 | 		 * @see
 81 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
 82 | 		 * .FieldDeclaration)
 83 | 		 */
 84 | 		@Override
 85 | 		public boolean visit(final FieldDeclaration node) {
 86 | 			if (className.contains(node.getType().toString())) {
 87 | 				interestingNodes.add(node.getParent());
 88 | 			}
 89 | 			return false;
 90 | 		}
 91 | 
 92 | 		@Override
 93 | 		public boolean visit(final ImportDeclaration node) {
 94 | 			final String qualifiedName = node.getName().getFullyQualifiedName();
 95 | 			if (qualifiedName.startsWith(fullyQualifiedName)) {
 96 | 				className.add(getImportedClass(qualifiedName));
 97 | 				className.add(qualifiedName);
 98 | 			}
 99 | 			return false;
100 | 		}
101 | 
102 | 		/*
103 | 		 * (non-Javadoc)
104 | 		 * 
105 | 		 * @see
106 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
107 | 		 * .SingleVariableDeclaration)
108 | 		 */
109 | 		@Override
110 | 		public boolean visit(final SingleVariableDeclaration node) {
111 | 			if (className.contains(node.getType().toString())) {
112 | 				interestingNodes.add(node.getParent());
113 | 			}
114 | 			return false;
115 | 		}
116 | 
117 | 		/*
118 | 		 * (non-Javadoc)
119 | 		 * 
120 | 		 * @see
121 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
122 | 		 * .VariableDeclarationExpression)
123 | 		 */
124 | 		@Override
125 | 		public boolean visit(final VariableDeclarationExpression node) {
126 | 			if (className.contains(node.getType().toString())) {
127 | 				interestingNodes.add(node.getParent());
128 | 			}
129 | 			return false;
130 | 		}
131 | 
132 | 		/*
133 | 		 * (non-Javadoc)
134 | 		 * 
135 | 		 * @see
136 | 		 * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom
137 | 		 * .VariableDeclarationStatement)
138 | 		 */
139 | 		@Override
140 | 		public boolean visit(final VariableDeclarationStatement node) {
141 | 			if (className.contains(node.getType().toString())) {
142 | 				interestingNodes.add(node.getParent());
143 | 			}
144 | 			return false;
145 | 		}
146 | 
147 | 	}
148 | 
149 | 	/**
150 | 	 * @param args
151 | 	 */
152 | 	public static void main(final String[] args) {
153 | 		if (args.length != 2) {
154 | 			System.err.println("Usage <fullyQualifiedClass> <directory>");
155 | 			System.exit(-1);
156 | 		}
157 | 
158 | 		final File directory = new File(args[1]);
159 | 		final String qualifiedClass = args[0];
160 | 
161 | 		for (final File fi : FileUtils
162 | 				.listFiles(directory, JavaTokenizer.javaCodeFileFilter,
163 | 						DirectoryFileFilter.DIRECTORY)) {
164 | 			try {
165 | 				final List<ASTNode> usages = usagePoints(qualifiedClass, fi);
166 | 				if (!usages.isEmpty()) {
167 | 					System.out.println(fi.getAbsolutePath());
168 | 					for (final ASTNode node : usages) {
169 | 						System.out
170 | 								.println("----------------------------------------------");
171 | 						System.out.println(node);
172 | 					}
173 | 				}
174 | 			} catch (final Exception e) {
175 | 				System.err.println("Error processing " + fi.getName());
176 | 			}
177 | 
178 | 		}
179 | 
180 | 	}
181 | 
182 | 	/**
183 | 	 * 
184 | 	 * @param qualifiedName
185 | 	 *            the fully qualified name of the class or the package
186 | 	 * @param f
187 | 	 * @return
188 | 	 * @throws IOException
189 | 	 */
190 | 	public static List<ASTNode> usagePoints(final String qualifiedName,
191 | 			final File f) throws IOException {
192 | 		final JavaASTExtractor ex = new JavaASTExtractor(false);
193 | 		final UsageExtractor usageExtractor = new UsageExtractor(qualifiedName);
194 | 		ex.getAST(f).accept(usageExtractor);
195 | 		return usageExtractor.interestingNodes;
196 | 	}
197 | 
198 | }
199 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/binding/JavaMethodInvocationBindingExtractor.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.java.codeutils.binding;
  5 | 
  6 | import static com.google.common.base.Preconditions.checkArgument;
  7 | 
  8 | import java.util.Collection;
  9 | import java.util.Map.Entry;
 10 | import java.util.Set;
 11 | 
 12 | import org.eclipse.jdt.core.dom.ASTNode;
 13 | import org.eclipse.jdt.core.dom.ASTVisitor;
 14 | import org.eclipse.jdt.core.dom.MethodInvocation;
 15 | 
 16 | import codemining.java.tokenizers.JavaTokenizer;
 17 | import codemining.languagetools.ITokenizer;
 18 | 
 19 | import com.google.common.collect.HashMultimap;
 20 | import com.google.common.collect.Multimap;
 21 | import com.google.common.collect.Sets;
 22 | 
 23 | /**
 24 |  * Extract Java method bindings. Each method call or definition is used by
 25 |  * itself
 26 |  *
 27 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 28 |  *
 29 |  */
 30 | public class JavaMethodInvocationBindingExtractor extends
 31 | 		AbstractJavaNameBindingsExtractor {
 32 | 
 33 | 	public static enum AvailableFeatures {
 34 | 		IMPLEMENTOR_VOCABULARY, ANCESTRY, NUMBER_ARGUMENTS
 35 | 	}
 36 | 
 37 | 	private static class MethodBindings extends ASTVisitor {
 38 | 		/**
 39 | 		 * A map from the method name to the position.
 40 | 		 */
 41 | 		final Multimap<String, ASTNode> methodNamePostions = HashMultimap
 42 | 				.create();
 43 | 
 44 | 		@Override
 45 | 		public boolean visit(final MethodInvocation node) {
 46 | 			final String name = node.getName().toString();
 47 | 			methodNamePostions.put(name, node.getName());
 48 | 			return super.visit(node);
 49 | 		}
 50 | 	}
 51 | 
 52 | 	private final Set<AvailableFeatures> activeFeatures = Sets
 53 | 			.newHashSet(AvailableFeatures.values());
 54 | 
 55 | 	public JavaMethodInvocationBindingExtractor() {
 56 | 		super(new JavaTokenizer());
 57 | 	}
 58 | 
 59 | 	public JavaMethodInvocationBindingExtractor(final ITokenizer tokenizer) {
 60 | 		super(tokenizer);
 61 | 	}
 62 | 
 63 | 	@Override
 64 | 	public Set<?> getAvailableFeatures() {
 65 | 		return Sets.newHashSet(AvailableFeatures.values());
 66 | 	}
 67 | 
 68 | 	@Override
 69 | 	protected Set<String> getFeatures(final Set<ASTNode> boundNodes) {
 70 | 		checkArgument(boundNodes.size() == 1);
 71 | 		final ASTNode method = boundNodes.iterator().next().getParent();
 72 | 		final Set<String> features = Sets.newHashSet();
 73 | 		checkArgument(method instanceof MethodInvocation);
 74 | 		final MethodInvocation mi = (MethodInvocation) method;
 75 | 		if (activeFeatures.contains(AvailableFeatures.NUMBER_ARGUMENTS)) {
 76 | 			features.add("nArgs:" + mi.arguments().size());
 77 | 		}
 78 | 		if (activeFeatures.contains(AvailableFeatures.IMPLEMENTOR_VOCABULARY)) {
 79 | 			JavaFeatureExtractor.addImplementorVocab(mi, features);
 80 | 		}
 81 | 		if (activeFeatures.contains(AvailableFeatures.ANCESTRY)) {
 82 | 			JavaFeatureExtractor.addAstAncestryFeatures(features, method);
 83 | 		}
 84 | 		return features;
 85 | 	}
 86 | 
 87 | 	@Override
 88 | 	public Set<Set<ASTNode>> getNameBindings(final ASTNode node) {
 89 | 		final MethodBindings mb = new MethodBindings();
 90 | 		node.accept(mb);
 91 | 
 92 | 		final Set<Set<ASTNode>> nameBindings = Sets.newHashSet();
 93 | 		for (final Entry<String, ASTNode> entry : mb.methodNamePostions
 94 | 				.entries()) {
 95 | 			final Set<ASTNode> boundNodes = Sets.newIdentityHashSet();
 96 | 			boundNodes.add(entry.getValue());
 97 | 			nameBindings.add(boundNodes);
 98 | 		}
 99 | 		return nameBindings;
100 | 	}
101 | 
102 | 	@Override
103 | 	public void setActiveFeatures(final Set<?> activeFeatures) {
104 | 		this.activeFeatures.clear();
105 | 		this.activeFeatures
106 | 				.addAll((Collection<? extends AvailableFeatures>) activeFeatures);
107 | 	}
108 | 
109 | }
110 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/binding/JavaVariableFeatureExtractor.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.java.codeutils.binding;
  5 | 
  6 | import java.util.Collection;
  7 | import java.util.List;
  8 | import java.util.Set;
  9 | 
 10 | import org.eclipse.jdt.core.dom.ASTNode;
 11 | import org.eclipse.jdt.core.dom.FieldDeclaration;
 12 | import org.eclipse.jdt.core.dom.SingleVariableDeclaration;
 13 | import org.eclipse.jdt.core.dom.Type;
 14 | import org.eclipse.jdt.core.dom.VariableDeclaration;
 15 | import org.eclipse.jdt.core.dom.VariableDeclarationExpression;
 16 | import org.eclipse.jdt.core.dom.VariableDeclarationFragment;
 17 | import org.eclipse.jdt.core.dom.VariableDeclarationStatement;
 18 | 
 19 | import com.google.common.collect.Sets;
 20 | 
 21 | /**
 22 |  * Utility class to extract features from a variable.
 23 |  *
 24 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 25 |  *
 26 |  */
 27 | public class JavaVariableFeatureExtractor {
 28 | 
 29 | 	public static enum AvailableFeatures {
 30 | 		IMPLEMENTOR_VOCABULARY, TYPE, MODIFIERS, ANCESTRY
 31 | 	}
 32 | 
 33 | 	private Set<AvailableFeatures> activeFeatures = Sets
 34 | 			.newHashSet(AvailableFeatures.values());
 35 | 
 36 | 	public JavaVariableFeatureExtractor() {
 37 | 	}
 38 | 
 39 | 	/**
 40 | 	 * @param features
 41 | 	 * @param declarationPoint
 42 | 	 */
 43 | 	private void getDeclarationFeatures(final Set<String> features,
 44 | 			final ASTNode declarationPoint) {
 45 | 		final Type variableType;
 46 | 		final List modifiers;
 47 | 		final ASTNode ancestryFrom;
 48 | 		if (declarationPoint.getParent() instanceof SingleVariableDeclaration) {
 49 | 			final SingleVariableDeclaration declaration = (SingleVariableDeclaration) declarationPoint
 50 | 					.getParent();
 51 | 			variableType = declaration.getType();
 52 | 			modifiers = declaration.modifiers();
 53 | 			ancestryFrom = declaration;
 54 | 		} else if (declarationPoint.getParent() instanceof VariableDeclarationStatement) {
 55 | 			final VariableDeclarationStatement declaration = (VariableDeclarationStatement) declarationPoint
 56 | 					.getParent();
 57 | 			variableType = declaration.getType();
 58 | 			modifiers = declaration.modifiers();
 59 | 			ancestryFrom = declaration;
 60 | 		} else if (declarationPoint.getParent() instanceof VariableDeclarationFragment) {
 61 | 			if (declarationPoint.getParent().getParent() instanceof VariableDeclarationStatement) {
 62 | 				final VariableDeclarationStatement declaration = (VariableDeclarationStatement) declarationPoint
 63 | 						.getParent().getParent();
 64 | 				variableType = declaration.getType();
 65 | 				modifiers = declaration.modifiers();
 66 | 				ancestryFrom = declaration;
 67 | 			} else if (declarationPoint.getParent().getParent() instanceof FieldDeclaration) {
 68 | 				final FieldDeclaration declaration = (FieldDeclaration) declarationPoint
 69 | 						.getParent().getParent();
 70 | 				variableType = declaration.getType();
 71 | 				modifiers = declaration.modifiers();
 72 | 				ancestryFrom = declaration;
 73 | 			} else if (declarationPoint.getParent().getParent() instanceof VariableDeclarationExpression) {
 74 | 				final VariableDeclarationExpression declaration = (VariableDeclarationExpression) declarationPoint
 75 | 						.getParent().getParent();
 76 | 				variableType = declaration.getType();
 77 | 				modifiers = declaration.modifiers();
 78 | 				ancestryFrom = declaration;
 79 | 			} else {
 80 | 				return;
 81 | 			}
 82 | 		} else {
 83 | 			throw new IllegalStateException("Should not reach this");
 84 | 		}
 85 | 
 86 | 		if (activeFeatures.contains(AvailableFeatures.TYPE)) {
 87 | 			JavaFeatureExtractor.addTypeFeatures(variableType, features);
 88 | 		}
 89 | 		if (activeFeatures.contains(AvailableFeatures.MODIFIERS)) {
 90 | 			JavaFeatureExtractor.addModifierFeatures(features, modifiers);
 91 | 		}
 92 | 		if (activeFeatures.contains(AvailableFeatures.ANCESTRY)) {
 93 | 			JavaFeatureExtractor.addAstAncestryFeatures(features, ancestryFrom);
 94 | 		}
 95 | 	}
 96 | 
 97 | 	public void setActiveFeatures(final Collection<AvailableFeatures> features) {
 98 | 		activeFeatures = Sets.newHashSet(features);
 99 | 	}
100 | 
101 | 	public Set<String> variableFeatures(final Set<ASTNode> boundNodesOfVariable) {
102 | 		// Find the declaration and extract features
103 | 		final Set<String> features = Sets.newHashSet();
104 | 		for (final ASTNode node : boundNodesOfVariable) {
105 | 			if (!(node.getParent() instanceof VariableDeclaration)) {
106 | 				continue;
107 | 			}
108 | 			getDeclarationFeatures(features, node);
109 | 			if (activeFeatures
110 | 					.contains(AvailableFeatures.IMPLEMENTOR_VOCABULARY)) {
111 | 				JavaFeatureExtractor.addImplementorVocab(node, features);
112 | 			}
113 | 			break;
114 | 		}
115 | 		return features;
116 | 	}
117 | 
118 | }
119 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/binding/tui/JavaBindingsPerFeatureTypeToJson.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  */
 4 | package codemining.java.codeutils.binding.tui;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.Collections;
 9 | 
10 | import codemining.java.codeutils.binding.AbstractJavaNameBindingsExtractor;
11 | 
12 | import com.google.common.collect.Sets;
13 | import com.google.gson.JsonIOException;
14 | 
15 | /**
16 |  * Extract bindings for a given type, including one type of feature per time.
17 |  *
18 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
19 |  *
20 |  */
21 | public class JavaBindingsPerFeatureTypeToJson {
22 | 
23 | 	/**
24 | 	 * @param args
25 | 	 */
26 | 	public static void main(final String[] args) {
27 | 		if (args.length != 3) {
28 | 			System.err
29 | 					.println("Usage <inputFolder> variables|methodinvocations|"
30 | 							+ "methodinvocations_typegram|methoddeclarations|methoddeclarations_nooverride"
31 | 							+ "methoddeclarations_typegram|types <outputFolderAndPrefix>");
32 | 			System.exit(-1);
33 | 		}
34 | 
35 | 		final File inputFolder = new File(args[0]);
36 | 		final String outputFolderAndPrefix = args[2];
37 | 		final AbstractJavaNameBindingsExtractor bindingExtractor = JavaBindingsToJson
38 | 				.getExtractorForName(args[1], inputFolder);
39 | 
40 | 		for (final Object featureType : bindingExtractor.getAvailableFeatures()) {
41 | 			try {
42 | 				System.out.println("Using only " + featureType + " feature");
43 | 				bindingExtractor
44 | 						.setActiveFeatures(Sets.newHashSet(featureType));
45 | 				final File outputFile = new File(outputFolderAndPrefix
46 | 						+ featureType.toString() + ".json");
47 | 				System.out.println("Generating at " + outputFile);
48 | 				JavaBindingsToJson.extractBindings(inputFolder, outputFile,
49 | 						bindingExtractor);
50 | 			} catch (JsonIOException | IOException e) {
51 | 				e.printStackTrace();
52 | 			}
53 | 		}
54 | 
55 | 		try {
56 | 			System.out.println("Using no features");
57 | 			bindingExtractor.setActiveFeatures(Collections.EMPTY_SET);
58 | 			final File outputFile = new File(outputFolderAndPrefix
59 | 					+ "NO_FEAT.json");
60 | 			System.out.println("Generating at " + outputFile);
61 | 			JavaBindingsToJson.extractBindings(inputFolder, outputFile,
62 | 					bindingExtractor);
63 | 		} catch (JsonIOException | IOException e) {
64 | 			e.printStackTrace();
65 | 		}
66 | 	}
67 | 
68 | 	private JavaBindingsPerFeatureTypeToJson() {
69 | 		// No instantiations.
70 | 	}
71 | 
72 | }
73 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Java Code Utilities for tokenizing and retrieving 
3 |  * Java AST.
4 |  */
5 | package codemining.java.codeutils;
6 | 
7 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/scopes/AllScopeExtractor.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.java.codeutils.scopes;
  5 | 
  6 | import static com.google.common.base.Preconditions.checkArgument;
  7 | 
  8 | import java.io.File;
  9 | import java.io.IOException;
 10 | import java.util.List;
 11 | 
 12 | import org.eclipse.jdt.core.dom.ASTNode;
 13 | 
 14 | import codemining.languagetools.IScopeExtractor;
 15 | import codemining.languagetools.ParseType;
 16 | import codemining.languagetools.Scope;
 17 | 
 18 | import com.google.common.collect.Lists;
 19 | import com.google.common.collect.Multimap;
 20 | import com.google.common.collect.TreeMultimap;
 21 | 
 22 | /**
 23 |  * Aggregate all extractors.
 24 |  * 
 25 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 26 |  * 
 27 |  */
 28 | public class AllScopeExtractor {
 29 | 
 30 | 	public static final class AllScopeSnippetExtractor implements
 31 | 			IScopeExtractor {
 32 | 
 33 | 		public AllScopeSnippetExtractor() {
 34 | 			allExtractors = Lists.newArrayList();
 35 | 			allExtractors
 36 | 					.add(new VariableScopeExtractor.VariableScopeSnippetExtractor());
 37 | 			allExtractors
 38 | 					.add(new MethodScopeExtractor.MethodScopeSnippetExtractor(
 39 | 							true));
 40 | 			allExtractors
 41 | 					.add(new TypenameScopeExtractor.TypenameSnippetExtractor(
 42 | 							true));
 43 | 		}
 44 | 
 45 | 		public AllScopeSnippetExtractor(final boolean variables,
 46 | 				final boolean methods, final boolean types) {
 47 | 			allExtractors = Lists.newArrayList();
 48 | 			checkArgument(variables | methods | types,
 49 | 					"At least one option must be set");
 50 | 			if (variables) {
 51 | 				allExtractors
 52 | 						.add(new VariableScopeExtractor.VariableScopeSnippetExtractor());
 53 | 			}
 54 | 			if (methods) {
 55 | 				allExtractors
 56 | 						.add(new MethodScopeExtractor.MethodScopeSnippetExtractor(
 57 | 								true));
 58 | 			}
 59 | 			if (types) {
 60 | 				allExtractors
 61 | 						.add(new TypenameScopeExtractor.TypenameSnippetExtractor(
 62 | 								true));
 63 | 			}
 64 | 		}
 65 | 
 66 | 		private final List<IScopeExtractor> allExtractors;
 67 | 
 68 | 		@Override
 69 | 		public Multimap<Scope, String> getFromFile(final File file)
 70 | 				throws IOException {
 71 | 			final Multimap<Scope, String> scopes = TreeMultimap.create();
 72 | 			for (final IScopeExtractor extractor : allExtractors) {
 73 | 				scopes.putAll(extractor.getFromFile(file));
 74 | 			}
 75 | 			return scopes;
 76 | 		}
 77 | 
 78 | 		@Override
 79 | 		public Multimap<Scope, String> getFromNode(ASTNode node) {
 80 | 			final Multimap<Scope, String> scopes = TreeMultimap.create();
 81 | 			for (final IScopeExtractor extractor : allExtractors) {
 82 | 				scopes.putAll(extractor.getFromNode(node));
 83 | 			}
 84 | 			return scopes;
 85 | 		}
 86 | 
 87 | 		@Override
 88 | 		public Multimap<Scope, String> getFromString(final String file,
 89 | 				final ParseType parseType) {
 90 | 			final Multimap<Scope, String> scopes = TreeMultimap.create();
 91 | 			for (final IScopeExtractor extractor : allExtractors) {
 92 | 				scopes.putAll(extractor.getFromString(file, parseType));
 93 | 			}
 94 | 			return scopes;
 95 | 		}
 96 | 	}
 97 | 
 98 | 	/**
 99 | 	 * 
100 | 	 */
101 | 	private AllScopeExtractor() {
102 | 	}
103 | 
104 | }
105 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/scopes/MethodScopeExtractor.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.java.codeutils.scopes;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.Map.Entry;
  9 | import java.util.logging.Logger;
 10 | 
 11 | import org.eclipse.jdt.core.dom.ASTNode;
 12 | import org.eclipse.jdt.core.dom.ASTVisitor;
 13 | import org.eclipse.jdt.core.dom.MethodDeclaration;
 14 | import org.eclipse.jdt.core.dom.MethodInvocation;
 15 | import org.eclipse.jdt.core.dom.TypeDeclaration;
 16 | 
 17 | import codemining.java.codeutils.JavaASTExtractor;
 18 | import codemining.languagetools.IScopeExtractor;
 19 | import codemining.languagetools.ParseType;
 20 | import codemining.languagetools.Scope;
 21 | import codemining.languagetools.Scope.ScopeType;
 22 | 
 23 | import com.google.common.base.Objects;
 24 | import com.google.common.collect.HashMultimap;
 25 | import com.google.common.collect.Multimap;
 26 | import com.google.common.collect.TreeMultimap;
 27 | 
 28 | /**
 29 |  * Extract method names from a scope.
 30 |  * 
 31 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 32 |  * 
 33 |  */
 34 | public class MethodScopeExtractor {
 35 | 
 36 | 	public static class Method {
 37 | 
 38 | 		public final String name;
 39 | 		public final ScopeType type;
 40 | 
 41 | 		public Method(final String name, final ScopeType type) {
 42 | 			this.name = name;
 43 | 			this.type = type;
 44 | 		}
 45 | 
 46 | 		@Override
 47 | 		public boolean equals(Object obj) {
 48 | 			if (!(obj instanceof Method)) {
 49 | 				return false;
 50 | 			}
 51 | 			Method other = (Method) obj;
 52 | 			return name.equals(other.name) && type == other.type;
 53 | 		}
 54 | 
 55 | 		@Override
 56 | 		public int hashCode() {
 57 | 			return Objects.hashCode(name, type);
 58 | 		}
 59 | 	}
 60 | 
 61 | 	public static final class MethodScopeSnippetExtractor implements
 62 | 			IScopeExtractor {
 63 | 
 64 | 		final boolean methodAsRoots;
 65 | 
 66 | 		public MethodScopeSnippetExtractor(final boolean useMethodsAsRoots) {
 67 | 			methodAsRoots = useMethodsAsRoots;
 68 | 		}
 69 | 
 70 | 		@Override
 71 | 		public final Multimap<Scope, String> getFromFile(final File f) {
 72 | 			try {
 73 | 				return getScopeSnippets(f, methodAsRoots);
 74 | 			} catch (IOException e) {
 75 | 				LOGGER.severe("Unable to extract method scope snippets from file "
 76 | 						+ f.getName());
 77 | 				throw new IllegalArgumentException(
 78 | 						"Unable to extract method scope snippets from file");
 79 | 			}
 80 | 		}
 81 | 
 82 | 		@Override
 83 | 		public Multimap<Scope, String> getFromNode(final ASTNode node) {
 84 | 			return getScopeSnippets(node, methodAsRoots);
 85 | 		}
 86 | 
 87 | 		@Override
 88 | 		public final Multimap<Scope, String> getFromString(final String code,
 89 | 				final ParseType parseType) {
 90 | 			return getScopeSnippets(code, methodAsRoots, parseType);
 91 | 		}
 92 | 	}
 93 | 
 94 | 	private static class ScopeFinder extends ASTVisitor {
 95 | 
 96 | 		final Multimap<ASTNode, Method> methods = HashMultimap.create();
 97 | 
 98 | 		ASTNode classNode = null;
 99 | 		ASTNode currentMethodNode = null;
100 | 		final boolean methodAsRoot;
101 | 
102 | 		public ScopeFinder(final boolean methodAsRoots) {
103 | 			methodAsRoot = methodAsRoots;
104 | 		}
105 | 
106 | 		@Override
107 | 		public void endVisit(MethodDeclaration node) {
108 | 			if (currentMethodNode == node) {
109 | 				currentMethodNode = null;
110 | 			}
111 | 			super.endVisit(node);
112 | 		}
113 | 
114 | 		@Override
115 | 		public boolean visit(MethodDeclaration node) {
116 | 			if (currentMethodNode == null) {
117 | 				currentMethodNode = node;
118 | 			}
119 | 			if (node.isConstructor())
120 | 				return super.visit(node);
121 | 			final String name = node.getName().toString();
122 | 
123 | 			final Method mth = new Method(name, ScopeType.SCOPE_CLASS);
124 | 			if (classNode != null) {
125 | 				methods.put(classNode, mth);
126 | 			}
127 | 			return super.visit(node);
128 | 		}
129 | 
130 | 		@Override
131 | 		public boolean visit(MethodInvocation node) {
132 | 			final String name = node.getName().toString();
133 | 
134 | 			if (methodAsRoot && currentMethodNode != null) {
135 | 				final Method mth = new Method(name, ScopeType.SCOPE_METHOD);
136 | 				methods.put(currentMethodNode, mth);
137 | 			} else {
138 | 				final Method mth = new Method(name, ScopeType.SCOPE_CLASS);
139 | 				methods.put(classNode, mth);
140 | 			}
141 | 			return super.visit(node);
142 | 		}
143 | 
144 | 		@Override
145 | 		public boolean visit(TypeDeclaration node) {
146 | 			if (classNode == null) {
147 | 				classNode = node;
148 | 			}
149 | 			return super.visit(node);
150 | 		}
151 | 	}
152 | 
153 | 	public static final String METHOD_CALL = "%MethodCall%";
154 | 
155 | 	private static final Logger LOGGER = Logger
156 | 			.getLogger(MethodScopeExtractor.class.getName());
157 | 
158 | 	public static Multimap<Scope, String> getScopeSnippets(final ASTNode node,
159 | 			final boolean methodAsRoots) {
160 | 		final ScopeFinder scopeFinder = new ScopeFinder(methodAsRoots);
161 | 		node.accept(scopeFinder);
162 | 
163 | 		final Multimap<Scope, String> scopes = TreeMultimap.create();
164 | 		for (final Entry<ASTNode, Method> method : scopeFinder.methods
165 | 				.entries()) {
166 | 			scopes.put(new Scope(method.getKey().toString(),
167 | 					method.getValue().type, METHOD_CALL, 0, 0), method
168 | 					.getValue().name);
169 | 		}
170 | 
171 | 		return scopes;
172 | 
173 | 	}
174 | 
175 | 	public static Multimap<Scope, String> getScopeSnippets(final File file,
176 | 			final boolean methodAsRoots) throws IOException {
177 | 		final JavaASTExtractor ex = new JavaASTExtractor(false);
178 | 		return getScopeSnippets(ex.getAST(file), methodAsRoots);
179 | 	}
180 | 
181 | 	public static Multimap<Scope, String> getScopeSnippets(final String code,
182 | 			final boolean methodAsRoots, final ParseType parseType) {
183 | 		final JavaASTExtractor ex = new JavaASTExtractor(false);
184 | 		return getScopeSnippets(ex.getAST(code, parseType), methodAsRoots);
185 | 	}
186 | }
187 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/scopes/ScopedIdentifierRenaming.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.java.codeutils.scopes;
  5 | 
  6 | import java.util.Collections;
  7 | import java.util.List;
  8 | import java.util.Map;
  9 | 
 10 | import codemining.java.codeutils.JavaASTExtractor;
 11 | import codemining.java.tokenizers.JavaTokenizer;
 12 | import codemining.languagetools.IScopeExtractor;
 13 | import codemining.languagetools.ITokenizer;
 14 | import codemining.languagetools.ParseType;
 15 | import codemining.languagetools.Scope;
 16 | 
 17 | import com.google.common.collect.Maps;
 18 | import com.google.common.collect.Multimap;
 19 | 
 20 | /**
 21 |  * Rename an identifer given a scope.
 22 |  * 
 23 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 24 |  * 
 25 |  */
 26 | public class ScopedIdentifierRenaming {
 27 | 
 28 | 	final IScopeExtractor scopeExtractor;
 29 | 
 30 | 	final ITokenizer tokenizer = new JavaTokenizer();
 31 | 
 32 | 	final ParseType parseKindToUseOnOriginal;
 33 | 
 34 | 	public ScopedIdentifierRenaming(final IScopeExtractor scopeExtractor,
 35 | 			final ParseType parseType) {
 36 | 		this.scopeExtractor = scopeExtractor;
 37 | 		parseKindToUseOnOriginal = parseType;
 38 | 	}
 39 | 
 40 | 	public String getFormattedRenamedCode(final String originalScopeCode,
 41 | 			final String from, final String to, final String wholeFile) {
 42 | 		final String code = getRenamedCode(originalScopeCode, from, to,
 43 | 				wholeFile);
 44 | 		final JavaASTExtractor ex = new JavaASTExtractor(false);
 45 | 		return ex.getASTNode(code, parseKindToUseOnOriginal).toString();
 46 | 	}
 47 | 
 48 | 	/**
 49 | 	 * @param originalScopeCode
 50 | 	 * @param from
 51 | 	 * @param to
 52 | 	 * @param wholeFile
 53 | 	 * @return
 54 | 	 */
 55 | 	public String getRenamedCode(final String originalScopeCode,
 56 | 			final String from, final String to, final String wholeFile) {
 57 | 		final Map<String, String> varMapping = Maps.newTreeMap();
 58 | 		varMapping.put(from, to);
 59 | 		return getRenamedCode(originalScopeCode, wholeFile, varMapping);
 60 | 	}
 61 | 
 62 | 	/**
 63 | 	 * @param originalScopeCode
 64 | 	 * @param wholeFile
 65 | 	 * @param varMapping
 66 | 	 * @return
 67 | 	 */
 68 | 	public String getRenamedCode(final String originalScopeCode,
 69 | 			final String wholeFile, final Map<String, String> varMapping) {
 70 | 		final JavaASTExtractor ex = new JavaASTExtractor(false);
 71 | 		final String originalCode = renameVariableInSnippet(
 72 | 				ex.getASTNode(wholeFile, parseKindToUseOnOriginal).toString(),
 73 | 				Collections.EMPTY_MAP);
 74 | 		final String snippetToBeReplaced = renameVariableInSnippet(
 75 | 				originalScopeCode, varMapping);
 76 | 
 77 | 		final String code = originalCode.replace(
 78 | 				renameVariableInSnippet(originalScopeCode,
 79 | 						Collections.EMPTY_MAP), snippetToBeReplaced);
 80 | 		return code;
 81 | 	}
 82 | 
 83 | 	public Multimap<Scope, String> getRenamedScopes(final Scope originalScope,
 84 | 			final String from, final String to, final String wholeFile) {
 85 | 		return getRenamedScopes(originalScope.code, from, to, wholeFile);
 86 | 	}
 87 | 
 88 | 	public Multimap<Scope, String> getRenamedScopes(
 89 | 			final String originalScopeCode, final String from, final String to,
 90 | 			final String wholeFile) {
 91 | 		final String code = getRenamedCode(originalScopeCode, from, to,
 92 | 				wholeFile);
 93 | 		return scopeExtractor.getFromString(code, parseKindToUseOnOriginal);
 94 | 	}
 95 | 
 96 | 	/**
 97 | 	 * Crudely rename the name of an identifier by searching for similarly named
 98 | 	 * tokens.
 99 | 	 * 
100 | 	 * @param snippet
101 | 	 * @param variableMapping
102 | 	 *            from, to
103 | 	 * @return
104 | 	 */
105 | 	private String renameVariableInSnippet(final String snippet,
106 | 			final Map<String, String> variableMapping) {
107 | 		final List<String> tokens = tokenizer.tokenListFromCode(snippet
108 | 				.toCharArray());
109 | 
110 | 		final StringBuffer bf = new StringBuffer();
111 | 		for (final String token : tokens) {
112 | 			if (variableMapping.containsKey(token)) {
113 | 				bf.append(variableMapping.get(token));
114 | 			} else if (token.equals(ITokenizer.SENTENCE_START)
115 | 					|| token.equals(ITokenizer.SENTENCE_END)) {
116 | 				continue;
117 | 			} else {
118 | 				bf.append(token);
119 | 			}
120 | 			bf.append(" ");
121 | 		}
122 | 		return bf.toString();
123 | 
124 | 	}
125 | 
126 | }
127 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/scopes/ScopesTUI.java:
--------------------------------------------------------------------------------
 1 | package codemining.java.codeutils.scopes;
 2 | 
 3 | import java.io.File;
 4 | import java.io.IOException;
 5 | 
 6 | import codemining.java.codeutils.scopes.AllScopeExtractor.AllScopeSnippetExtractor;
 7 | import codemining.languagetools.IScopeExtractor;
 8 | 
 9 | public class ScopesTUI {
10 | 
11 | 	/**
12 | 	 * @param name
13 | 	 * @return
14 | 	 * @throws UnsupportedOperationException
15 | 	 */
16 | 	public static IScopeExtractor getScopeExtractorByName(final String name)
17 | 			throws UnsupportedOperationException {
18 | 		final IScopeExtractor scopeExtractor;
19 | 		if (name.equals("variable")) {
20 | 			scopeExtractor = new VariableScopeExtractor.VariableScopeSnippetExtractor();
21 | 		} else if (name.equals("method")) {
22 | 			scopeExtractor = new MethodScopeExtractor.MethodScopeSnippetExtractor(
23 | 					true);
24 | 		} else if (name.equals("type")) {
25 | 			scopeExtractor = new TypenameScopeExtractor.TypenameSnippetExtractor(
26 | 					true);
27 | 		} else if (name.equals("all")) {
28 | 			scopeExtractor = new AllScopeSnippetExtractor();
29 | 		} else {
30 | 			throw new UnsupportedOperationException(
31 | 					"Unknown type of identifier.");
32 | 		}
33 | 		return scopeExtractor;
34 | 	}
35 | 
36 | 	/**
37 | 	 * @param args
38 | 	 * @throws IOException
39 | 	 */
40 | 	public static void main(String[] args) throws IOException {
41 | 		if (args.length < 2) {
42 | 			System.err.println("Usage <file> all|variable|method|type");
43 | 			return;
44 | 		}
45 | 		final String name = args[1];
46 | 		final IScopeExtractor scopeExtractor = getScopeExtractorByName(name);
47 | 
48 | 		System.out.println(scopeExtractor.getFromFile(new File(args[0])));
49 | 
50 | 	}
51 | }
52 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/codeutils/scopes/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Code that has to do with extracting data from scopes.
3 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
4 |  *
5 |  */
6 | package codemining.java.codeutils.scopes;


--------------------------------------------------------------------------------
/src/main/java/codemining/java/tokenizers/JavaIdentifierAnnotatedTokenizer.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.java.tokenizers;
  5 | 
  6 | import java.util.List;
  7 | import java.util.Map;
  8 | import java.util.SortedMap;
  9 | import java.util.logging.Logger;
 10 | 
 11 | import org.apache.commons.lang.exception.ExceptionUtils;
 12 | import org.eclipse.jdt.core.compiler.ITerminalSymbols;
 13 | import org.eclipse.jdt.core.compiler.InvalidInputException;
 14 | import org.eclipse.jdt.internal.core.util.PublicScanner;
 15 | 
 16 | import codemining.java.codeutils.IdentifierPerType;
 17 | 
 18 | import com.google.common.collect.Lists;
 19 | import com.google.common.collect.Maps;
 20 | import com.google.common.collect.Range;
 21 | import com.google.common.collect.RangeSet;
 22 | 
 23 | /**
 24 |  * A Java tokenizer that annotates the type of the identifier tokens.
 25 |  * 
 26 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 27 |  * 
 28 |  */
 29 | public class JavaIdentifierAnnotatedTokenizer extends JavaTokenizer {
 30 | 
 31 | 	private static class IdentifierTypeRetriever {
 32 | 
 33 | 		final Map<String, RangeSet<Integer>> variables;
 34 | 		final Map<String, RangeSet<Integer>> methods;
 35 | 		final Map<String, RangeSet<Integer>> types;
 36 | 
 37 | 		public IdentifierTypeRetriever(final char[] code) throws Exception {
 38 | 			variables = IdentifierPerType.getVariableIdentifiersRanges(code);
 39 | 			methods = IdentifierPerType.getMethodIdentifiersRanges(code);
 40 | 			types = IdentifierPerType.getTypeIdentifiersRanges(code);
 41 | 		}
 42 | 
 43 | 		public String getIdentifierType(final PublicScanner scanner) {
 44 | 			final int startPos = scanner.getCurrentTokenStartPosition();
 45 | 			final int endPos = scanner.getCurrentTokenEndPosition();
 46 | 			final Range<Integer> tokenRange = Range
 47 | 					.closedOpen(startPos, endPos);
 48 | 
 49 | 			final String tokenName = scanner.getCurrentTokenString();
 50 | 
 51 | 			// TODO: Find the tightest of all
 52 | 			if (isInSet(tokenName, tokenRange, variables)) {
 53 | 				return IDENTIFIER_PREFIX + "_VAR";
 54 | 			} else if (isInSet(tokenName, tokenRange, methods)) {
 55 | 				return IDENTIFIER_PREFIX + "_METHOD";
 56 | 			} else if (isInSet(tokenName, tokenRange, types)) {
 57 | 				return IDENTIFIER_PREFIX + "_TYPE";
 58 | 			}
 59 | 			return IDENTIFIER_PREFIX + "_UNK";
 60 | 		}
 61 | 
 62 | 		private boolean isInSet(final String token,
 63 | 				final Range<Integer> tokenRange,
 64 | 				final Map<String, RangeSet<Integer>> set) {
 65 | 			if (!set.containsKey(token)) {
 66 | 				return false;
 67 | 			}
 68 | 			// TODO: Check if in scope
 69 | 			return true;
 70 | 		}
 71 | 	}
 72 | 
 73 | 	private static final long serialVersionUID = -4779695380807928575L;
 74 | 
 75 | 	private static final Logger LOGGER = Logger
 76 | 			.getLogger(JavaIdentifierAnnotatedTokenizer.class.getName());
 77 | 
 78 | 	public static final String IDENTIFIER_PREFIX = "IDENTIFIER";
 79 | 
 80 | 	public static final String LITERAL = "LITERAL";
 81 | 
	/**
	 * Create the tokenizer through the no-argument constructor of the
	 * superclass.
	 */
	public JavaIdentifierAnnotatedTokenizer() {
		super();
	}
 85 | 
	/**
	 * Create the tokenizer, forwarding the flag to the superclass
	 * constructor.
	 * 
	 * @param tokenizeComments
	 *            passed unchanged to the superclass; presumably controls
	 *            whether comments are tokenized (confirm in JavaTokenizer)
	 */
	public JavaIdentifierAnnotatedTokenizer(final boolean tokenizeComments) {
		super(tokenizeComments);
	}
 89 | 
 90 | 	@Override
 91 | 	public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
 92 | 		IdentifierTypeRetriever idRetriever;
 93 | 		try {
 94 | 			idRetriever = new IdentifierTypeRetriever(code);
 95 | 		} catch (final Exception e) {
 96 | 			throw new IllegalArgumentException(e);
 97 | 		}
 98 | 
 99 | 		final PublicScanner scanner = prepareScanner();
100 | 		final SortedMap<Integer, FullToken> tokens = Maps.newTreeMap();
101 | 		tokens.put(-1, new FullToken(SENTENCE_START, SENTENCE_START));
102 | 		tokens.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END, SENTENCE_END));
103 | 		scanner.setSource(code);
104 | 		while (!scanner.atEnd()) {
105 | 			do {
106 | 				try {
107 | 					final int token = scanner.getNextToken();
108 | 					if (token == ITerminalSymbols.TokenNameEOF) {
109 | 						break;
110 | 					}
111 | 					final String nxtToken = transformToken(token,
112 | 							scanner.getCurrentTokenString());
113 | 					final String tokenType = getTokenType(token, scanner,
114 | 							idRetriever);
115 | 
116 | 					final int position = scanner.getCurrentTokenStartPosition();
117 | 					tokens.put(position, new FullToken(nxtToken, tokenType));
118 | 				} catch (final InvalidInputException e) {
119 | 					LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
120 | 				}
121 | 			} while (!scanner.atEnd());
122 | 
123 | 		}
124 | 		return tokens;
125 | 	}
126 | 
127 | 	@Override
128 | 	public String getIdentifierType() {
129 | 		throw new UnsupportedOperationException(
130 | 				"There is no single indentifier type for this tokenizer.");
131 | 	}
132 | 
133 | 	@Override
134 | 	public FullToken getTokenFromString(final String token) {
135 | 		throw new UnsupportedOperationException(
136 | 				"Cannot compute token from just a string using this tokenizer.");
137 | 	}
138 | 
139 | 	@Override
140 | 	public List<FullToken> getTokenListFromCode(final char[] code) {
141 | 		IdentifierTypeRetriever idRetriever;
142 | 		try {
143 | 			idRetriever = new IdentifierTypeRetriever(code);
144 | 		} catch (final Exception e) {
145 | 			throw new IllegalArgumentException(e);
146 | 		}
147 | 
148 | 		final List<FullToken> tokens = Lists.newArrayList();
149 | 		tokens.add(new FullToken(SENTENCE_START, SENTENCE_START));
150 | 		final PublicScanner scanner = prepareScanner();
151 | 		scanner.setSource(code);
152 | 		do {
153 | 			try {
154 | 				final int token = scanner.getNextToken();
155 | 				if (token == ITerminalSymbols.TokenNameEOF) {
156 | 					break;
157 | 				}
158 | 				final String nxtToken = transformToken(token,
159 | 						scanner.getCurrentTokenString());
160 | 
161 | 				final String tokenType = getTokenType(token, scanner,
162 | 						idRetriever);
163 | 				tokens.add(new FullToken(stripTokenIfNeeded(nxtToken),
164 | 						tokenType));
165 | 			} catch (final InvalidInputException e) {
166 | 				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
167 | 			} catch (final StringIndexOutOfBoundsException e) {
168 | 				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
169 | 			}
170 | 		} while (!scanner.atEnd());
171 | 		tokens.add(new FullToken(SENTENCE_END, SENTENCE_END));
172 | 		return tokens;
173 | 	}
174 | 
175 | 	private final String getTokenType(final int tokenType,
176 | 			final PublicScanner scanner, final IdentifierTypeRetriever retriever) {
177 | 		if (tokenType == ITerminalSymbols.TokenNameIdentifier) {
178 | 			return retriever.getIdentifierType(scanner);
179 | 		} else if (JavaTokenTypeTokenizer.isLiteralToken(tokenType)) {
180 | 			return LITERAL;
181 | 		} else {
182 | 			return scanner.getCurrentTokenString();
183 | 		}
184 | 	}
185 | }
186 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/tokenizers/JavaTokenizerSomeTokens.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.tokenizers;
 5 | 
 6 | import java.util.ArrayList;
 7 | import java.util.List;
 8 | import java.util.Set;
 9 | import java.util.SortedMap;
10 | import java.util.TreeMap;
11 | import java.util.logging.Logger;
12 | 
13 | import org.apache.commons.lang.exception.ExceptionUtils;
14 | import org.eclipse.jdt.core.compiler.ITerminalSymbols;
15 | 
16 | import codemining.java.codeutils.IdentifierPerType;
17 | import codemining.util.SettingsLoader;
18 | 
19 | /**
20 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
21 |  * 
22 |  */
23 | public class JavaTokenizerSomeTokens extends JavaTokenizer {
24 | 	public static final String GENERIC_IDENTIFIER = "%IDENTIFIER%";
25 | 
26 | 	private static final long serialVersionUID = -8566029315110514304L;
27 | 
28 | 	private static final Logger LOGGER = Logger
29 | 			.getLogger(JavaTokenizerSomeTokens.class.getName());
30 | 
31 | 	private Set<String> methodIds;
32 | 	private Set<String> typeIds;
33 | 	private Set<String> varIds;
34 | 
35 | 	private final boolean REMOVE_METHOD_IDENTIFIERS = SettingsLoader
36 | 			.getBooleanSetting("removeMethodIdentifiers", false);
37 | 	private final boolean REMOVE_VAR_IDENTIFIERS = SettingsLoader
38 | 			.getBooleanSetting("removeVariableIdentifiers", false);
39 | 	private final boolean REMOVE_TYPE_IDENTIFIERS = SettingsLoader
40 | 			.getBooleanSetting("removeTypeIdentifiers", false);
41 | 
42 | 	private void generateValidTokList(final char[] code) throws Exception {
43 | 		methodIds = IdentifierPerType.getMethodIdentifiers(code);
44 | 		typeIds = IdentifierPerType.getTypeIdentifiers(code);
45 | 		varIds = IdentifierPerType.getVariableIdentifiers(code);
46 | 	}
47 | 
48 | 	@Override
49 | 	public List<String> tokenListFromCode(final char[] code) {
50 | 		try {
51 | 			generateValidTokList(code);
52 | 			return super.tokenListFromCode(code);
53 | 		} catch (final Exception e) {
54 | 			LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
55 | 		}
56 | 		return new ArrayList<String>();
57 | 	}
58 | 
59 | 	@Override
60 | 	public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
61 | 		try {
62 | 			generateValidTokList(code);
63 | 			return super.tokenListWithPos(code);
64 | 		} catch (final Exception e) {
65 | 			LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
66 | 		}
67 | 		return new TreeMap<Integer, String>();
68 | 	}
69 | 
70 | 	@Override
71 | 	protected String transformToken(final int tokenType, final String token) {
72 | 		if (tokenType != ITerminalSymbols.TokenNameIdentifier) {
73 | 			return token;
74 | 		}
75 | 		if (methodIds.contains(token) && REMOVE_METHOD_IDENTIFIERS) {
76 | 			return GENERIC_IDENTIFIER;
77 | 		} else if (varIds.contains(token) && REMOVE_VAR_IDENTIFIERS) {
78 | 			return GENERIC_IDENTIFIER;
79 | 		} else if (typeIds.contains(token) && REMOVE_TYPE_IDENTIFIERS) {
80 | 			return GENERIC_IDENTIFIER;
81 | 		}
82 | 		return token;
83 | 	}
84 | }
85 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/java/tokenizers/JavaWidthAnnotatedWhitespaceTokenizer.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.java.tokenizers;
  5 | 
  6 | import java.util.List;
  7 | import java.util.Map.Entry;
  8 | import java.util.SortedMap;
  9 | 
 10 | import codemining.util.SettingsLoader;
 11 | 
 12 | import com.google.common.collect.Lists;
 13 | import com.google.common.collect.Maps;
 14 | 
 15 | /**
 16 |  * A Java whitespace tokenizer that annotates its non-whitespace tokens with
 17 |  * width data.
 18 |  * 
 19 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 20 |  * 
 21 |  */
 22 | public class JavaWidthAnnotatedWhitespaceTokenizer extends
 23 | 		JavaWhitespaceTokenizer {
 24 | 
 25 | 	private static final long serialVersionUID = -3365546393414164809L;
 26 | 
 27 | 	/**
 28 | 	 * Constant for getting the size quantization of width, column of the
 29 | 	 * annotation.
 30 | 	 */
 31 | 	public static final int SIZE_QUANTIAZATION = (int) SettingsLoader
 32 | 			.getNumericSetting("sizeQuantization", 20);
 33 | 
 34 | 	/**
 35 | 	 * @param token
 36 | 	 * @return
 37 | 	 */
 38 | 	protected String annotatedTokenToString(final WhitespaceAnnotatedToken token) {
 39 | 		final int columnQ = token.column / SIZE_QUANTIAZATION;
 40 | 		final int widthQ = token.width / SIZE_QUANTIAZATION;
 41 | 		final String annotatedToken = token.token + "_" + columnQ + "_"
 42 | 				+ widthQ;
 43 | 		return annotatedToken;
 44 | 	}
 45 | 
 46 | 	@Override
 47 | 	public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
 48 | 		final TokenizerImplementation tok = new TokenizerImplementation();
 49 | 		final SortedMap<Integer, WhitespaceAnnotatedToken> annotatedTokens = tok
 50 | 				.tokenListWithPosAndWidth(code);
 51 | 		final SortedMap<Integer, FullToken> tokens = Maps.newTreeMap();
 52 | 
 53 | 		for (final Entry<Integer, WhitespaceAnnotatedToken> entry : annotatedTokens
 54 | 				.entrySet()) {
 55 | 			tokens.put(entry.getKey(), new FullToken(
 56 | 					annotatedTokenToString(entry.getValue()),
 57 | 					entry.getValue().tokenType));
 58 | 		}
 59 | 		return tokens;
 60 | 	}
 61 | 
 62 | 	@Override
 63 | 	public FullToken getTokenFromString(final String token) {
 64 | 		throw new UnsupportedOperationException();
 65 | 	}
 66 | 
 67 | 	@Override
 68 | 	public List<FullToken> getTokenListFromCode(final char[] code) {
 69 | 		final TokenizerImplementation tok = new TokenizerImplementation();
 70 | 		final List<WhitespaceAnnotatedToken> annotatedTokens = tok
 71 | 				.getTokensWithWidthData(code);
 72 | 		final List<FullToken> tokens = Lists.newArrayList();
 73 | 		for (final WhitespaceAnnotatedToken token : annotatedTokens) {
 74 | 			if (token.token.startsWith("WS_")) {
 75 | 				tokens.add(new FullToken(token.token, token.tokenType));
 76 | 			} else {
 77 | 				final String annotatedToken = annotatedTokenToString(token);
 78 | 				tokens.add(new FullToken(annotatedToken, token.tokenType));
 79 | 			}
 80 | 		}
 81 | 		return tokens;
 82 | 	}
 83 | 
 84 | 	@Override
 85 | 	public List<String> tokenListFromCode(final char[] code) {
 86 | 		final TokenizerImplementation tok = new TokenizerImplementation();
 87 | 		final List<WhitespaceAnnotatedToken> annotatedTokens = tok
 88 | 				.getTokensWithWidthData(code);
 89 | 		final List<String> tokens = Lists.newArrayList();
 90 | 		for (final WhitespaceAnnotatedToken token : annotatedTokens) {
 91 | 			if (token.token.startsWith("WS_")) {
 92 | 				tokens.add(token.token);
 93 | 			} else {
 94 | 				tokens.add(annotatedTokenToString(token));
 95 | 			}
 96 | 		}
 97 | 		return tokens;
 98 | 	}
 99 | 
100 | 	@Override
101 | 	public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
102 | 		final TokenizerImplementation tok = new TokenizerImplementation();
103 | 		final SortedMap<Integer, WhitespaceAnnotatedToken> annotatedTokens = tok
104 | 				.tokenListWithPosAndWidth(code);
105 | 		final SortedMap<Integer, String> tokens = Maps.newTreeMap();
106 | 
107 | 		for (final Entry<Integer, WhitespaceAnnotatedToken> entry : annotatedTokens
108 | 				.entrySet()) {
109 | 			tokens.put(entry.getKey(), annotatedTokenToString(entry.getValue()));
110 | 		}
111 | 		return tokens;
112 | 	}
113 | 
114 | }
115 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/js/codedata/metrics/IJavascriptFileMetricRetriever.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.js.codedata.metrics;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | 
 9 | import org.eclipse.wst.jsdt.core.dom.ASTNode;
10 | 
11 | /**
12 |  * An interface for all the classes that can return a metric
13 |  * 
14 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
15 |  * 
16 |  */
17 | public interface IJavascriptFileMetricRetriever {
18 | 	double getMetricForASTNode(final ASTNode node);
19 | 
20 | 	double getMetricForFile(final File file) throws IOException;
21 | }
22 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/js/codedata/metrics/JavascriptCyclomaticCalculator.java:
--------------------------------------------------------------------------------
  1 | package codemining.js.codedata.metrics;
  2 | 
  3 | import java.io.File;
  4 | import java.io.IOException;
  5 | import java.util.logging.Logger;
  6 | 
  7 | import org.eclipse.wst.jsdt.core.dom.ASTNode;
  8 | import org.eclipse.wst.jsdt.core.dom.ASTVisitor;
  9 | import org.eclipse.wst.jsdt.core.dom.CatchClause;
 10 | import org.eclipse.wst.jsdt.core.dom.ConditionalExpression;
 11 | import org.eclipse.wst.jsdt.core.dom.DoStatement;
 12 | import org.eclipse.wst.jsdt.core.dom.EnhancedForStatement;
 13 | import org.eclipse.wst.jsdt.core.dom.ForStatement;
 14 | import org.eclipse.wst.jsdt.core.dom.FunctionDeclaration;
 15 | import org.eclipse.wst.jsdt.core.dom.IfStatement;
 16 | import org.eclipse.wst.jsdt.core.dom.SwitchCase;
 17 | import org.eclipse.wst.jsdt.core.dom.WhileStatement;
 18 | 
 19 | import codemining.js.codeutils.JavascriptASTExtractor;
 20 | 
 21 | /**
 22 |  * Compute McCabe's Cyclomatic Complexity.
 23 |  * 
 24 |  * @author Miltos Allamanis
 25 |  * 
 26 |  */
 27 | public class JavascriptCyclomaticCalculator implements
 28 | 		IJavascriptFileMetricRetriever {
 29 | 
 30 | 	/**
 31 | 	 * Visit all "junctions" in an AST and increment complexity.
 32 | 	 * 
 33 | 	 */
 34 | 	private static class JunctionVisitor extends ASTVisitor {
 35 | 		int complexity = 0;
 36 | 
 37 | 		@Override
 38 | 		public boolean visit(final CatchClause arg0) {
 39 | 			complexity++;
 40 | 			return super.visit(arg0);
 41 | 		}
 42 | 
 43 | 		@Override
 44 | 		public boolean visit(final ConditionalExpression arg0) {
 45 | 			complexity++;
 46 | 			return super.visit(arg0);
 47 | 		}
 48 | 
 49 | 		@Override
 50 | 		public boolean visit(final DoStatement arg0) {
 51 | 			complexity++;
 52 | 			return super.visit(arg0);
 53 | 		}
 54 | 
 55 | 		@Override
 56 | 		public boolean visit(final EnhancedForStatement arg0) {
 57 | 			complexity++;
 58 | 			return super.visit(arg0);
 59 | 		}
 60 | 
 61 | 		@Override
 62 | 		public boolean visit(final ForStatement arg0) {
 63 | 			complexity++;
 64 | 			return super.visit(arg0);
 65 | 		}
 66 | 
 67 | 		@Override
 68 | 		public boolean visit(final IfStatement arg0) {
 69 | 			complexity++;
 70 | 			return super.visit(arg0);
 71 | 		}
 72 | 
 73 | 		@Override
 74 | 		public boolean visit(final FunctionDeclaration arg0) {
 75 | 			/*
 76 | 			 * if (isConcrete(arg0)) { complexity.startMethod(); return
 77 | 			 * super.visit(arg0); } return false;
 78 | 			 */
 79 | 			complexity++; // TODO: Not exactly true, but we'll use that
 80 | 			return super.visit(arg0);
 81 | 		}
 82 | 
 83 | 		@Override
 84 | 		public boolean visit(final SwitchCase arg0) {
 85 | 			complexity++;
 86 | 			return super.visit(arg0);
 87 | 		}
 88 | 
 89 | 		@Override
 90 | 		public boolean visit(final WhileStatement arg0) {
 91 | 			complexity++;
 92 | 			return super.visit(arg0);
 93 | 		}
 94 | 	}
 95 | 
 96 | 	private static final Logger LOGGER = Logger
 97 | 			.getLogger(JavascriptCyclomaticCalculator.class.getName());
 98 | 
 99 | 	public int getComplexity(final File file) throws IOException {
100 | 		final JavascriptASTExtractor ast = new JavascriptASTExtractor(false);
101 | 		final JunctionVisitor visitor = new JunctionVisitor();
102 | 		ast.getAST(file).accept(visitor);
103 | 		return visitor.complexity;
104 | 	}
105 | 
106 | 	@Override
107 | 	public double getMetricForASTNode(final ASTNode node) {
108 | 		final JunctionVisitor visitor = new JunctionVisitor();
109 | 		node.accept(visitor);
110 | 		return visitor.complexity;
111 | 	}
112 | 
113 | 	@Override
114 | 	public double getMetricForFile(final File file) throws IOException {
115 | 		return getComplexity(file);
116 | 	}
117 | }
118 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/js/codedata/metrics/JavascriptFunctionCounter.java:
--------------------------------------------------------------------------------
 1 | package codemining.js.codedata.metrics;
 2 | 
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.eclipse.wst.jsdt.core.dom.ASTNode;
import org.eclipse.wst.jsdt.core.dom.ASTVisitor;
import org.eclipse.wst.jsdt.core.dom.JavaScriptUnit;

import codemining.js.codeutils.JavascriptASTExtractor;
13 | 
14 | public final class JavascriptFunctionCounter {
15 | 
16 | 	public static class MethodClassCountVisitor extends ASTVisitor {
17 | 
18 | 		public int noFunctions = 0;
19 | 
20 | 		@Override
21 | 		public void postVisit(final ASTNode node) {
22 | 
23 | 			if (node.getNodeType() == ASTNode.FUNCTION_DECLARATION)
24 | 				noFunctions++;
25 | 		}
26 | 
27 | 	}
28 | 
29 | 	public static void main(final String[] args) throws IOException {
30 | 		if (args.length != 1) {
31 | 			System.err.println("Usage <inputDirectory>");
32 | 			System.exit(-1);
33 | 		}
34 | 		final File directory = new File(args[0]);
35 | 		countFunctions(directory);
36 | 	}
37 | 
38 | 	public static void countFunctions(final File projectDir) throws IOException {
39 | 
40 | 		System.out.println("\n===== Project " + projectDir);
41 | 		final MethodClassCountVisitor mccv = new MethodClassCountVisitor();
42 | 		final JavascriptASTExtractor astExtractor = new JavascriptASTExtractor(
43 | 				false);
44 | 
45 | 		final List<File> files = (List<File>) FileUtils.listFiles(projectDir,
46 | 				new String[] { "js" }, true);
47 | 
48 | 		int count = 0;
49 | 		for (final File file : files) {
50 | 
51 | 			final JavaScriptUnit cu = astExtractor.getAST(file);
52 | 			cu.accept(mccv);
53 | 
54 | 			if (count % 1000 == 0)
55 | 				System.out.println("At file " + count + " of " + files.size());
56 | 			count++;
57 | 		}
58 | 
59 | 		System.out.println("Project " + projectDir);
60 | 		System.out.println("No. *.js files " + files.size());
61 | 		System.out.println("No. Functions: " + mccv.noFunctions);
62 | 	}
63 | 
64 | 	private JavascriptFunctionCounter() {
65 | 
66 | 	}
67 | 
68 | }
69 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/js/codeutils/FunctionRetriever.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.js.codeutils;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.Map;
 9 | 
10 | import org.eclipse.wst.jsdt.core.dom.ASTNode;
11 | import org.eclipse.wst.jsdt.core.dom.ASTVisitor;
12 | import org.eclipse.wst.jsdt.core.dom.FunctionDeclaration;
13 | import org.eclipse.wst.jsdt.core.dom.JavaScriptUnit;
14 | 
15 | import com.google.common.collect.Maps;
16 | 
17 | /**
18 |  * A utility class that retrieves the methods (as AST Nodes) of a file.
19 |  * 
20 |  * @author Miltos Allamanis
21 |  * 
22 |  */
23 | public final class FunctionRetriever extends ASTVisitor {
24 | 
25 | 	public static Map<String, FunctionDeclaration> getFunctionNodes(
26 | 			final File file) throws IOException {
27 | 		final JavascriptASTExtractor astExtractor = new JavascriptASTExtractor(
28 | 				false);
29 | 		final FunctionRetriever m = new FunctionRetriever();
30 | 		final JavaScriptUnit cu = astExtractor.getAST(file);
31 | 		cu.accept(m);
32 | 		return m.functions;
33 | 	}
34 | 
35 | 	public static Map<String, FunctionDeclaration> getFunctionNodes(
36 | 			final String file) throws Exception {
37 | 		final JavascriptASTExtractor astExtractor = new JavascriptASTExtractor(
38 | 				false);
39 | 		final FunctionRetriever m = new FunctionRetriever();
40 | 		final ASTNode cu = astExtractor.getCompilationUnitAstNode(file);
41 | 		cu.accept(m);
42 | 		return m.functions;
43 | 	}
44 | 
45 | 	private final Map<String, FunctionDeclaration> functions = Maps
46 | 			.newTreeMap();
47 | 
48 | 	private FunctionRetriever() {
49 | 
50 | 	}
51 | 
52 | 	@Override
53 | 	public boolean visit(final FunctionDeclaration node) {
54 | 		functions.put(node.getName().toString(), node);
55 | 		return super.visit(node);
56 | 	}
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/js/codeutils/binding/JavascriptExactVariableBindingsExtractor.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.js.codeutils.binding;
  5 | 
  6 | import static com.google.common.base.Preconditions.checkNotNull;
  7 | 
  8 | import java.util.Collections;
  9 | import java.util.List;
 10 | import java.util.Map;
 11 | import java.util.Map.Entry;
 12 | import java.util.Set;
 13 | 
 14 | import org.apache.commons.lang.NotImplementedException;
 15 | import org.eclipse.wst.jsdt.core.dom.*;
 16 | 
 17 | import codemining.js.codeutils.JavascriptASTExtractor;
 18 | import codemining.languagetools.bindings.TokenNameBinding;
 19 | 
 20 | import com.google.common.collect.Lists;
 21 | import com.google.common.collect.Maps;
 22 | import com.google.common.collect.Sets;
 23 | 
 24 | /**
 25 |  * Retrieve the variable bindings, given an ASTNode. This finds exact bindings
 26 |  * to the detriment of recall. Partial code snippets are not supported.
 27 |  *
 28 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 29 |  *
 30 |  */
 31 | public class JavascriptExactVariableBindingsExtractor extends
 32 | 		AbstractJavascriptNameBindingsExtractor {
 33 | 
 34 | 	/**
 35 | 	 * This class looks for declarations of variables and the references to
 36 | 	 * them.
 37 | 	 *
 38 | 	 */
 39 | 	private static class VariableBindingFinder extends ASTVisitor {
 40 | 		/**
 41 | 		 * Map of variables (represented as bindings) to all token positions
 42 | 		 * where the variable is referenced.
 43 | 		 */
 44 | 		Map<IVariableBinding, List<ASTNode>> variableScope = Maps
 45 | 				.newIdentityHashMap();
 46 | 
 47 | 		private void addBinding(final IVariableBinding binding) {
 48 | 			variableScope.put(binding, Lists.<ASTNode> newArrayList());
 49 | 		}
 50 | 
 51 | 		/**
 52 | 		 * @param binding
 53 | 		 */
 54 | 		private void addBindingData(final IVariableBinding binding,
 55 | 				final ASTNode nameNode) {
 56 | 			if (binding == null) {
 57 | 				return; // Sorry, cannot do anything.
 58 | 			}
 59 | 			final List<ASTNode> thisVarBindings = checkNotNull(
 60 | 					variableScope.get(binding),
 61 | 					"Binding was not previously found");
 62 | 			thisVarBindings.add(nameNode);
 63 | 		}
 64 | 
 65 | 		/**
 66 | 		 * Looks for field declarations (i.e. class member variables).
 67 | 		 */
 68 | 		@Override
 69 | 		public boolean visit(final FieldDeclaration node) {
 70 | 			for (final Object fragment : node.fragments()) {
 71 | 				final VariableDeclarationFragment frag = (VariableDeclarationFragment) fragment;
 72 | 				final IVariableBinding binding = frag.resolveBinding();
 73 | 				addBinding(binding);
 74 | 			}
 75 | 			return true;
 76 | 		}
 77 | 
 78 | 		/**
 79 | 		 * Visits {@link SimpleName} AST nodes. Resolves the binding of the
 80 | 		 * simple name and looks for it in the {@link #variableScope} map. If
 81 | 		 * the binding is found, this is a reference to a variable.
 82 | 		 *
 83 | 		 * @param node
 84 | 		 *            the node to visit
 85 | 		 */
 86 | 		@Override
 87 | 		public boolean visit(final SimpleName node) {
 88 | 			final IBinding binding = node.resolveBinding();
 89 | 			if (variableScope.containsKey(binding)) {
 90 | 				addBindingData((IVariableBinding) binding, node);
 91 | 			}
 92 | 			return true;
 93 | 		}
 94 | 
 95 | 		/**
 96 | 		 * Looks for Method Parameters.
 97 | 		 */
 98 | 		@Override
 99 | 		public boolean visit(final SingleVariableDeclaration node) {
100 | 			final IVariableBinding binding = node.resolveBinding();
101 | 			if (binding != null) {
102 | 				addBinding(binding);
103 | 			}
104 | 			return true;
105 | 		}
106 | 
107 | 		/**
108 | 		 * Looks for variables declared in for loops.
109 | 		 */
110 | 		@Override
111 | 		public boolean visit(final VariableDeclarationExpression node) {
112 | 			for (final Object fragment : node.fragments()) {
113 | 				final VariableDeclarationFragment frag = (VariableDeclarationFragment) fragment;
114 | 				final IVariableBinding binding = frag.resolveBinding();
115 | 				if (binding != null) {
116 | 					addBinding(binding);
117 | 				}
118 | 			}
119 | 			return true;
120 | 		}
121 | 
122 | 		/**
123 | 		 * Looks for local variable declarations. For every declaration of a
124 | 		 * variable, the parent {@link Block} denoting the variable's scope is
125 | 		 * stored in {@link #variableScope} map.
126 | 		 *
127 | 		 * @param node
128 | 		 *            the node to visit
129 | 		 */
130 | 		@Override
131 | 		public boolean visit(final VariableDeclarationStatement node) {
132 | 			for (final Object fragment : node.fragments()) {
133 | 				final VariableDeclarationFragment frag = (VariableDeclarationFragment) fragment;
134 | 				final IVariableBinding binding = frag.resolveBinding();
135 | 				if (binding != null) {
136 | 					addBinding(binding);
137 | 				}
138 | 			}
139 | 			return true;
140 | 		}
141 | 	}
142 | 
143 | 	@Override
144 | 	protected JavascriptASTExtractor createExtractor() {
145 | 		return new JavascriptASTExtractor(true);
146 | 	}
147 | 
148 | 	@Override
149 | 	public Set<?> getAvailableFeatures() {
150 | 		return Collections.emptySet();
151 | 	}
152 | 
153 | 	@Override
154 | 	public Set<Set<ASTNode>> getNameBindings(final ASTNode node) {
155 | 		final VariableBindingFinder bindingFinder = new VariableBindingFinder();
156 | 		node.accept(bindingFinder);
157 | 
158 | 		final Set<Set<ASTNode>> nameBindings = Sets.newHashSet();
159 | 		for (final Entry<IVariableBinding, List<ASTNode>> variableBindings : bindingFinder.variableScope
160 | 				.entrySet()) {
161 | 			final Set<ASTNode> boundNodes = Sets.newIdentityHashSet();
162 | 			boundNodes.addAll(variableBindings.getValue());
163 | 			nameBindings.add(boundNodes);
164 | 		}
165 | 		return nameBindings;
166 | 	}
167 | 
168 | 	@Override
169 | 	public List<TokenNameBinding> getNameBindings(final String code) {
170 | 		throw new UnsupportedOperationException(
171 | 				"Partial snippets cannot be resolved due to the "
172 | 						+ "lack of support from Eclipse JSDT. Consider using the approximate binding extractor.");
173 | 	}
174 | 
175 | 	@Override
176 | 	public void setActiveFeatures(final Set<?> activeFeatures) {
177 | 		throw new NotImplementedException();
178 | 	}
179 | }
180 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/js/codeutils/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * JavaScript code
3 |  */
4 | package codemining.js.codeutils;


--------------------------------------------------------------------------------
/src/main/java/codemining/langs/codeutils/AbstractJygmentsTokenizer.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.langs.codeutils;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.List;
  9 | import java.util.SortedMap;
 10 | 
 11 | import org.apache.commons.io.FileUtils;
 12 | import org.apache.commons.io.filefilter.AbstractFileFilter;
 13 | import org.apache.commons.io.filefilter.RegexFileFilter;
 14 | 
 15 | import codemining.languagetools.ITokenizer;
 16 | 
 17 | import com.google.common.collect.Lists;
 18 | import com.google.common.collect.Maps;
 19 | import com.threecrickets.jygments.ResolutionException;
 20 | import com.threecrickets.jygments.grammar.Lexer;
 21 | import com.threecrickets.jygments.grammar.Token;
 22 | import com.threecrickets.jygments.grammar.TokenType;
 23 | 
 24 | /**
 25 |  * Tokenize all languages
 26 |  * 
 27 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 28 |  * 
 29 |  */
 30 | public abstract class AbstractJygmentsTokenizer implements ITokenizer {
 31 | 
 32 | 	final Lexer lexer;
 33 | 
 34 | 	private final RegexFileFilter codeFilter;
 35 | 
 36 | 	private static final long serialVersionUID = 8826779180772076954L;
 37 | 
 38 | 	public AbstractJygmentsTokenizer(final String fileSuffix)
 39 | 			throws ResolutionException {
 40 | 		lexer = Lexer.getForFileName("sample." + fileSuffix);
 41 | 		// lexer.setStripAll(true);
 42 | 		// lexer.setStripNewLines(true);
 43 | 		// lexer.setTabSize(1);
 44 | 		codeFilter = new RegexFileFilter(".*\\." + fileSuffix + "$");
 45 | 	}
 46 | 
 47 | 	/*
 48 | 	 * (non-Javadoc)
 49 | 	 * 
 50 | 	 * @see codemining.languagetools.ITokenizer#getFileFilter()
 51 | 	 */
 52 | 	@Override
 53 | 	public AbstractFileFilter getFileFilter() {
 54 | 		return codeFilter;
 55 | 	}
 56 | 
 57 | 	@Override
 58 | 	public List<FullToken> getTokenListFromCode(final File codeFile)
 59 | 			throws IOException {
 60 | 		return getTokenListFromCode(FileUtils.readFileToString(codeFile)
 61 | 				.toCharArray());
 62 | 	}
 63 | 
 64 | 	public abstract String getTokenString(final Token tok);
 65 | 
 66 | 	/**
 67 | 	 * @param tok
 68 | 	 * @return
 69 | 	 */
 70 | 	protected boolean isProgramToken(final Token tok) {
 71 | 		final TokenType tokenType = tok.getType();
 72 | 		return tokenType == TokenType.Comment
 73 | 				|| tokenType == TokenType.Comment_Multiline
 74 | 				|| tokenType == TokenType.Comment_Single
 75 | 				|| tokenType == TokenType.Comment_Special
 76 | 				|| tokenType == TokenType.Comment_Preproc
 77 | 				|| tokenType == TokenType.Text || tok.getValue().equals(" ")
 78 | 				|| tok.getValue().equals("\n") || tok.getValue().equals("\t");
 79 | 	}
 80 | 
 81 | 	/*
 82 | 	 * (non-Javadoc)
 83 | 	 * 
 84 | 	 * @see codemining.languagetools.ITokenizer#tokenListFromCode(char[])
 85 | 	 */
 86 | 	@Override
 87 | 	public List<String> tokenListFromCode(final char[] code) {
 88 | 		final Iterable<Token> tokens = lexer.getTokens(new String(code));
 89 | 		final List<String> toks = Lists.newArrayList();
 90 | 		toks.add(SENTENCE_START);
 91 | 		for (final Token tok : tokens) {
 92 | 			if (isProgramToken(tok)) {
 93 | 				continue;
 94 | 			}
 95 | 			toks.add(getTokenString(tok));
 96 | 		}
 97 | 		toks.add(SENTENCE_END);
 98 | 		return toks;
 99 | 	}
100 | 
101 | 	@Override
102 | 	public List<String> tokenListFromCode(final File codeFile)
103 | 			throws IOException {
104 | 		return tokenListFromCode(FileUtils.readFileToString(codeFile)
105 | 				.toCharArray());
106 | 	}
107 | 
108 | 	/*
109 | 	 * (non-Javadoc)
110 | 	 * 
111 | 	 * @see codemining.languagetools.ITokenizer#tokenListWithPos(char[])
112 | 	 */
113 | 	@Override
114 | 	public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
115 | 		final Iterable<Token> tokens = lexer.getTokens(new String(code));
116 | 		final SortedMap<Integer, String> tokensWithPos = Maps.newTreeMap();
117 | 		tokensWithPos.put(-1, SENTENCE_START);
118 | 		tokensWithPos.put(Integer.MAX_VALUE, SENTENCE_END);
119 | 		for (final Token tok : tokens) {
120 | 			if (isProgramToken(tok)) {
121 | 				continue;
122 | 			}
123 | 			tokensWithPos.put(tok.getPos(), getTokenString(tok));
124 | 		}
125 | 		return tokensWithPos;
126 | 	}
127 | 
128 | }
129 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/langs/codeutils/CodeTokenizer.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.langs.codeutils;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.Collection;
  9 | import java.util.List;
 10 | import java.util.SortedMap;
 11 | 
 12 | import org.apache.commons.io.FileUtils;
 13 | 
 14 | import com.google.common.collect.Lists;
 15 | import com.google.common.collect.Maps;
 16 | import com.threecrickets.jygments.ResolutionException;
 17 | import com.threecrickets.jygments.grammar.Token;
 18 | import com.threecrickets.jygments.grammar.TokenType;
 19 | 
 20 | /**
 21 |  * Tokenize the code using the real tokens.
 22 |  *
 23 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 24 |  *
 25 |  */
 26 | public class CodeTokenizer extends AbstractJygmentsTokenizer {
 27 | 
 28 | 	private static final long serialVersionUID = -981980819807626795L;
 29 | 
 30 | 	/**
 31 | 	 * @param fileSuffix
 32 | 	 * @throws ResolutionException
 33 | 	 */
 34 | 	public CodeTokenizer(final String fileSuffix) throws ResolutionException {
 35 | 		super(fileSuffix);
 36 | 	}
 37 | 
 38 | 	@Override
 39 | 	public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
 40 | 		final Iterable<Token> tokens = lexer.getTokens(new String(code));
 41 | 		final SortedMap<Integer, FullToken> tokensWithPos = Maps.newTreeMap();
 42 | 		tokensWithPos.put(-1, new FullToken(SENTENCE_START, SENTENCE_START));
 43 | 		tokensWithPos.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END,
 44 | 				SENTENCE_END));
 45 | 		for (final Token tok : tokens) {
 46 | 			if (isProgramToken(tok)) {
 47 | 				continue;
 48 | 			}
 49 | 			tokensWithPos.put(tok.getPos(), new FullToken(getTokenString(tok),
 50 | 					tok.getType().getName()));
 51 | 		}
 52 | 		return tokensWithPos;
 53 | 	}
 54 | 
 55 | 	@Override
 56 | 	public String getIdentifierType() {
 57 | 		return TokenType.Name.getName();
 58 | 	}
 59 | 
 60 | 	@Override
 61 | 	public Collection<String> getKeywordTypes() {
 62 | 		return Lists.newArrayList(TokenType.Keyword.getName());
 63 | 	}
 64 | 
 65 | 	@Override
 66 | 	public Collection<String> getLiteralTypes() {
 67 | 		return Lists.newArrayList(TokenType.Literal.getName());
 68 | 	}
 69 | 
 70 | 	@Override
 71 | 	public FullToken getTokenFromString(final String token) {
 72 | 		return getTokenListFromCode(token.toCharArray()).get(1);
 73 | 	}
 74 | 
 75 | 	@Override
 76 | 	public List<FullToken> getTokenListFromCode(final char[] code) {
 77 | 		final Iterable<Token> tokens = lexer.getTokens(new String(code));
 78 | 		final List<FullToken> toks = Lists.newArrayList();
 79 | 		toks.add(new FullToken(SENTENCE_START, SENTENCE_START));
 80 | 		for (final Token tok : tokens) {
 81 | 			if (isProgramToken(tok)) {
 82 | 				continue;
 83 | 			}
 84 | 			toks.add(new FullToken(getTokenString(tok), tok.getType().getName()));
 85 | 		}
 86 | 		toks.add(new FullToken(SENTENCE_END, SENTENCE_END));
 87 | 		return toks;
 88 | 	}
 89 | 
 90 | 	@Override
 91 | 	public List<FullToken> getTokenListFromCode(final File codeFile)
 92 | 			throws IOException {
 93 | 		return getTokenListFromCode(FileUtils.readFileToString(codeFile)
 94 | 				.toCharArray());
 95 | 	}
 96 | 
 97 | 	/*
 98 | 	 * (non-Javadoc)
 99 | 	 *
100 | 	 * @see codemining.langs.codeutils.AbstractCodeTokenizer#getTokenString(com.
101 | 	 * threecrickets.jygments.grammar.Token)
102 | 	 */
103 | 	@Override
104 | 	public String getTokenString(final Token tok) {
105 | 		return tok.getValue().trim();
106 | 	}
107 | 
108 | 	@Override
109 | 	public SortedMap<Integer, FullToken> tokenListWithPos(final File file)
110 | 			throws IOException {
111 | 		return fullTokenListWithPos(FileUtils.readFileToString(file)
112 | 				.toCharArray());
113 | 	}
114 | 
115 | }
116 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/langs/codeutils/TokenTypeTokenizer.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *
  3 |  */
  4 | package codemining.langs.codeutils;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.Collection;
  9 | import java.util.List;
 10 | import java.util.SortedMap;
 11 | 
 12 | import org.apache.commons.io.FileUtils;
 13 | 
 14 | import com.google.common.collect.Lists;
 15 | import com.google.common.collect.Maps;
 16 | import com.threecrickets.jygments.ResolutionException;
 17 | import com.threecrickets.jygments.grammar.Token;
 18 | 
 19 | /**
 20 |  * Tokenize the code but return only the token types.
 21 |  *
 22 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 23 |  *
 24 |  */
 25 | public class TokenTypeTokenizer extends AbstractJygmentsTokenizer {
 26 | 
 27 | 	private static final long serialVersionUID = 5822480321022420348L;
 28 | 
 29 | 	/**
 30 | 	 * @param fileSuffix
 31 | 	 * @throws ResolutionException
 32 | 	 */
 33 | 	public TokenTypeTokenizer(final String fileSuffix)
 34 | 			throws ResolutionException {
 35 | 		super(fileSuffix);
 36 | 	}
 37 | 
 38 | 	@Override
 39 | 	public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
 40 | 		final Iterable<Token> tokens = lexer.getTokens(new String(code));
 41 | 		final SortedMap<Integer, FullToken> tokensWithPos = Maps.newTreeMap();
 42 | 		tokensWithPos.put(-1, new FullToken(SENTENCE_START, SENTENCE_START));
 43 | 		tokensWithPos.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END,
 44 | 				SENTENCE_END));
 45 | 		for (final Token tok : tokens) {
 46 | 			if (isProgramToken(tok)) {
 47 | 				continue;
 48 | 			}
 49 | 			tokensWithPos.put(tok.getPos(), new FullToken(getTokenString(tok),
 50 | 					""));
 51 | 		}
 52 | 		return tokensWithPos;
 53 | 	}
 54 | 
 55 | 	@Override
 56 | 	public String getIdentifierType() {
 57 | 		throw new IllegalArgumentException("Token types may not be computed");
 58 | 	}
 59 | 
 60 | 	@Override
 61 | 	public Collection<String> getKeywordTypes() {
 62 | 		throw new IllegalArgumentException("Token types may not be computed");
 63 | 	}
 64 | 
 65 | 	@Override
 66 | 	public Collection<String> getLiteralTypes() {
 67 | 		throw new IllegalArgumentException("Token types may not be computed");
 68 | 	}
 69 | 
 70 | 	@Override
 71 | 	public FullToken getTokenFromString(final String token) {
 72 | 		return new FullToken(token, "");
 73 | 	}
 74 | 
 75 | 	@Override
 76 | 	public List<FullToken> getTokenListFromCode(final char[] code) {
 77 | 		final Iterable<Token> tokens = lexer.getTokens(new String(code));
 78 | 		final List<FullToken> toks = Lists.newArrayList();
 79 | 		toks.add(new FullToken(SENTENCE_START, SENTENCE_START));
 80 | 		for (final Token tok : tokens) {
 81 | 			if (isProgramToken(tok)) {
 82 | 				continue;
 83 | 			}
 84 | 			toks.add(new FullToken(tok.getType().getName(), ""));
 85 | 		}
 86 | 		toks.add(new FullToken(SENTENCE_END, SENTENCE_END));
 87 | 		return toks;
 88 | 	}
 89 | 
 90 | 	/*
 91 | 	 * (non-Javadoc)
 92 | 	 *
 93 | 	 * @see codemining.langs.codeutils.AbstractCodeTokenizer#getTokenString(com.
 94 | 	 * threecrickets.jygments.grammar.Token)
 95 | 	 */
 96 | 	@Override
 97 | 	public String getTokenString(final Token tok) {
 98 | 		return tok.getType().getName();
 99 | 	}
100 | 
101 | 	@Override
102 | 	public SortedMap<Integer, FullToken> tokenListWithPos(final File file)
103 | 			throws IOException {
104 | 		return fullTokenListWithPos(FileUtils.readFileToString(file)
105 | 				.toCharArray());
106 | 	}
107 | 
108 | }
109 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/langs/codeutils/package-info.java:
--------------------------------------------------------------------------------
1 | /**
 * A package containing code utilities for all programming languages.
3 |  * 
4 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
5 |  *
6 |  */
7 | package codemining.langs.codeutils;


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/ClassHierarchy.java:
--------------------------------------------------------------------------------
  1 | package codemining.languagetools;
  2 | 
  3 | import static com.google.common.base.Preconditions.checkArgument;
  4 | 
  5 | import java.io.Serializable;
  6 | import java.util.Collection;
  7 | import java.util.Map;
  8 | import java.util.Set;
  9 | 
 10 | import com.google.common.base.Optional;
 11 | import com.google.common.collect.ImmutableList;
 12 | import com.google.common.collect.Maps;
 13 | import com.google.common.collect.Sets;
 14 | 
 15 | /**
 16 |  * A class hierarchy contains all the implemented types of a single type.
 17 |  *
 18 |  * The type hierarchy makes an effort to be as compact as possible, removing
 19 |  * redundant is-a relationships.
 20 |  *
 21 |  * Not thread safe.
 22 |  *
 23 |  */
 24 | public class ClassHierarchy implements Serializable {
 25 | 
 26 | 	public static final class Type implements Serializable {
 27 | 
 28 | 		public final String fullQualifiedName;
 29 | 
 30 | 		private final Set<Type> implementingTypes = Sets.newIdentityHashSet();
 31 | 
 32 | 		/**
 33 | 		 * A closure of the implementing types.
 34 | 		 */
 35 | 		private final Set<Type> implementingTypesClosure = Sets
 36 | 				.newIdentityHashSet();
 37 | 
 38 | 		private final Set<Type> childTypes = Sets.newIdentityHashSet();
 39 | 		/**
 40 | 		 * A closure of the child Types
 41 | 		 */
 42 | 		private final Set<Type> childTypesClosure = Sets.newIdentityHashSet();
 43 | 
 44 | 		private static final long serialVersionUID = -4245298170285828934L;
 45 | 
 46 | 		public Type(final String fqName) {
 47 | 			fullQualifiedName = fqName;
 48 | 		}
 49 | 
 50 | 		/**
 51 | 		 * Add a childType for the given type, only if it does not belong to its
 52 | 		 * transitive closure.
 53 | 		 *
 54 | 		 * @param childType
 55 | 		 */
 56 | 		private final void addChildType(final Type childType) {
 57 | 			checkArgument(!implementingTypesClosure.contains(childType));
 58 | 			if (childTypesClosure.contains(childType)) {
 59 | 				return;
 60 | 			}
 61 | 
 62 | 			// If the given type is already a child of a parent type,
 63 | 			// we need to remove it from its implementing type.
 64 | 			implementingTypesClosure.forEach(t -> t.childTypes
 65 | 					.remove(childType));
 66 | 
 67 | 			childTypes.add(childType);
 68 | 
 69 | 			// Update parents closures
 70 | 			implementingTypesClosure.forEach(t -> t.childTypesClosure
 71 | 					.add(childType));
 72 | 		}
 73 | 
 74 | 		/**
 75 | 		 * Add an implementing type of this type only if it does not belong to
 76 | 		 * its transitive closure.
 77 | 		 *
 78 | 		 * @param implementingType
 79 | 		 */
 80 | 		private final void addImplementingType(final Type implementingType) {
 81 | 			checkArgument(!childTypesClosure.contains(implementingType));
 82 | 			if (implementingTypesClosure.contains(implementingType)) {
 83 | 				return;
 84 | 			}
 85 | 
 86 | 			// If the type is already an implementing type of a child,
 87 | 			// we need to remove it from its child types
 88 | 			childTypesClosure.forEach(t -> t.implementingTypes
 89 | 					.remove(implementingType));
 90 | 
 91 | 			implementingTypes.add(implementingType);
 92 | 
 93 | 			// Update children closures
 94 | 			childTypesClosure.forEach(t -> t.implementingTypesClosure
 95 | 					.add(implementingType));
 96 | 		}
 97 | 
 98 | 		public Collection<Type> getImplementingTypesClosure() {
 99 | 			return new ImmutableList.Builder<Type>()
100 | 					.addAll(implementingTypesClosure).addAll(implementingTypes)
101 | 					.build();
102 | 		}
103 | 
104 | 		@Override
105 | 		public String toString() {
106 | 			final StringBuilder sb = new StringBuilder();
107 | 			sb.append(fullQualifiedName);
108 | 			if (!implementingTypes.isEmpty()) {
109 | 				sb.append("[ implements ");
110 | 				implementingTypes.forEach(t -> sb.append(t.fullQualifiedName
111 | 						+ " "));
112 | 			}
113 | 			if (!childTypes.isEmpty()) {
114 | 				sb.append(" isimplementedby ");
115 | 				childTypes.forEach(t -> sb.append(t.fullQualifiedName + " "));
116 | 			}
117 | 			sb.append("]");
118 | 			return sb.toString();
119 | 		}
120 | 
121 | 	}
122 | 
123 | 	private static final long serialVersionUID = 8866244164953568827L;
124 | 
125 | 	private final Map<String, Type> nameToType = Maps.newTreeMap();
126 | 
127 | 	/**
128 | 	 * Add a type relationship.
129 | 	 *
130 | 	 * @param type
131 | 	 * @param parentTypeFqn
132 | 	 */
133 | 	public void addParentToType(final String type, final String parentTypeFqn) {
134 | 		final Type childType = getTypeOrNew(type);
135 | 		final Type parentType = getTypeOrNew(parentTypeFqn);
136 | 		childType.addImplementingType(parentType);
137 | 		parentType.addChildType(childType);
138 | 	}
139 | 
140 | 	public Optional<Type> getTypeForName(final String fqName) {
141 | 		if (nameToType.containsKey(fqName)) {
142 | 			return Optional.of(nameToType.get(fqName));
143 | 		}
144 | 		return Optional.absent();
145 | 	}
146 | 
147 | 	/**
148 | 	 * Get a type that already exists or a create a new type.
149 | 	 *
150 | 	 * @param fqName
151 | 	 */
152 | 	private Type getTypeOrNew(final String fqName) {
153 | 		final Type type;
154 | 		if (nameToType.containsKey(fqName)) {
155 | 			type = nameToType.get(fqName);
156 | 		} else {
157 | 			type = new Type(fqName);
158 | 			nameToType.put(fqName, type);
159 | 		}
160 | 		return type;
161 | 	}
162 | 
163 | 	@Override
164 | 	public String toString() {
165 | 		final StringBuilder sb = new StringBuilder();
166 | 		for (final Type type : nameToType.values()) {
167 | 			sb.append(type.toString());
168 | 			sb.append(System.lineSeparator());
169 | 		}
170 | 		return sb.toString();
171 | 	}
172 | 
173 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/CodePrinter.java:
--------------------------------------------------------------------------------
  1 | package codemining.languagetools;
  2 | 
  3 | import java.awt.Color;
  4 | import java.io.File;
  5 | import java.io.IOException;
  6 | import java.util.List;
  7 | import java.util.Map.Entry;
  8 | import java.util.SortedMap;
  9 | 
 10 | import org.apache.commons.io.FileUtils;
 11 | import org.apache.commons.lang.StringEscapeUtils;
 12 | 
 13 | import codemining.languagetools.ITokenizer.FullToken;
 14 | import codemining.util.SettingsLoader;
 15 | 
 16 | /**
 17 |  * Output Java code to HTML with optional coloring. Not thread-safe.
 18 |  * 
 19 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 20 |  * 
 21 |  */
 22 | public class CodePrinter {
 23 | 
 24 | 	/**
 25 | 	 * The tokenizer used to tokenize code.
 26 | 	 */
 27 | 	final ITokenizer tokenizer;
 28 | 
 29 | 	/**
 30 | 	 * The background Color of the output HTML document.
 31 | 	 */
 32 | 	final Color documentBackgroundColor;
 33 | 
 34 | 	int lineNumber = 1;
 35 | 
 36 | 	private final boolean ignoreTokBG = SettingsLoader.getBooleanSetting(
 37 | 			"ignoreTokenBackground", true);
 38 | 
 39 | 	public static final String CSS_STYLE = "<style>\n.line {font-family:monospace; "
 40 | 			+ "font: 14px/1.3 \"Source Code Pro\", \"Fira Mono OT\", monospace;white-space:pre;}\n"
 41 | 			+ ".line:hover {font-family:monospace; "
 42 | 			+ "font: 14px/1.3 \"Source Code Pro\", \"Fira Mono OT\", monospace;white-space:pre; background-color:rgb(240,240,240);}\n"
 43 | 			+ "</style>";
 44 | 
 45 | 	public CodePrinter(final ITokenizer tokenizer,
 46 | 			final Color documentBackgroundColor) {
 47 | 		this.tokenizer = tokenizer;
 48 | 		this.documentBackgroundColor = documentBackgroundColor;
 49 | 	}
 50 | 
 51 | 	private void addSlack(final String substring, final StringBuffer buf) {
 52 | 		for (final char c : StringEscapeUtils.escapeHtml(substring)
 53 | 				.toCharArray()) {
 54 | 			if (c == '\n') {
 55 | 				appendLineDiv(buf, true);
 56 | 			} else {
 57 | 				buf.append(c);
 58 | 			}
 59 | 		}
 60 | 
 61 | 	}
 62 | 
 63 | 	private void appendLineDiv(final StringBuffer buf,
 64 | 			final boolean closePrevious) {
 65 | 		if (closePrevious) {
 66 | 			buf.append("<br/></div>\n");
 67 | 		}
 68 | 		buf.append("<div class='line' id='C" + lineNumber + "'>");
 69 | 		lineNumber++;
 70 | 	}
 71 | 
 72 | 	/**
 73 | 	 * Return a StringBuffer with colored tokens as specified from the
 74 | 	 * coloredTokens. There should be one-to-one correspondence with the actual
 75 | 	 * tokens.
 76 | 	 */
 77 | 	public StringBuffer getHTMLwithColors(
 78 | 			final List<ColoredToken> coloredTokens, final File codeFile)
 79 | 			throws IOException, InstantiationException, IllegalAccessException {
 80 | 		final String code = FileUtils.readFileToString(codeFile);
 81 | 		lineNumber = 1;
 82 | 
 83 | 		final StringBuffer buf = new StringBuffer();
 84 | 
 85 | 		final SortedMap<Integer, FullToken> toks = tokenizer
 86 | 				.fullTokenListWithPos(code.toCharArray());
 87 | 
 88 | 		int i = 0;
 89 | 		int prevPos = 0;
 90 | 		buf.append("<html>\n<head>\n<link href='http://fonts.googleapis.com/css?family=Source+Code+Pro:300,400,500,600,700,800,900' rel='stylesheet' type='text/css'>\n");
 91 | 		buf.append(CSS_STYLE);
 92 | 		buf.append("</head>\n<body style='background-color:rgb("
 93 | 				+ documentBackgroundColor.getRed() + ","
 94 | 				+ documentBackgroundColor.getGreen() + ","
 95 | 				+ documentBackgroundColor.getBlue() + ")'>");
 96 | 		appendLineDiv(buf, false);
 97 | 		for (final Entry<Integer, FullToken> entry : toks.entrySet()) {
 98 | 			if (i == 0 || entry.getKey() == Integer.MAX_VALUE) {
 99 | 				i++;
100 | 				continue;
101 | 			}
102 | 			addSlack(code.substring(prevPos, entry.getKey()), buf);
103 | 			final ColoredToken tok = coloredTokens.get(i);
104 | 
105 | 			buf.append("<span style='background-color:rgba("
106 | 					+ tok.bgColor.getRed() + "," + tok.bgColor.getGreen() + ","
107 | 					+ tok.bgColor.getBlue() + "," + (ignoreTokBG ? "0" : "1")
108 | 					+ "); color:rgb(" + tok.fontColor.getRed() + ","
109 | 					+ tok.fontColor.getGreen() + "," + tok.fontColor.getBlue()
110 | 					+ "); " + tok.extraStyle + "'>"
111 | 					+ StringEscapeUtils.escapeHtml(entry.getValue().token)
112 | 					+ "</span>");
113 | 			i++;
114 | 			prevPos = entry.getKey() + entry.getValue().token.length();
115 | 		}
116 | 		buf.append("</div></body></html>");
117 | 		return buf;
118 | 
119 | 	}
120 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/ColoredToken.java:
--------------------------------------------------------------------------------
 1 | package codemining.languagetools;
 2 | 
 3 | import java.awt.Color;
 4 | 
 5 | /**
 6 |  * Struct class representing a colored token.
 7 |  * 
 8 |  */
 9 | public final class ColoredToken {
10 | 	public Color fontColor;
11 | 	public final Color bgColor;
12 | 	public final String token;
13 | 	public String extraStyle;
14 | 
15 | 	/**
16 | 	 * Construct with default bgColor white.
17 | 	 * 
18 | 	 * @param token
19 | 	 * @param fontColor
20 | 	 */
21 | 	public ColoredToken(final String token, final Color fontColor) {
22 | 		this.token = token;
23 | 		this.fontColor = fontColor;
24 | 		bgColor = Color.WHITE;
25 | 		extraStyle = "";
26 | 	}
27 | 
28 | 	public ColoredToken(final String token, final Color fontColor,
29 | 			final Color bgColor, final String extraStyle) {
30 | 		this.token = token;
31 | 		this.fontColor = fontColor;
32 | 		this.bgColor = bgColor;
33 | 		this.extraStyle = extraStyle;
34 | 	}
35 | 
36 | 	public void setColor(final Color fontColor) {
37 | 		this.fontColor = fontColor;
38 | 	}
39 | 
40 | 	public void setStyle(final String extraStyle) {
41 | 		this.extraStyle = extraStyle;
42 | 	}
43 | 
44 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/FormattingTokenizer.java:
--------------------------------------------------------------------------------
  1 | package codemining.languagetools;
  2 | 
  3 | import java.io.File;
  4 | import java.io.IOException;
  5 | import java.util.Collection;
  6 | import java.util.List;
  7 | import java.util.Map.Entry;
  8 | import java.util.SortedMap;
  9 | 
 10 | import org.apache.commons.io.FileUtils;
 11 | import org.apache.commons.io.filefilter.AbstractFileFilter;
 12 | 
 13 | import com.google.common.collect.Lists;
 14 | import com.google.common.collect.Maps;
 15 | 
 16 | /**
 17 |  * Adds a NO_SPACE between tokens that contain, no space.
 18 |  *
 19 |  */
 20 | public class FormattingTokenizer implements IFormattingTokenizer {
 21 | 
 22 | 	public static final String WS_NO_SPACE = "WS_NO_SPACE";
 23 | 	private static final long serialVersionUID = -1736507313790110846L;
 24 | 	final ITokenizer baseTokenizer;
 25 | 
 26 | 	public FormattingTokenizer(final ITokenizer baseTokenizer) {
 27 | 		this.baseTokenizer = baseTokenizer;
 28 | 	}
 29 | 
 30 | 	@Override
 31 | 	public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
 32 | 		throw new IllegalArgumentException("Cannot be implemented");
 33 | 	}
 34 | 
 35 | 	public ITokenizer getBaseTokenizer() {
 36 | 		return baseTokenizer;
 37 | 	}
 38 | 
 39 | 	@Override
 40 | 	public AbstractFileFilter getFileFilter() {
 41 | 		return baseTokenizer.getFileFilter();
 42 | 	}
 43 | 
 44 | 	@Override
 45 | 	public String getIdentifierType() {
 46 | 		return baseTokenizer.getIdentifierType();
 47 | 	}
 48 | 
 49 | 	@Override
 50 | 	public Collection<String> getKeywordTypes() {
 51 | 		return baseTokenizer.getKeywordTypes();
 52 | 	}
 53 | 
 54 | 	@Override
 55 | 	public Collection<String> getLiteralTypes() {
 56 | 		return baseTokenizer.getLiteralTypes();
 57 | 	}
 58 | 
 59 | 	@Override
 60 | 	public FullToken getTokenFromString(final String token) {
 61 | 		return baseTokenizer.getTokenFromString(token);
 62 | 	}
 63 | 
 64 | 	@Override
 65 | 	public List<FullToken> getTokenListFromCode(final char[] code) {
 66 | 		final List<FullToken> list = Lists.newArrayList();
 67 | 		final List<FullToken> original = baseTokenizer
 68 | 				.getTokenListFromCode(code);
 69 | 		for (int i = 0; i < original.size() - 1; i++) {
 70 | 			final FullToken currentToken = original.get(i);
 71 | 			list.add(currentToken);
 72 | 			final FullToken nextToken = original.get(i + 1);
 73 | 			if (!currentToken.token.startsWith("WS_")
 74 | 					&& !nextToken.token.startsWith("WS_")) {
 75 | 				list.add(new FullToken(WS_NO_SPACE, ""));
 76 | 			}
 77 | 		}
 78 | 		list.add(original.get(original.size() - 1));
 79 | 		return list;
 80 | 	}
 81 | 
 82 | 	@Override
 83 | 	public List<FullToken> getTokenListFromCode(final File codeFile)
 84 | 			throws IOException {
 85 | 		return getTokenListFromCode(FileUtils.readFileToString(codeFile)
 86 | 				.toCharArray());
 87 | 	}
 88 | 
 89 | 	@Override
 90 | 	public List<String> tokenListFromCode(final char[] code) {
 91 | 		// TODO Duplicate
 92 | 		final List<String> list = Lists.newArrayList();
 93 | 		final List<String> original = baseTokenizer.tokenListFromCode(code);
 94 | 		for (int i = 0; i < original.size() - 1; i++) {
 95 | 			final String currentToken = original.get(i);
 96 | 			list.add(currentToken);
 97 | 			final String nextToken = original.get(i + 1);
 98 | 			if (!currentToken.startsWith("WS_") && !nextToken.startsWith("WS_")) {
 99 | 				list.add(WS_NO_SPACE);
100 | 			}
101 | 		}
102 | 		list.add(original.get(original.size() - 1));
103 | 		return list;
104 | 	}
105 | 
106 | 	@Override
107 | 	public List<String> tokenListFromCode(final File codeFile)
108 | 			throws IOException {
109 | 		return tokenListFromCode(FileUtils.readFileToString(codeFile)
110 | 				.toCharArray());
111 | 	}
112 | 
113 | 	@Override
114 | 	public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
115 | 		throw new IllegalArgumentException("Cannot be implemented");
116 | 	}
117 | 
118 | 	@Override
119 | 	public SortedMap<Integer, FullToken> tokenListWithPos(final File file)
120 | 			throws IOException {
121 | 		return fullTokenListWithPos(FileUtils.readFileToString(file)
122 | 				.toCharArray());
123 | 	}
124 | 
125 | 	/**
126 | 	 * Return the position of just the whitespaces in the code.
127 | 	 *
128 | 	 * @param code
129 | 	 * @return
130 | 	 */
131 | 	public SortedMap<Integer, String> whitespaceTokenPositions(final char[] code) {
132 | 		final SortedMap<Integer, String> wsPositions = Maps.newTreeMap();
133 | 		final SortedMap<Integer, String> originalPositions = baseTokenizer
134 | 				.tokenListWithPos(code);
135 | 
136 | 		boolean previousWasWhitespace = true;
137 | 		for (final Entry<Integer, String> tokenEntry : originalPositions
138 | 				.entrySet()) {
139 | 			if (tokenEntry.getValue().startsWith(ITokenizer.SENTENCE_START)
140 | 					|| tokenEntry.getValue()
141 | 					.startsWith(ITokenizer.SENTENCE_END)) {
142 | 				continue;
143 | 			}
144 | 			if (tokenEntry.getValue().startsWith("WS_")) {
145 | 				wsPositions.put(tokenEntry.getKey(), tokenEntry.getValue());
146 | 				previousWasWhitespace = true;
147 | 			} else if (!previousWasWhitespace) {
148 | 				wsPositions.put(tokenEntry.getKey(), WS_NO_SPACE);
149 | 				previousWasWhitespace = false;
150 | 			} else {
151 | 				previousWasWhitespace = false;
152 | 			}
153 | 		}
154 | 
155 | 		return wsPositions;
156 | 	}
157 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/IAstAnnotatedTokenizer.java:
--------------------------------------------------------------------------------
  1 | package codemining.languagetools;
  2 | 
  3 | import java.io.File;
  4 | import java.io.IOException;
  5 | import java.io.Serializable;
  6 | import java.util.List;
  7 | import java.util.SortedMap;
  8 | 
  9 | import com.google.common.base.Function;
 10 | import com.google.common.base.Objects;
 11 | 
 12 | /**
 13 |  * A tokenizer interface that returns AST annotated tokens.
 14 |  * 
 15 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 16 |  * 
 17 |  */
 18 | public interface IAstAnnotatedTokenizer extends ITokenizer {
 19 | 
 20 | 	/**
 21 | 	 * A struct class for representing AST annotated tokens.
 22 | 	 */
 23 | 	public static class AstAnnotatedToken implements Serializable {
 24 | 
 25 | 		private static final long serialVersionUID = -8505721476537620929L;
 26 | 
 27 | 		public static final Function<AstAnnotatedToken, FullToken> TOKEN_FLATTEN_FUNCTION = new Function<AstAnnotatedToken, FullToken>() {
 28 | 			@Override
 29 | 			public FullToken apply(final AstAnnotatedToken input) {
 30 | 				if (input.tokenAstNode != null
 31 | 						&& input.parentTokenAstNode != null) {
 32 | 					return new FullToken(input.token.token + "->in{"
 33 | 							+ input.tokenAstNode + "->"
 34 | 							+ input.parentTokenAstNode + "}",
 35 | 							input.token.tokenType);
 36 | 				} else {
 37 | 					return new FullToken(input.token);
 38 | 				}
 39 | 			}
 40 | 		};
 41 | 
 42 | 		public final FullToken token;
 43 | 		public final String tokenAstNode;
 44 | 		public final String parentTokenAstNode;
 45 | 
 46 | 		public AstAnnotatedToken(final FullToken token,
 47 | 				final String tokenAstNode, final String parentTokenAstNode) {
 48 | 			this.token = token;
 49 | 			this.tokenAstNode = tokenAstNode;
 50 | 			this.parentTokenAstNode = parentTokenAstNode;
 51 | 		}
 52 | 
 53 | 		@Override
 54 | 		public boolean equals(final Object obj) {
 55 | 			if (this == obj) {
 56 | 				return true;
 57 | 			}
 58 | 			if (obj == null) {
 59 | 				return false;
 60 | 			}
 61 | 			if (getClass() != obj.getClass()) {
 62 | 				return false;
 63 | 			}
 64 | 			final AstAnnotatedToken other = (AstAnnotatedToken) obj;
 65 | 			return Objects.equal(other.token, token)
 66 | 					&& Objects.equal(other.tokenAstNode, tokenAstNode)
 67 | 					&& Objects.equal(other.parentTokenAstNode,
 68 | 							parentTokenAstNode);
 69 | 		}
 70 | 
 71 | 		@Override
 72 | 		public int hashCode() {
 73 | 			return Objects.hashCode(token, tokenAstNode, parentTokenAstNode);
 74 | 		}
 75 | 
 76 | 		@Override
 77 | 		public String toString() {
 78 | 			return TOKEN_FLATTEN_FUNCTION.apply(this).toString();
 79 | 		}
 80 | 
 81 | 	}
 82 | 
 83 | 	public abstract List<AstAnnotatedToken> getAnnotatedTokenListFromCode(
 84 | 			char[] code);
 85 | 
 86 | 	public abstract List<AstAnnotatedToken> getAnnotatedTokenListFromCode(
 87 | 			File codeFile) throws IOException;
 88 | 
 89 | 	/**
 90 | 	 * @param code
 91 | 	 * @return
 92 | 	 */
 93 | 	public abstract SortedMap<Integer, AstAnnotatedToken> getAnnotatedTokens(
 94 | 			char[] code);
 95 | 
 96 | 	/**
 97 | 	 * Return the base tokenizer whose tokens are annotated.
 98 | 	 * 
 99 | 	 * @return
100 | 	 */
101 | 	public ITokenizer getBaseTokenizer();
102 | 
103 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/IFormattingTokenizer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.languagetools;
 5 | 
/**
 * A marker tokenizer interface: it declares no members of its own beyond
 * those inherited from ITokenizer. Implementing it suggests that the
 * tokenizer produces whitespace tokens.
 * 
 * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 * 
 */
public interface IFormattingTokenizer extends ITokenizer {

}
16 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/IScopeExtractor.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.languagetools;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | 
 9 | import org.eclipse.jdt.core.dom.ASTNode;
10 | 
11 | import com.google.common.collect.Multimap;
12 | 
13 | /**
14 |  * A interface for extracting scoped related information. Scope extractors
15 |  * return a multimap that for each scope contains all the identifiers that are
16 |  * declared there.
17 |  * 
18 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
19 |  * 
20 |  */
21 | public interface IScopeExtractor {
22 | 	Multimap<Scope, String> getFromFile(final File file) throws IOException;
23 | 
24 | 	Multimap<Scope, String> getFromNode(final ASTNode node);
25 | 
26 | 	Multimap<Scope, String> getFromString(final String code,
27 | 			final ParseType parseType);
28 | }
29 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/ITokenizer.java:
--------------------------------------------------------------------------------
  1 | package codemining.languagetools;
  2 | 
  3 | import java.io.File;
  4 | import java.io.IOException;
  5 | import java.io.Serializable;
  6 | import java.util.Collection;
  7 | import java.util.List;
  8 | import java.util.SortedMap;
  9 | 
 10 | import org.apache.commons.io.filefilter.AbstractFileFilter;
 11 | 
 12 | import com.google.common.base.Function;
 13 | import com.google.common.base.Objects;
 14 | 
 15 | /**
 16 |  * Interface of a code tokenizer.
 17 |  *
 18 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 19 |  *
 20 |  */
 21 | public interface ITokenizer extends Serializable {
 22 | 
 23 | 	public static class FullToken implements Serializable {
 24 | 
 25 | 		private static final long serialVersionUID = -49456240173307314L;
 26 | 
 27 | 		public static final Function<FullToken, String> TOKEN_NAME_CONVERTER = new Function<FullToken, String>() {
 28 | 			@Override
 29 | 			public String apply(final FullToken input) {
 30 | 				return input.token;
 31 | 			}
 32 | 		};
 33 | 
 34 | 		public final String token;
 35 | 
 36 | 		public final String tokenType;
 37 | 
 38 | 		public FullToken(final FullToken other) {
 39 | 			token = other.token;
 40 | 			tokenType = other.tokenType;
 41 | 		}
 42 | 
 43 | 		public FullToken(final String tokName, final String tokType) {
 44 | 			token = tokName;
 45 | 			tokenType = tokType;
 46 | 		}
 47 | 
 48 | 		@Override
 49 | 		public boolean equals(final Object obj) {
 50 | 			if (!(obj instanceof FullToken)) {
 51 | 				return false;
 52 | 			}
 53 | 			final FullToken other = (FullToken) obj;
 54 | 			return other.token.equals(token)
 55 | 					&& other.tokenType.equals(tokenType);
 56 | 		}
 57 | 
 58 | 		@Override
 59 | 		public int hashCode() {
 60 | 			return Objects.hashCode(token, tokenType);
 61 | 		}
 62 | 
 63 | 		@Override
 64 | 		public String toString() {
 65 | 			return token + " (" + tokenType + ")";
 66 | 		}
 67 | 
 68 | 	}
 69 | 
 70 | 	/**
 71 | 	 * A sentence end (constant) token
 72 | 	 */
 73 | 	static final String SENTENCE_END = "<SENTENCE_END/>";
 74 | 
 75 | 	/**
 76 | 	 * A sentence start (constant) token
 77 | 	 */
 78 | 	static final String SENTENCE_START = "<SENTENCE_START>";
 79 | 
 80 | 	/**
 81 | 	 * Return a list with the full tokens.
 82 | 	 *
 83 | 	 * @param code
 84 | 	 * @return
 85 | 	 */
 86 | 	SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code);
 87 | 
 88 | 	/**
 89 | 	 * Return a file filter, filtering the files that can be tokenized.
 90 | 	 *
 91 | 	 * @return
 92 | 	 *
 93 | 	 */
 94 | 	AbstractFileFilter getFileFilter();
 95 | 
 96 | 	/**
 97 | 	 * Return the token type that signifies that a token is an identifier.
 98 | 	 *
 99 | 	 * @return
100 | 	 */
101 | 	String getIdentifierType();
102 | 
103 | 	/**
104 | 	 * Return the token types that are keywords.
105 | 	 * 
106 | 	 * @return
107 | 	 */
108 | 	Collection<String> getKeywordTypes();
109 | 
110 | 	/**
111 | 	 * Return the types the represent literals.
112 | 	 *
113 | 	 * @return
114 | 	 */
115 | 	Collection<String> getLiteralTypes();
116 | 
117 | 	/**
118 | 	 * Return a full token given a string token.
119 | 	 *
120 | 	 * @param token
121 | 	 * @return
122 | 	 */
123 | 	FullToken getTokenFromString(final String token);
124 | 
125 | 	/**
126 | 	 * Get the list of tokens from the code.
127 | 	 *
128 | 	 * @param code
129 | 	 * @return
130 | 	 */
131 | 	List<FullToken> getTokenListFromCode(final char[] code);
132 | 
133 | 	/**
134 | 	 * Get the list of tokens from the code.
135 | 	 *
136 | 	 * @param code
137 | 	 * @return
138 | 	 */
139 | 	List<FullToken> getTokenListFromCode(final File codeFile)
140 | 			throws IOException;
141 | 
142 | 	/**
143 | 	 * Tokenize some code.
144 | 	 *
145 | 	 * @param code
146 | 	 *            the code
147 | 	 * @return a list of tokens
148 | 	 */
149 | 	List<String> tokenListFromCode(final char[] code);
150 | 
151 | 	/**
152 | 	 * Tokenize code given a file.
153 | 	 *
154 | 	 * @param codeFile
155 | 	 * @return
156 | 	 */
157 | 	List<String> tokenListFromCode(final File codeFile) throws IOException;
158 | 
159 | 	/**
160 | 	 * Return a list of tokens along with their positions.
161 | 	 *
162 | 	 * @param code
163 | 	 * @return
164 | 	 */
165 | 	SortedMap<Integer, String> tokenListWithPos(final char[] code);
166 | 
167 | 	/**
168 | 	 * Return a list of tokens along with their positions.
169 | 	 *
170 | 	 * @param file
171 | 	 * @return
172 | 	 * @throws IOException
173 | 	 */
174 | 	SortedMap<Integer, FullToken> tokenListWithPos(File file)
175 | 			throws IOException;
176 | 
177 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/ParseType.java:
--------------------------------------------------------------------------------
1 | package codemining.languagetools;
2 | 
/**
 * Enumeration of parse types.
 *
 * NOTE(review): presumably each constant names the kind of code fragment a
 * parser is asked to handle; confirm against the callers.
 */
public enum ParseType {
	COMPILATION_UNIT,
	CLASS_BODY,
	METHOD,
	STATEMENTS,
	EXPRESSION;
}


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/Scope.java:
--------------------------------------------------------------------------------
 1 | package codemining.languagetools;
 2 | 
 3 | import com.google.common.base.Objects;
 4 | import com.google.common.collect.ComparisonChain;
 5 | 
 6 | /**
 7 |  * A utility class to represent scopes.
 8 |  * 
 9 |  */
10 | public class Scope implements Comparable<Scope> {
11 | 
12 | 	public enum ScopeType {
13 | 		SCOPE_CLASS, SCOPE_LOCAL, SCOPE_METHOD
14 | 	}
15 | 
16 | 	public final String code;
17 | 
18 | 	public final ScopeType scopeType;
19 | 
20 | 	public final String type;
21 | 
22 | 	public final int astNodeType;
23 | 	public final int astParentNodeType;
24 | 
25 | 	public Scope(final String code, final ScopeType scopeType,
26 | 			final String type, final int astNodeType,
27 | 			final int astParentNodeType) {
28 | 		this.code = code;
29 | 		this.scopeType = scopeType;
30 | 		this.type = type;
31 | 		this.astNodeType = astNodeType;
32 | 		this.astParentNodeType = astParentNodeType;
33 | 	}
34 | 
35 | 	@Override
36 | 	public int compareTo(final Scope other) {
37 | 		return ComparisonChain.start().compare(code, other.code)
38 | 				.compare(scopeType, other.scopeType).compare(type, other.type)
39 | 				.compare(astNodeType, other.astNodeType)
40 | 				.compare(astParentNodeType, other.astParentNodeType).result();
41 | 	}
42 | 
43 | 	@Override
44 | 	public boolean equals(final Object obj) {
45 | 		if (!(obj instanceof Scope)) {
46 | 			return false;
47 | 		}
48 | 		final Scope other = (Scope) obj;
49 | 		return other.code.equals(code) && other.scopeType == scopeType
50 | 				&& other.astNodeType == astNodeType
51 | 				&& other.astParentNodeType == astParentNodeType
52 | 				&& other.type.equals(type);
53 | 	}
54 | 
55 | 	@Override
56 | 	public int hashCode() {
57 | 		return Objects.hashCode(code, scopeType, type, astNodeType,
58 | 				astParentNodeType);
59 | 	}
60 | 
61 | 	@Override
62 | 	public String toString() {
63 | 		return scopeType + " " + code;
64 | 	}
65 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/TokenizerUtils.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * 
  3 |  */
  4 | package codemining.languagetools;
  5 | 
  6 | import static com.google.common.base.Preconditions.checkArgument;
  7 | import static com.google.common.base.Preconditions.checkPositionIndex;
  8 | 
  9 | import java.lang.reflect.InvocationTargetException;
 10 | import java.util.List;
 11 | 
 12 | import org.apache.commons.lang.StringUtils;
 13 | 
 14 | import codemining.languagetools.ITokenizer.FullToken;
 15 | import codemining.util.SettingsLoader;
 16 | 
 17 | /**
 18 |  * Utility function relevant to tokenization.
 19 |  * 
 20 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 21 |  * 
 22 |  */
 23 | public class TokenizerUtils {
 24 | 
 25 | 	public static final int TAB_INDENT_SIZE = (int) SettingsLoader
 26 | 			.getNumericSetting("tabSize", 4);
 27 | 
 28 | 	/**
 29 | 	 * Return the column of the given position.
 30 | 	 * 
 31 | 	 * @param code
 32 | 	 * @param position
 33 | 	 * @return
 34 | 	 */
 35 | 	public static int getColumnOfPosition(final String code, final int position) {
 36 | 		checkPositionIndex(position, code.length());
 37 | 		int newLinePosition = code.substring(0, position).lastIndexOf("\n");
 38 | 		if (newLinePosition == -1) {
 39 | 			newLinePosition = 0; // Start of file.
 40 | 		}
 41 | 		final int tabCount = StringUtils.countMatches(
 42 | 				code.substring(newLinePosition, position), "\t");
 43 | 		return position - newLinePosition + (TAB_INDENT_SIZE - 1) * tabCount;
 44 | 	}
 45 | 
 46 | 	/**
 47 | 	 * Crudely join tokens together.
 48 | 	 * 
 49 | 	 * @param tokens
 50 | 	 * @param sb
 51 | 	 * @return
 52 | 	 */
 53 | 	public final static StringBuffer joinFullTokens(
 54 | 			final List<FullToken> tokens, final StringBuffer sb) {
 55 | 		for (final FullToken token : tokens) {
 56 | 			sb.append(token.token);
 57 | 			sb.append(" ");
 58 | 		}
 59 | 
 60 | 		return sb;
 61 | 	}
 62 | 
 63 | 	/**
 64 | 	 * Crudely join tokens together.
 65 | 	 * 
 66 | 	 * @param tokens
 67 | 	 * @param sb
 68 | 	 * @return
 69 | 	 */
 70 | 	public final static StringBuffer joinTokens(final List<String> tokens) {
 71 | 		final StringBuffer sb = new StringBuffer();
 72 | 		for (final String token : tokens) {
 73 | 			sb.append(token);
 74 | 			sb.append(" ");
 75 | 		}
 76 | 
 77 | 		return sb;
 78 | 	}
 79 | 
 80 | 	/**
 81 | 	 * Crudely join tokens together.
 82 | 	 * 
 83 | 	 * @param tokens
 84 | 	 * @param sb
 85 | 	 * @return
 86 | 	 */
 87 | 	public final static StringBuffer joinTokens(final List<String> tokens,
 88 | 			final StringBuffer sb) {
 89 | 		for (final String token : tokens) {
 90 | 			sb.append(token);
 91 | 			sb.append(" ");
 92 | 		}
 93 | 
 94 | 		return sb;
 95 | 	}
 96 | 
 97 | 	/**
 98 | 	 * Remove the sentence start/end FullTokens.
 99 | 	 * 
100 | 	 * @param tokenSequence
101 | 	 */
102 | 	public static final void removeSentenceStartEndFullTokens(
103 | 			final List<FullToken> tokenSequence) {
104 | 		checkArgument(tokenSequence.get(0).token
105 | 				.equals(ITokenizer.SENTENCE_START));
106 | 		tokenSequence.remove(0);
107 | 		checkArgument(tokenSequence.get(tokenSequence.size() - 1).token
108 | 				.equals(ITokenizer.SENTENCE_END));
109 | 		tokenSequence.remove(tokenSequence.size() - 1);
110 | 	}
111 | 
112 | 	/**
113 | 	 * Remove the sentence start/end tokens.
114 | 	 * 
115 | 	 * @param tokenSequence
116 | 	 */
117 | 	public static final void removeSentenceStartEndTokens(
118 | 			final List<String> tokenSequence) {
119 | 		checkArgument(tokenSequence.get(0).equals(ITokenizer.SENTENCE_START));
120 | 		tokenSequence.remove(0);
121 | 		checkArgument(tokenSequence.get(tokenSequence.size() - 1).equals(
122 | 				ITokenizer.SENTENCE_END));
123 | 		tokenSequence.remove(tokenSequence.size() - 1);
124 | 	}
125 | 
126 | 	private TokenizerUtils() {
127 | 		// Utilty class
128 | 	}
129 | 
130 | 	/**
131 | 	 * @param tokenizerClass
132 | 	 * @param tokenizerArguments
133 | 	 * @return
134 | 	 * @throws InstantiationException
135 | 	 * @throws IllegalAccessException
136 | 	 * @throws IllegalArgumentException
137 | 	 * @throws InvocationTargetException
138 | 	 * @throws NoSuchMethodException
139 | 	 * @throws SecurityException
140 | 	 * @throws ClassNotFoundException
141 | 	 */
142 | 	public static ITokenizer tokenizerForClass(final String tokenizerClass,
143 | 			final String tokenizerArguments) throws InstantiationException,
144 | 			IllegalAccessException, IllegalArgumentException,
145 | 			InvocationTargetException, NoSuchMethodException,
146 | 			SecurityException, ClassNotFoundException {
147 | 		return (ITokenizer) Class.forName(tokenizerClass)
148 | 				.getDeclaredConstructor(String.class)
149 | 				.newInstance(tokenizerArguments);
150 | 	}
151 | 
152 | 	public static ITokenizer tokenizerForClass(final String tokenizerClass,
153 | 			final Boolean tokenizerArguments) throws InstantiationException,
154 | 			IllegalAccessException, IllegalArgumentException,
155 | 			InvocationTargetException, NoSuchMethodException,
156 | 			SecurityException, ClassNotFoundException {
157 | 		return (ITokenizer) Class.forName(tokenizerClass)
158 | 				.getDeclaredConstructor(Boolean.TYPE)
159 | 				.newInstance(tokenizerArguments);
160 | 	}
161 | 
162 | 	/**
163 | 	 * @param tokenizerClass
164 | 	 * @return
165 | 	 * @throws InstantiationException
166 | 	 * @throws IllegalAccessException
167 | 	 * @throws ClassNotFoundException
168 | 	 */
169 | 	public static ITokenizer tokenizerForClass(final String tokenizerClass)
170 | 			throws InstantiationException, IllegalAccessException,
171 | 			ClassNotFoundException {
172 | 		return (ITokenizer) Class.forName(tokenizerClass).newInstance();
173 | 	}
174 | 
175 | }
176 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/bindings/AbstractNameBindingsExtractor.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  */
 4 | package codemining.languagetools.bindings;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.List;
 9 | import java.util.Set;
10 | 
11 | import com.google.common.collect.HashMultimap;
12 | import com.google.common.collect.Multimap;
13 | 
14 | /**
15 |  * A NameBindings extractor from arbitrary code.
16 |  *
17 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
18 |  *
19 |  */
20 | public abstract class AbstractNameBindingsExtractor {
21 | 
22 | 	public abstract Set<?> getAvailableFeatures();
23 | 
24 | 	/**
25 | 	 * Return all the name bindings for file f
26 | 	 *
27 | 	 * @param f
28 | 	 * @return a multimap containing for each name all the relavant name
29 | 	 *         bindings in the file.
30 | 	 * @throws IOException
31 | 	 */
32 | 	public Multimap<String, TokenNameBinding> getBindingsForName(final File f)
33 | 			throws IOException {
34 | 		return getBindingsForName(getNameBindings(f));
35 | 	}
36 | 
37 | 	protected Multimap<String, TokenNameBinding> getBindingsForName(
38 | 			final List<TokenNameBinding> bindings) {
39 | 		final Multimap<String, TokenNameBinding> toks = HashMultimap.create();
40 | 		for (final TokenNameBinding binding : bindings) {
41 | 			toks.put(binding.getName(), binding);
42 | 		}
43 | 		return toks;
44 | 	}
45 | 
46 | 	/**
47 | 	 * Return the name bindings given the code.
48 | 	 *
49 | 	 * @param code
50 | 	 * @return a multimap containing for each name all the relavant name
51 | 	 *         bindings in the code snippet.
52 | 	 */
53 | 	public Multimap<String, TokenNameBinding> getBindingsForName(
54 | 			final String code) {
55 | 		return getBindingsForName(getNameBindings(code));
56 | 	}
57 | 
58 | 	/**
59 | 	 * Get the name bindings for the given file.
60 | 	 *
61 | 	 * @param f
62 | 	 * @return
63 | 	 * @throws IOException
64 | 	 */
65 | 	public abstract List<TokenNameBinding> getNameBindings(final File f)
66 | 			throws IOException;
67 | 
68 | 	/**
69 | 	 * Get the name bindings given the code.
70 | 	 *
71 | 	 * @param code
72 | 	 * @return
73 | 	 */
74 | 	public abstract List<TokenNameBinding> getNameBindings(final String code);
75 | 
76 | 	/**
77 | 	 * Return a ResolvedSourceCode instance for the given code.
78 | 	 *
79 | 	 * @param f
80 | 	 * @return
81 | 	 * @throws IOException
82 | 	 */
83 | 	public abstract ResolvedSourceCode getResolvedSourceCode(final File f)
84 | 			throws IOException;
85 | 
86 | 	/**
87 | 	 * Return a ResolvedSourceCode instance for the given code.
88 | 	 *
89 | 	 * @param code
90 | 	 * @return
91 | 	 */
92 | 	public abstract ResolvedSourceCode getResolvedSourceCode(final String code);
93 | 
94 | 	public abstract void setActiveFeatures(Set<?> activeFeatures);
95 | }
96 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/bindings/ResolvedSourceCode.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  */
 4 | package codemining.languagetools.bindings;
 5 | 
 6 | import static com.google.common.base.Preconditions.checkArgument;
 7 | 
 8 | import java.util.Collection;
 9 | import java.util.List;
10 | 
11 | import com.google.common.collect.ArrayListMultimap;
12 | 
13 | /**
14 |  * A full piece of source code that has the variable bindings resolved. The
15 |  * variable bindings are "attached" to the source code, so any changes in the
16 |  * token stream, will be reflected to the bindings.
17 |  *
18 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
19 |  *
20 |  */
21 | public class ResolvedSourceCode {
22 | 
23 | 	public final String name;
24 | 
25 | 	public final List<String> codeTokens;
26 | 
27 | 	private final ArrayListMultimap<String, TokenNameBinding> variableBindings;
28 | 
29 | 	/**
30 | 	 * Assumes that the variable bindings use the same (as in ==) token list.
31 | 	 *
32 | 	 * @param name
33 | 	 * @param codeTokens
34 | 	 * @param variableBindings
35 | 	 */
36 | 	public ResolvedSourceCode(final List<String> codeTokens,
37 | 			final ArrayListMultimap<String, TokenNameBinding> variableBindings) {
38 | 		this.name = "UnkownSourceCodeName";
39 | 		this.codeTokens = codeTokens;
40 | 		this.variableBindings = variableBindings;
41 | 	}
42 | 
43 | 	/**
44 | 	 * Assumes that the variable bindings use the same (as in ==) token list.
45 | 	 *
46 | 	 * @param name
47 | 	 * @param codeTokens
48 | 	 * @param variableBindings
49 | 	 */
50 | 	public ResolvedSourceCode(final String name, final List<String> codeTokens,
51 | 			final ArrayListMultimap<String, TokenNameBinding> variableBindings) {
52 | 		this.name = name;
53 | 		this.codeTokens = codeTokens;
54 | 		this.variableBindings = variableBindings;
55 | 	}
56 | 
57 | 	/**
58 | 	 * Return all the bindings in source code.
59 | 	 *
60 | 	 * @return
61 | 	 */
62 | 	public Collection<TokenNameBinding> getAllBindings() {
63 | 		return variableBindings.values();
64 | 	}
65 | 
66 | 	/**
67 | 	 * Return the bindings for a single name.
68 | 	 *
69 | 	 * @param name
70 | 	 * @return
71 | 	 */
72 | 	public Collection<TokenNameBinding> getBindingsForName(final String name) {
73 | 		return variableBindings.get(name);
74 | 	}
75 | 
76 | 	/**
77 | 	 * Rename a single bound set of tokens.
78 | 	 *
79 | 	 * @param binding
80 | 	 * @param name
81 | 	 */
82 | 	public void renameVariableTo(final TokenNameBinding binding,
83 | 			final String name) {
84 | 		checkArgument(variableBindings.values().contains(binding),
85 | 				"Binding is not pointing to this source code");
86 | 
87 | 		for (final int position : binding.nameIndexes) {
88 | 			codeTokens.set(position, name);
89 | 		}
90 | 	}
91 | 
92 | }
93 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/bindings/TokenNameBinding.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  */
 4 | package codemining.languagetools.bindings;
 5 | 
 6 | import static com.google.common.base.Preconditions.checkArgument;
 7 | 
 8 | import java.io.Serializable;
 9 | import java.util.Collections;
10 | import java.util.List;
11 | import java.util.Set;
12 | 
13 | import com.google.common.base.Objects;
14 | import com.google.common.collect.Lists;
15 | 
16 | /**
17 |  * A single name binding in source code. A struct-like class.
18 |  *
19 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
20 |  *
21 |  */
22 | public class TokenNameBinding implements Serializable {
23 | 	private static final long serialVersionUID = 2020613810485746430L;
24 | 
25 | 	/**
26 | 	 * The tokens of source code.
27 | 	 */
28 | 	public final List<String> sourceCodeTokens;
29 | 
30 | 	/**
31 | 	 * The positions in sourceCodeTokens that contain the given name.
32 | 	 */
33 | 	public final Set<Integer> nameIndexes;
34 | 
35 | 	/**
36 | 	 * Features of the binding
37 | 	 */
38 | 	public final Set<String> features;
39 | 
40 | 	public TokenNameBinding(final Set<Integer> nameIndexes,
41 | 			final List<String> sourceCodeTokens, final Set<String> features) {
42 | 		checkArgument(nameIndexes.size() > 0);
43 | 		checkArgument(sourceCodeTokens.size() > 0);
44 | 		this.nameIndexes = Collections.unmodifiableSet(nameIndexes);
45 | 		this.sourceCodeTokens = Collections.unmodifiableList(sourceCodeTokens);
46 | 		this.features = features;
47 | 	}
48 | 
49 | 	@Override
50 | 	public boolean equals(final Object obj) {
51 | 		if (this == obj) {
52 | 			return true;
53 | 		}
54 | 		if (obj == null) {
55 | 			return false;
56 | 		}
57 | 		if (getClass() != obj.getClass()) {
58 | 			return false;
59 | 		}
60 | 		final TokenNameBinding other = (TokenNameBinding) obj;
61 | 		return Objects.equal(nameIndexes, other.nameIndexes)
62 | 				&& Objects.equal(features, other.features)
63 | 				&& Objects.equal(sourceCodeTokens, other.sourceCodeTokens);
64 | 	}
65 | 
66 | 	public String getName() {
67 | 		return sourceCodeTokens.get(nameIndexes.iterator().next());
68 | 	}
69 | 
70 | 	@Override
71 | 	public int hashCode() {
72 | 		return Objects.hashCode(sourceCodeTokens, nameIndexes, features);
73 | 	}
74 | 
75 | 	/**
76 | 	 * Rename this name to the given binding. The source code tokens included in
77 | 	 * this struct, now represent the new structure.
78 | 	 *
79 | 	 * @param name
80 | 	 * @return
81 | 	 */
82 | 	public TokenNameBinding renameTo(final String name) {
83 | 		final List<String> renamedCode = Lists.newArrayList(sourceCodeTokens);
84 | 		for (final int position : nameIndexes) {
85 | 			renamedCode.set(position, name);
86 | 		}
87 | 		return new TokenNameBinding(nameIndexes, renamedCode, features);
88 | 	}
89 | 
90 | 	@Override
91 | 	public String toString() {
92 | 		return getName() + nameIndexes + " " + features;
93 | 	}
94 | }
95 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Interfaces and utilities for language tools.
3 |  */
4 | package codemining.languagetools;
5 | 
6 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/tokenizers/whitespace/WhitespaceToTokenConverter.java:
--------------------------------------------------------------------------------
 1 | package codemining.languagetools.tokenizers.whitespace;
 2 | 
 3 | 
 4 | /**
 5 |  * A stateful whitespace to whitespace token converter.
 6 |  * 
 7 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 8 |  * 
 9 |  */
public class WhitespaceToTokenConverter {
	/** Number of spaces counted in the last token that contained a newline. */
	private int indentSpaces = 0;
	/** Number of tabs counted in the last token that contained a newline. */
	private int indentTabs = 0;

	/**
	 * Convert the given whitespace to a whitespace token.
	 *
	 * Carriage returns are ignored. A token without newlines becomes
	 * <code>WS_s&lt;spaces&gt;t&lt;tabs&gt;</code> and leaves the state
	 * untouched. A token with newlines is encoded relative to the counts
	 * remembered from the previous such token: as <code>WS_INDENT</code>
	 * when neither the space nor the tab count decreased, otherwise as
	 * <code>WS_DEDENT</code> with both differences negated. The newline
	 * count is appended after <code>n</code>.
	 *
	 * @param token
	 *            the whitespace text
	 * @return the whitespace token
	 */
	public String toWhiteSpaceSymbol(final String token) {
		final String text = token.replace("\r", "");
		int nSpaces = 0;
		int nTabs = 0;
		int nNewLines = 0;
		for (int i = 0; i < text.length(); i++) {
			switch (text.charAt(i)) {
			case '\n':
				nNewLines++;
				break;
			case '\t':
				nTabs++;
				break;
			case ' ':
				nSpaces++;
				break;
			default:
				// Any other character is not counted.
				break;
			}
		}

		if (nNewLines == 0) {
			return "WS_s" + nSpaces + "t" + nTabs;
		}

		// A new line starts: encode the change since the remembered counts
		// and remember the new ones.
		final int spaceDelta = nSpaces - indentSpaces;
		final int tabDelta = nTabs - indentTabs;
		indentSpaces = nSpaces;
		indentTabs = nTabs;

		if (spaceDelta < 0 || tabDelta < 0) {
			return "WS_DEDENTs" + (-spaceDelta) + "t" + (-tabDelta) + "n"
					+ nNewLines;
		}
		return "WS_INDENTs" + spaceDelta + "t" + tabDelta + "n" + nNewLines;
	}

}


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/tokenizers/whitespace/WhitespaceTokenConverter.java:
--------------------------------------------------------------------------------
  1 | package codemining.languagetools.tokenizers.whitespace;
  2 | 
  3 | import static com.google.common.base.Preconditions.checkArgument;
  4 | 
  5 | import java.util.regex.Matcher;
  6 | import java.util.regex.Pattern;
  7 | 
  8 | /**
  9 |  * A utility stateful class for converting whitespace tokens to whitespace.
 10 |  * 
 11 |  */
 12 | public final class WhitespaceTokenConverter {
 13 | 
 14 | 	/**
 15 | 	 * A struct class.
 16 | 	 * 
 17 | 	 */
 18 | 	private static final class Whitespace {
 19 | 		int nTabs;
 20 | 		int nSpace;
 21 | 		int nNewLines;
 22 | 	}
 23 | 
 24 | 	private int currentSpaceIndentation = 0;
 25 | 	private int currentTabIndentation = 0;
 26 | 
 27 | 	public static final Pattern INDENT_PATTERN = Pattern
 28 | 			.compile("WS_INDENTs([0-9]+)t([0-9]+)n([0-9]+)");
 29 | 
 30 | 	public static final Pattern DEDENT_PATTERN = Pattern
 31 | 			.compile("WS_DEDENTs(-?\\d+)t(-?\\d+)n(\\d+)");
 32 | 
 33 | 	public static final Pattern SPACE_PATTERN = Pattern
 34 | 			.compile("WS_s(\\d+)t(\\d+)");
 35 | 
 36 | 	/**
 37 | 	 * Append whitespace to StringBuffer, given the specifications.
 38 | 	 * 
 39 | 	 * @param nSpace
 40 | 	 * @param nTab
 41 | 	 * @param startAtNewLine
 42 | 	 * @return
 43 | 	 */
 44 | 	public static final void createWhitespace(
 45 | 			final WhitespaceTokenConverter.Whitespace space, final StringBuffer sb) {
 46 | 		for (int i = 0; i < space.nNewLines; i++) {
 47 | 			sb.append(System.getProperty("line.separator"));
 48 | 		}
 49 | 		for (int i = 0; i < space.nSpace; i++) {
 50 | 			sb.append(" ");
 51 | 		}
 52 | 		for (int i = 0; i < space.nTabs; i++) {
 53 | 			sb.append("\t");
 54 | 		}
 55 | 	}
 56 | 
 57 | 	/**
 58 | 	 * Whitespace token converter.
 59 | 	 * 
 60 | 	 * @param wsToken
 61 | 	 * @param buffer
 62 | 	 */
 63 | 	public void appendWS(final String wsToken, final StringBuffer buffer) {
 64 | 		checkArgument(wsToken.startsWith("WS_"));
 65 | 		final WhitespaceTokenConverter.Whitespace space;
 66 | 		if (wsToken.startsWith("WS_INDENT")) {
 67 | 			space = convert(wsToken, INDENT_PATTERN);
 68 | 			currentSpaceIndentation += space.nSpace;
 69 | 			currentTabIndentation += space.nTabs;
 70 | 			space.nSpace = currentSpaceIndentation;
 71 | 			space.nTabs = currentTabIndentation;
 72 | 
 73 | 		} else if (wsToken.startsWith("WS_DEDENT")) {
 74 | 			space = convert(wsToken, DEDENT_PATTERN);
 75 | 			currentSpaceIndentation -= space.nSpace;
 76 | 			if (currentSpaceIndentation < 0) {
 77 | 				currentSpaceIndentation = 0;
 78 | 			}
 79 | 			currentTabIndentation -= space.nTabs;
 80 | 			if (currentTabIndentation < 0) {
 81 | 				currentTabIndentation = 0;
 82 | 			}
 83 | 			space.nSpace = currentSpaceIndentation;
 84 | 			space.nTabs = currentTabIndentation;
 85 | 		} else {
 86 | 			space = convert(wsToken, SPACE_PATTERN);
 87 | 		}
 88 | 		createWhitespace(space, buffer);
 89 | 	}
 90 | 
 91 | 	private WhitespaceTokenConverter.Whitespace convert(final String wsToken,
 92 | 			final Pattern patternToMatch) {
 93 | 		final WhitespaceTokenConverter.Whitespace space = new Whitespace();
 94 | 		final Matcher m = patternToMatch.matcher(wsToken);
 95 | 		checkArgument(m.matches(), "Pattern " + patternToMatch.toString()
 96 | 				+ " does not match " + wsToken);
 97 | 		space.nSpace = Integer.parseInt(m.group(1));
 98 | 		space.nTabs = Integer.parseInt(m.group(2));
 99 | 		if (m.groupCount() == 3) {
100 | 			space.nNewLines = Integer.parseInt(m.group(3));
101 | 		}
102 | 		return space;
103 | 	}
104 | }


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/tui/DistinctTokenCount.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.languagetools.tui;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.List;
 9 | import java.util.logging.Logger;
10 | 
11 | import org.apache.commons.io.FileUtils;
12 | import org.apache.commons.io.filefilter.DirectoryFileFilter;
13 | import org.apache.commons.io.filefilter.RegexFileFilter;
14 | import org.apache.commons.lang.exception.ExceptionUtils;
15 | 
16 | import codemining.languagetools.ITokenizer;
17 | import codemining.languagetools.TokenizerUtils;
18 | 
19 | import com.google.common.collect.Multiset.Entry;
20 | import com.google.common.collect.TreeMultiset;
21 | 
22 | /**
23 |  * Print to stdout the total count of all unique tokens in the text.
24 |  * 
25 |  * Used to answer the question: Do we have a zipf-ian distribution of tokens in
26 |  * Java Code?
27 |  * 
28 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
29 |  * 
30 |  */
31 | public class DistinctTokenCount {
32 | 
33 | 	private static final Logger LOGGER = Logger
34 | 			.getLogger(DistinctTokenCount.class.getName());
35 | 
36 | 	/**
37 | 	 * @param args
38 | 	 * @throws ClassNotFoundException
39 | 	 * @throws IllegalAccessException
40 | 	 * @throws InstantiationException
41 | 	 */
42 | 	public static void main(final String[] args) throws InstantiationException,
43 | 			IllegalAccessException, ClassNotFoundException {
44 | 
45 | 		if (args.length != 2) {
46 | 			System.err.println("Usage: <directory> <tokenizerClass>");
47 | 			return;
48 | 		}
49 | 
50 | 		final DistinctTokenCount tokCount = new DistinctTokenCount(args[1]);
51 | 		for (final File fi : FileUtils.listFiles(new File(args[0]),
52 | 				new RegexFileFilter(".*\\.java$"),
53 | 				DirectoryFileFilter.DIRECTORY)) {
54 | 			try {
55 | 				tokCount.addTokens(fi);
56 | 			} catch (final IOException e) {
57 | 				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
58 | 			}
59 | 		}
60 | 
61 | 		tokCount.printCounts();
62 | 	}
63 | 
64 | 	private final TreeMultiset<String> allTokens = TreeMultiset.create();
65 | 
66 | 	private final ITokenizer tokenizer;
67 | 
68 | 	public DistinctTokenCount(final String tokenizerClass)
69 | 			throws InstantiationException, IllegalAccessException,
70 | 			ClassNotFoundException {
71 | 		tokenizer = TokenizerUtils.tokenizerForClass(tokenizerClass);
72 | 	}
73 | 
74 | 	public void addTokens(final File file) throws IOException {
75 | 		LOGGER.finer("Reading file " + file.getAbsolutePath());
76 | 		final char[] code = FileUtils.readFileToString(file).toCharArray();
77 | 		final List<String> tokens = tokenizer.tokenListFromCode(code);
78 | 		allTokens.addAll(tokens);
79 | 
80 | 	}
81 | 
82 | 	/**
83 | 	 * Prints the counts.
84 | 	 */
85 | 	public void printCounts() {
86 | 		for (final Entry<String> token : allTokens.entrySet()) {
87 | 			System.out.println(token.getCount());
88 | 		}
89 | 	}
90 | 
91 | }
92 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/tui/TokenCounter.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.languagetools.tui;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.logging.Logger;
 9 | 
10 | import org.apache.commons.io.FileUtils;
11 | import org.apache.commons.io.filefilter.DirectoryFileFilter;
12 | import org.apache.commons.lang.exception.ExceptionUtils;
13 | 
14 | import codemining.languagetools.ITokenizer;
15 | import codemining.languagetools.TokenizerUtils;
16 | 
17 | /**
18 |  * Utility for counting all the tokens in a folder.
19 |  * 
20 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
21 |  * 
22 |  */
23 | public class TokenCounter {
24 | 
25 | 	private static final Logger LOGGER = Logger.getLogger(TokenCounter.class
26 | 			.getName());
27 | 
28 | 	/**
29 | 	 * @param args
30 | 	 * @throws IOException
31 | 	 * @throws ClassNotFoundException
32 | 	 * @throws IllegalAccessException
33 | 	 * @throws InstantiationException
34 | 	 */
35 | 	public static void main(final String[] args) throws IOException,
36 | 			InstantiationException, IllegalAccessException,
37 | 			ClassNotFoundException {
38 | 		if (args.length != 2) {
39 | 			System.err.println("Usage <codeDir> <TokenizerClass>");
40 | 			return;
41 | 		}
42 | 
43 | 		long tokenCount = 0;
44 | 
45 | 		final ITokenizer tokenizer = TokenizerUtils.tokenizerForClass(args[1]);
46 | 
47 | 		for (final File fi : FileUtils.listFiles(new File(args[0]),
48 | 				tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) {
49 | 			try {
50 | 				final char[] code = FileUtils.readFileToString(fi)
51 | 						.toCharArray();
52 | 				tokenCount += tokenizer.tokenListFromCode(code).size() - 2; // Remove
53 | 																			// sentence
54 | 																			// start/end
55 | 			} catch (final IOException e) {
56 | 				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
57 | 			}
58 | 		}
59 | 
60 | 		System.out.println("Tokens: " + tokenCount);
61 | 	}
62 | }
63 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/tui/TokenizerTUI.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.languagetools.tui;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.lang.reflect.InvocationTargetException;
 9 | import java.util.Collection;
10 | 
11 | import org.apache.commons.io.FileUtils;
12 | import org.apache.commons.io.filefilter.DirectoryFileFilter;
13 | import org.eclipse.jdt.core.compiler.InvalidInputException;
14 | 
15 | import codemining.languagetools.ITokenizer;
16 | import codemining.languagetools.ITokenizer.FullToken;
17 | import codemining.languagetools.TokenizerUtils;
18 | 
19 | import com.google.common.collect.Lists;
20 | 
21 | /**
22 |  * Print tokenized code of a file to stdout. Each token is placed at a separate
23 |  * line and the tokens of each file are followed by empty lines.
24 |  * 
25 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
26 |  * 
27 |  */
28 | public class TokenizerTUI {
29 | 
30 | 	/**
31 | 	 * Tokenize a single file, or every file under a directory that the
32 | 	 * tokenizer's file filter accepts, and print the tokens to stdout.
33 | 	 * 
34 | 	 * @param args
35 | 	 *            the code directory (or file), the tokenizer class name and,
36 | 	 *            optionally, an argument string for the tokenizer; any
37 | 	 *            further arguments are ignored
38 | 	 */
39 | 	public static void main(final String[] args) throws InvalidInputException,
40 | 			IOException, InstantiationException, IllegalAccessException,
41 | 			ClassNotFoundException, IllegalArgumentException,
42 | 			SecurityException, InvocationTargetException, NoSuchMethodException {
43 | 		if (args.length < 2) {
44 | 			System.err
45 | 					.println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
46 | 			return;
47 | 		}
48 | 
49 | 		final ITokenizer tok;
50 | 		final String tokenizerClass = args[1];
51 | 		if (args.length == 2) {
52 | 			tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
53 | 		} else {
54 | 			final String tokenizerArguments = args[2];
55 | 			tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
56 | 					tokenizerArguments);
57 | 		}
58 | 
59 | 		final File baseFile = new File(args[0]);
60 | 		final Collection<File> allFiles;
61 | 		if (baseFile.isDirectory()) {
62 | 			allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
63 | 					DirectoryFileFilter.DIRECTORY);
64 | 		} else {
65 | 			allFiles = Lists.newArrayList(baseFile);
66 | 		}
67 | 
68 | 		// Look the separator up once instead of once per token.
69 | 		final String lineSeparator = System.getProperty("line.separator");
70 | 
71 | 		for (final File fi : allFiles) {
72 | 			// No synchronization is needed for this method-local buffer.
73 | 			final StringBuilder buf = new StringBuilder();
74 | 			for (final FullToken token : tok.getTokenListFromCode(fi)) {
75 | 				buf.append(token).append(lineSeparator);
76 | 			}
77 | 
78 | 			System.out.println(buf.toString());
79 | 			System.out.println();
80 | 		}
81 | 	}
82 | }
83 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/tui/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Command line tools for language tools.
3 |  */
4 | package codemining.languagetools.tui;
5 | 
6 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/python/codeutils/AbstractPythonTokenizer.java:
--------------------------------------------------------------------------------
  1 | package codemining.python.codeutils;
  2 | 
  3 | import java.io.File;
  4 | import java.io.IOException;
  5 | import java.util.Collection;
  6 | import java.util.List;
  7 | import java.util.SortedMap;
  8 | 
  9 | import org.apache.commons.io.FileUtils;
 10 | import org.apache.commons.io.filefilter.AbstractFileFilter;
 11 | import org.apache.commons.io.filefilter.RegexFileFilter;
 12 | import org.apache.commons.lang.NotImplementedException;
 13 | import org.python.pydev.parser.grammarcommon.ITokenManager;
 14 | import org.python.pydev.parser.jython.FastCharStream;
 15 | import org.python.pydev.parser.jython.Token;
 16 | 
 17 | import codemining.languagetools.ITokenizer;
 18 | 
 19 | import com.google.common.collect.Lists;
 20 | import com.google.common.collect.Maps;
 21 | 
 22 | /**
 23 |  * An abstract python tokenizer using the PyDev interface.
 24 |  *
 25 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
 26 |  *
 27 |  */
 28 | public abstract class AbstractPythonTokenizer implements ITokenizer {
 29 | 
 30 | 	private static final long serialVersionUID = 5009530263783901964L;
 31 | 
 32 | 	/**
 33 | 	 * A filter for the files being tokenized.
 34 | 	 */
 35 | 	private static final RegexFileFilter pythonCodeFilter = new RegexFileFilter(
 36 | 			".*\\.py$");
 37 | 
 38 | 	public AbstractPythonTokenizer() {
 39 | 		super();
 40 | 	}
 41 | 
 42 | 	@Override
 43 | 	public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
 44 | 		final FastCharStream stream = new FastCharStream(code);
 45 | 		final ITokenManager mng = getPythonTokenizer(stream);
 46 | 		final SortedMap<Integer, FullToken> tokens = Maps.newTreeMap();
 47 | 
 48 | 		Token nextToken = mng.getNextToken();
 49 | 		while (nextToken.kind != 0) {
 50 | 			if (shouldAdd(nextToken)) {
 51 | 				// TODO: Bad Heurisitc...
 52 | 				tokens.put(
 53 | 						nextToken.getBeginLine() * 500
 54 | 								+ nextToken.getBeginCol(),
 55 | 						new FullToken(nextToken.image, Integer
 56 | 								.toString(nextToken.kind)));
 57 | 			}
 58 | 			nextToken = mng.getNextToken();
 59 | 		}
 60 | 
 61 | 		return tokens;
 62 | 	}
 63 | 
 64 | 	@Override
 65 | 	public AbstractFileFilter getFileFilter() {
 66 | 		return pythonCodeFilter;
 67 | 	}
 68 | 
 69 | 	@Override
 70 | 	public String getIdentifierType() {
 71 | 		return "92"; // TODO from not hard coded?
 72 | 	}
 73 | 
 74 | 	/*
 75 | 	 * (non-Javadoc)
 76 | 	 *
 77 | 	 * @see codemining.languagetools.ITokenizer#getKeywordTypes()
 78 | 	 */
 79 | 	@Override
 80 | 	public Collection<String> getKeywordTypes() {
 81 | 		throw new NotImplementedException();
 82 | 	}
 83 | 
 84 | 	/*
 85 | 	 * (non-Javadoc)
 86 | 	 *
 87 | 	 * @see codemining.languagetools.ITokenizer#getLiteralTypes()
 88 | 	 */
 89 | 	@Override
 90 | 	public Collection<String> getLiteralTypes() {
 91 | 		throw new NotImplementedException();
 92 | 	}
 93 | 
 94 | 	public abstract ITokenManager getPythonTokenizer(final FastCharStream stream);
 95 | 
 96 | 	@Override
 97 | 	public FullToken getTokenFromString(final String token) {
 98 | 		final FastCharStream stream = new FastCharStream(token.toCharArray());
 99 | 		final ITokenManager mng = getPythonTokenizer(stream);
100 | 		final Token pyToken = mng.getNextToken();
101 | 		return new FullToken(pyToken.image, Integer.toString(pyToken.kind));
102 | 	}
103 | 
104 | 	@Override
105 | 	public List<FullToken> getTokenListFromCode(final char[] code) {
106 | 		final FastCharStream stream = new FastCharStream(code);
107 | 		final ITokenManager mng = getPythonTokenizer(stream);
108 | 		final List<FullToken> tokens = Lists.newArrayList();
109 | 
110 | 		Token nextToken = mng.getNextToken();
111 | 		while (nextToken.kind != 0) {
112 | 			if (shouldAdd(nextToken)) {
113 | 				tokens.add(new FullToken(nextToken.image, Integer
114 | 						.toString(nextToken.kind)));
115 | 			}
116 | 			nextToken = mng.getNextToken();
117 | 		}
118 | 
119 | 		return tokens;
120 | 	}
121 | 
122 | 	@Override
123 | 	public List<FullToken> getTokenListFromCode(final File codeFile)
124 | 			throws IOException {
125 | 		return getTokenListFromCode(FileUtils.readFileToString(codeFile)
126 | 				.toCharArray());
127 | 	}
128 | 
129 | 	/**
130 | 	 * @param nextToken
131 | 	 * @return
132 | 	 */
133 | 	public boolean shouldAdd(final Token nextToken) {
134 | 		// disallow whitespace, indent and docstrings
135 | 		return nextToken.kind != 6 && nextToken.kind != 14
136 | 				&& nextToken.kind != 13 && nextToken.kind != 115;
137 | 	}
138 | 
139 | 	@Override
140 | 	public List<String> tokenListFromCode(final char[] code) {
141 | 		final FastCharStream stream = new FastCharStream(code);
142 | 		final ITokenManager mng = getPythonTokenizer(stream);
143 | 		final List<String> tokens = Lists.newArrayList();
144 | 
145 | 		Token nextToken = mng.getNextToken();
146 | 		while (nextToken.kind != 0) {
147 | 			if (shouldAdd(nextToken)) {
148 | 				tokens.add(nextToken.image);
149 | 			}
150 | 			nextToken = mng.getNextToken();
151 | 		}
152 | 
153 | 		return tokens;
154 | 	}
155 | 
156 | 	@Override
157 | 	public List<String> tokenListFromCode(final File codeFile)
158 | 			throws IOException {
159 | 		return tokenListFromCode(FileUtils.readFileToString(codeFile)
160 | 				.toCharArray());
161 | 	}
162 | 
163 | 	@Override
164 | 	public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
165 | 		final FastCharStream stream = new FastCharStream(code);
166 | 		final ITokenManager mng = getPythonTokenizer(stream);
167 | 		final SortedMap<Integer, String> tokens = Maps.newTreeMap();
168 | 
169 | 		Token nextToken = mng.getNextToken();
170 | 		while (nextToken.kind != 0) {
171 | 			if (shouldAdd(nextToken)) {
172 | 				// TODO: Bad Heurisitc...
173 | 				tokens.put(
174 | 						nextToken.getBeginLine() * 500
175 | 								+ nextToken.getBeginCol(), nextToken.image);
176 | 			}
177 | 			nextToken = mng.getNextToken();
178 | 		}
179 | 
180 | 		return tokens;
181 | 	}
182 | 
183 | 	@Override
184 | 	public SortedMap<Integer, FullToken> tokenListWithPos(final File file)
185 | 			throws IOException {
186 | 		return fullTokenListWithPos(FileUtils.readFileToString(file)
187 | 				.toCharArray());
188 | 	}
189 | 
190 | }
191 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/python/codeutils/Python27Tokenizer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.python.codeutils;
 5 | 
 6 | import org.python.pydev.parser.grammar27.PythonGrammar27TokenManager;
 7 | import org.python.pydev.parser.grammarcommon.ITokenManager;
 8 | import org.python.pydev.parser.jython.FastCharStream;
 9 | 
10 | /**
11 |  * A Python 2.7 tokenizer.
12 |  * 
13 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
14 |  * 
15 |  */
16 | public class Python27Tokenizer extends AbstractPythonTokenizer {
17 | 
18 | 	/**
19 | 	 * Create the PyDev token manager of the Python 2.7 grammar.
20 | 	 * 
21 | 	 * @param stream
22 | 	 *            the character stream to be tokenized
23 | 	 * @return a token manager reading from the stream
24 | 	 */
25 | 	@Override
26 | 	public ITokenManager getPythonTokenizer(final FastCharStream stream) {
27 | 		return new PythonGrammar27TokenManager(stream);
28 | 	}
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/src/main/java/codemining/python/codeutils/Python30Tokenizer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.python.codeutils;
 5 | 
 6 | import org.python.pydev.parser.grammar30.PythonGrammar30TokenManager;
 7 | import org.python.pydev.parser.grammarcommon.ITokenManager;
 8 | import org.python.pydev.parser.jython.FastCharStream;
 9 | 
10 | /**
11 |  * A Python 3.0 tokenizer.
12 |  * 
13 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
14 |  * 
15 |  */
16 | public class Python30Tokenizer extends AbstractPythonTokenizer {
17 | 
18 | 	private static final long serialVersionUID = 6944634686739086853L;
19 | 
20 | 	/**
21 | 	 * Create the PyDev token manager of the Python 3.0 grammar.
22 | 	 * 
23 | 	 * @param stream
24 | 	 *            the character stream to be tokenized
25 | 	 * @return a token manager reading from the stream
26 | 	 */
27 | 	@Override
28 | 	public ITokenManager getPythonTokenizer(final FastCharStream stream) {
29 | 		return new PythonGrammar30TokenManager(stream);
30 | 	}
31 | }
32 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/JavaApproximateTypeInferencerTest.java:
--------------------------------------------------------------------------------
 1 | package codemining.java.codeutils;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import java.io.File;
 6 | import java.io.IOException;
 7 | import java.util.Map;
 8 | 
 9 | import org.apache.commons.io.FileUtils;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 | 
13 | import codemining.languagetools.ParseType;
14 | 
15 | /**
16 |  * Test the variable types that {@link JavaApproximateTypeInferencer} infers for
17 |  * the SampleClass3 resource.
18 |  */
19 | public class JavaApproximateTypeInferencerTest {
20 | 
21 | 	String classContent;
22 | 
23 | 	@Before
24 | 	public void setUp() throws IOException {
25 | 		classContent = FileUtils.readFileToString(new File(
26 | 				JavaAstExtractorTest.class.getClassLoader()
27 | 						.getResource("SampleClass3.txt").getFile()));
28 | 	}
29 | 
30 | 	@Test
31 | 	public void test() {
32 | 		JavaASTExtractor ex = new JavaASTExtractor(false);
33 | 		JavaApproximateTypeInferencer jati = new JavaApproximateTypeInferencer(
34 | 				ex.getAST(classContent, ParseType.COMPILATION_UNIT));
35 | 		jati.infer();
36 | 		final Map<String, String> vars = jati.getVariableTypes();
37 | 
38 | 		// assertEquals takes the expected value first, so that a failure
39 | 		// reports the inferred type as the actual value.
40 | 		assertEquals("my.pack.SomeName", vars.get("anInstance"));
41 | 		assertEquals("int[]", vars.get("arrayOfInt"));
42 | 		assertEquals("long", vars.get("aNumber"));
43 | 		assertEquals("your.pack.Blah", vars.get("singleObject"));
44 | 		assertEquals("your.pack.Blah[]", vars.get("arrayOfObjects"));
45 | 		assertEquals("java.util.List<java.lang.Integer>", vars.get("listOfInt"));
46 | 		assertEquals(
47 | 				"java.util.Map<your.pack.Blah,java.util.Map<my.pack.SomeNameInPkg,java.util.List<java.lang.Double>>>",
48 | 				vars.get("complexParamType"));
49 | 		assertEquals("your.pack2.ParamType<your.pack.Blah>",
50 | 				vars.get("paraType"));
51 | 		assertEquals("your.pack2.ParamType<? extends your.pack.Blah>",
52 | 				vars.get("lowerBoundPa"));
53 | 		assertEquals("your.pack2.ParamType<? super your.pack.Blah>",
54 | 				vars.get("upperBoundPa"));
55 | 		assertEquals(
56 | 				"your.pack2.ParamType<? super java.util.List<? super your.pack.Blah>>",
57 | 				vars.get("upperBoundPa2"));
58 | 		assertEquals("java.io.IOException | java.lang.ArithmeticException",
59 | 				vars.get("e"));
60 | 	}
61 | }
62 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/JavaAstExtractorTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.codeutils;
 5 | 
 6 | import static org.junit.Assert.assertTrue;
 7 | 
 8 | import java.io.File;
 9 | import java.io.IOException;
10 | import java.util.List;
11 | 
12 | import org.apache.commons.io.FileUtils;
13 | import org.eclipse.jdt.core.dom.ASTNode;
14 | import org.junit.Before;
15 | import org.junit.Test;
16 | 
17 | import codemining.java.tokenizers.JavaTokenizer;
18 | import codemining.languagetools.ParseType;
19 | 
20 | /**
21 |  * Test that the AST nodes produced by {@link JavaASTExtractor} print back to
22 |  * the same tokens as the code they were parsed from.
23 |  * 
24 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
25 |  * 
26 |  */
27 | public class JavaAstExtractorTest {
28 | 
29 | 	String classContent;
30 | 	String methodContent;
31 | 
32 | 	@Before
33 | 	public void setUp() throws IOException {
34 | 		final ClassLoader loader = JavaAstExtractorTest.class.getClassLoader();
35 | 
36 | 		final File classFile = new File(loader.getResource("SampleClass.txt")
37 | 				.getFile());
38 | 		classContent = FileUtils.readFileToString(classFile);
39 | 
40 | 		final File methodFile = new File(loader.getResource(
41 | 				"SampleMethod.txt").getFile());
42 | 		methodContent = FileUtils.readFileToString(methodFile);
43 | 	}
44 | 
45 | 	/**
46 | 	 * Parse the sample class as a compilation unit and the sample method as a
47 | 	 * method with {@code getASTNode} and check that each node prints back to
48 | 	 * the tokens of its source.
49 | 	 */
50 | 	@Test
51 | 	public void testGetASTString() {
52 | 		final JavaASTExtractor extractor = new JavaASTExtractor(false);
53 | 
54 | 		assertTrue(classContent.length() > 0);
55 | 		final ASTNode classNode = extractor.getASTNode(classContent,
56 | 				ParseType.COMPILATION_UNIT);
57 | 		assertTrue(snippetMatchesAstTokens(classContent, classNode));
58 | 
59 | 		assertTrue(methodContent.length() > 0);
60 | 		final ASTNode methodNode = extractor.getASTNode(methodContent,
61 | 				ParseType.METHOD);
62 | 		assertTrue(snippetMatchesAstTokens(methodContent, methodNode));
63 | 	}
64 | 
65 | 	/**
66 | 	 * Compare the tokens of a code snippet with the tokens of the string
67 | 	 * representation of an AST node.
68 | 	 * 
69 | 	 * @param snippetCode
70 | 	 *            the original code
71 | 	 * @param node
72 | 	 *            the AST node whose toString() output is tokenized
73 | 	 * @return true when both token lists are equal
74 | 	 */
75 | 	private boolean snippetMatchesAstTokens(final String snippetCode,
76 | 			final ASTNode node) {
77 | 		final JavaTokenizer tokenizer = new JavaTokenizer();
78 | 		final List<String> expectedTokens = tokenizer
79 | 				.tokenListFromCode(snippetCode.toCharArray());
80 | 		final String printedNode = node.toString();
81 | 		final List<String> printedTokens = tokenizer
82 | 				.tokenListFromCode(printedNode.toCharArray());
83 | 		return printedTokens.equals(expectedTokens);
84 | 	}
85 | }
86 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/JavaWhitespaceTokenizerTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.codeutils;
 5 | 
 6 | import static org.junit.Assert.*;
 7 | 
 8 | import java.io.File;
 9 | import java.util.List;
10 | 
11 | import org.apache.commons.io.FileUtils;
12 | import org.junit.Before;
13 | import org.junit.Test;
14 | 
15 | import codemining.java.tokenizers.JavaWhitespaceTokenizer;
16 | 
17 | import com.google.common.collect.Lists;
18 | 
19 | /**
20 |  * Test the tokens of {@link JavaWhitespaceTokenizer} for the SampleClass2
21 |  * resource against the tokens listed in SampleClass2WhitespaceTokens.txt.
22 |  * 
23 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
24 |  * 
25 |  */
26 | public class JavaWhitespaceTokenizerTest {
27 | 
28 | 	private String code;
29 | 	private List<String> correctTokens;
30 | 
31 | 	/**
32 | 	 * Read the sample code and the expected tokens, one token per line.
33 | 	 * 
34 | 	 * @throws java.lang.Exception
35 | 	 */
36 | 	@Before
37 | 	public void setUp() throws Exception {
38 | 		code = FileUtils.readFileToString(new File(
39 | 				JavaAstExtractorTest.class.getClassLoader()
40 | 						.getResource("SampleClass2.txt").getFile()));
41 | 
42 | 		correctTokens = Lists.newArrayList(FileUtils.readFileToString(
43 | 				new File(JavaAstExtractorTest.class.getClassLoader()
44 | 						.getResource("SampleClass2WhitespaceTokens.txt")
45 | 						.getFile())).split("\n"));
46 | 	}
47 | 
48 | 	@Test
49 | 	public void test() {
50 | 		final JavaWhitespaceTokenizer tokenizer = new JavaWhitespaceTokenizer();
51 | 		final List<String> tokens = tokenizer.tokenListFromCode(code
52 | 				.toCharArray());
53 | 		for (int i = 0; i < correctTokens.size(); i++) {
54 | 			// Expected value first, so that a failure reports the tokenizer
55 | 			// output as the actual value.
56 | 			assertEquals("Does not match at position " + i,
57 | 					correctTokens.get(i), tokens.get(i));
58 | 		}
59 | 	}
60 | }
61 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/TokenizeJavaCodeTest.java:
--------------------------------------------------------------------------------
  1 | package codemining.java.codeutils;
  2 | 
  3 | import static org.junit.Assert.assertEquals;
  4 | import static org.junit.Assert.assertTrue;
  5 | 
  6 | import java.util.List;
  7 | import java.util.Map;
  8 | 
  9 | import org.eclipse.jdt.core.compiler.ITerminalSymbols;
 10 | import org.junit.Test;
 11 | 
 12 | import codemining.java.tokenizers.JavaTokenizer;
 13 | import codemining.languagetools.ITokenizer;
 14 | 
 15 | /**
 16 |  * Test the tokens and the token positions that a tokenizer returns for small
 17 |  * Java snippets. The protected methods take the tokenizer to be tested as a
 18 |  * parameter.
 19 |  */
 20 | public class TokenizeJavaCodeTest {
 21 | 
 22 | 	private static final char[] CODE_SAMPLE1 = "int x=2;".toCharArray();
 23 | 
 24 | 	private static final String[] TOKENS_SAMPLE1 = { ITokenizer.SENTENCE_START,
 25 | 			"int", "x", "=", "2", ";", ITokenizer.SENTENCE_END };
 26 | 	private static final int[] TOKEN_POS_SAMPLE1 = { -1, 0, 4, 5, 6, 7,
 27 | 			Integer.MAX_VALUE };
 28 | 
 29 | 	private static final char[] CODE_SAMPLE2 = "if (y>0) {\n a += 2;\n}"
 30 | 			.toCharArray();
 31 | 
 32 | 	private static final String[] TOKENS_SAMPLE2 = { ITokenizer.SENTENCE_START,
 33 | 			"if", "(", "y", ">", "0", ")", "{", "a", "+=", "2", ";", "}",
 34 | 			ITokenizer.SENTENCE_END };
 35 | 
 36 | 	public static final char[] CODE_SAMPLE3 = "int x=2; // this is a test\n"
 37 | 			.toCharArray();
 38 | 
 39 | 	@Test
 40 | 	public void testSample1() {
 41 | 		ITokenizer tokenizer = new JavaTokenizer();
 42 | 		testSample1(tokenizer);
 43 | 	}
 44 | 
 45 | 	/**
 46 | 	 * Check the token list returned for the first sample.
 47 | 	 * 
 48 | 	 * @param tokenizer
 49 | 	 *            the tokenizer under test
 50 | 	 */
 51 | 	protected void testSample1(ITokenizer tokenizer) {
 52 | 		final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE1);
 53 | 		for (int i = 0; i < TOKENS_SAMPLE1.length; i++) {
 54 | 			assertEquals(TOKENS_SAMPLE1[i], tok.get(i));
 55 | 		}
 56 | 		assertEquals(TOKENS_SAMPLE1.length, tok.size());
 57 | 	}
 58 | 
 59 | 	/**
 60 | 	 * Check the positions of the tokens returned for the first sample.
 61 | 	 * 
 62 | 	 * @param tokenizer
 63 | 	 *            the tokenizer under test
 64 | 	 */
 65 | 	protected void testSample1Position(ITokenizer tokenizer) {
 66 | 		final Map<Integer, String> toks = tokenizer
 67 | 				.tokenListWithPos(CODE_SAMPLE1);
 68 | 		for (int i = 0; i < TOKEN_POS_SAMPLE1.length; i++) {
 69 | 			assertTrue(toks.containsKey(TOKEN_POS_SAMPLE1[i]));
 70 | 			assertEquals(TOKENS_SAMPLE1[i], toks.get(TOKEN_POS_SAMPLE1[i]));
 71 | 		}
 72 | 		assertEquals(TOKENS_SAMPLE1.length, toks.size());
 73 | 	}
 74 | 
 75 | 	@Test
 76 | 	public void testSample1postion() {
 77 | 		ITokenizer tokenizer = new JavaTokenizer();
 78 | 		testSample1Position(tokenizer);
 79 | 	}
 80 | 
 81 | 	@Test
 82 | 	public void testSample2() {
 83 | 		ITokenizer tokenizer = new JavaTokenizer();
 84 | 		testSample2(tokenizer);
 85 | 	}
 86 | 
 87 | 	/**
 88 | 	 * Check the leading tokens returned for the second sample.
 89 | 	 * 
 90 | 	 * @param tokenizer
 91 | 	 *            the tokenizer under test
 92 | 	 */
 93 | 	protected void testSample2(ITokenizer tokenizer) {
 94 | 		final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE2);
 95 | 
 96 | 		for (int i = 0; i < TOKENS_SAMPLE2.length; i++) {
 97 | 			assertEquals(TOKENS_SAMPLE2[i], tok.get(i));
 98 | 		}
 99 | 	}
100 | 
101 | 	@Test
102 | 	public void testSample3() {
103 | 		ITokenizer tokenizer = new JavaTokenizer();
104 | 		testSample3(tokenizer);
105 | 	}
106 | 
107 | 	/**
108 | 	 * Check that the third sample, which ends with a line comment, yields the
109 | 	 * token list of the first sample.
110 | 	 * 
111 | 	 * @param tokenizer
112 | 	 *            the tokenizer under test
113 | 	 */
114 | 	protected void testSample3(ITokenizer tokenizer) {
115 | 		final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE3);
116 | 		for (int i = 0; i < TOKENS_SAMPLE1.length; i++) {
117 | 			assertEquals(TOKENS_SAMPLE1[i], tok.get(i));
118 | 		}
119 | 		assertEquals(TOKENS_SAMPLE1.length, tok.size());
120 | 	}
121 | 
122 | 	@Test
123 | 	public void testTokenTypes() {
124 | 		ITokenizer tokenizer = new JavaTokenizer();
125 | 		assertEquals(
126 | 				new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
127 | 				tokenizer.getTokenFromString("hello"));
128 | 		assertEquals(
129 | 				new ITokenizer.FullToken("{", Integer
130 | 						.toString(ITerminalSymbols.TokenNameLBRACE)),
131 | 				tokenizer.getTokenFromString("{"));
132 | 
133 | 	}
134 | }
135 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/binding/BindingTester.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.java.codeutils.binding;
 5 | 
 6 | import static org.junit.Assert.assertEquals;
 7 | import static org.junit.Assert.assertFalse;
 8 | 
 9 | import java.util.List;
10 | import java.util.Set;
11 | 
12 | import codemining.languagetools.bindings.TokenNameBinding;
13 | 
14 | import com.google.common.collect.Sets;
15 | 
16 | /**
17 |  * Utility class for testing bindings.
18 |  * 
19 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
20 |  * 
21 |  */
22 | public class BindingTester {
23 | 
24 | 	private BindingTester() {
25 | 	}
26 | 
27 | 	/**
28 | 	 * Check every binding on its own and assert that no token index is shared
29 | 	 * between two bindings.
30 | 	 * 
31 | 	 * @param bindings
32 | 	 *            the bindings to check
33 | 	 */
34 | 	public static void checkAllBindings(final List<TokenNameBinding> bindings) {
35 | 		final Set<Integer> seenIndexes = Sets.newHashSet();
36 | 		for (final TokenNameBinding binding : bindings) {
37 | 			BindingTester.checkBinding(binding);
38 | 			final boolean sharesIndex = !java.util.Collections.disjoint(
39 | 					seenIndexes, binding.nameIndexes);
40 | 			assertFalse("Indexes appear only once", sharesIndex);
41 | 			seenIndexes.addAll(binding.nameIndexes);
42 | 		}
43 | 	}
44 | 
45 | 	/**
46 | 	 * Assert that all the name indexes of a binding point to the same token.
47 | 	 * 
48 | 	 * @param binding
49 | 	 *            the binding to check
50 | 	 */
51 | 	public static void checkBinding(final TokenNameBinding binding) {
52 | 		final int firstIndex = binding.nameIndexes.iterator().next();
53 | 		final String tokenName = binding.sourceCodeTokens.get(firstIndex);
54 | 		for (final int idx : binding.nameIndexes) {
55 | 			assertEquals(tokenName, binding.sourceCodeTokens.get(idx));
56 | 		}
57 | 	}
58 | }
59 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/binding/JavaApproximateVariableBindingExtractorTest.java:
--------------------------------------------------------------------------------
 1 | package codemining.java.codeutils.binding;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | import static org.junit.Assert.assertTrue;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.Collection;
 9 | import java.util.List;
10 | 
11 | import org.apache.commons.io.FileUtils;
12 | import org.junit.Before;
13 | import org.junit.Test;
14 | 
15 | import codemining.java.codeutils.JavaAstExtractorTest;
16 | import codemining.languagetools.bindings.TokenNameBinding;
17 | 
18 | /**
19 |  * Test the variable bindings of {@link JavaApproximateVariableBindingExtractor}
20 |  * and compare them with those of {@link JavaExactVariableBindingsExtractor}.
21 |  */
22 | public class JavaApproximateVariableBindingExtractorTest {
23 | 
24 | 	/**
25 | 	 * Assert that every element of the first collection is in the second.
26 | 	 */
27 | 	private static <T> void allAreContained(final Collection<T> collection,
28 | 			final Collection<T> in) {
29 | 		for (final T element : collection) {
30 | 			assertTrue(in.contains(element));
31 | 		}
32 | 	}
33 | 
34 | 	File classContent;
35 | 
36 | 	File classContent2;
37 | 
38 | 	String methodContent;
39 | 
40 | 	@Before
41 | 	public void setUp() throws IOException {
42 | 		classContent = new File(JavaAstExtractorTest.class.getClassLoader()
43 | 				.getResource("SampleClass.txt").getFile());
44 | 		classContent2 = new File(JavaAstExtractorTest.class.getClassLoader()
45 | 				.getResource("SampleClass2.txt").getFile());
46 | 
47 | 		methodContent = FileUtils.readFileToString(new File(
48 | 				JavaAstExtractorTest.class.getClassLoader()
49 | 						.getResource("SampleMethod.txt").getFile()));
50 | 	}
51 | 
52 | 	@Test
53 | 	public void testClassBindings() throws IOException {
54 | 		final JavaApproximateVariableBindingExtractor jabe = new JavaApproximateVariableBindingExtractor();
55 | 		final JavaExactVariableBindingsExtractor jbe = new JavaExactVariableBindingsExtractor();
56 | 
57 | 		final List<TokenNameBinding> classVariableBindings = jabe
58 | 				.getNameBindings(classContent);
59 | 		final List<TokenNameBinding> classVariableBindingsExact = jbe
60 | 				.getNameBindings(classContent);
61 | 
62 | 		BindingTester
63 | 				.checkAllBindings(classVariableBindings);
64 | 		// Expected value first, so that a failure reports the real size as
65 | 		// the actual value.
66 | 		assertEquals(5, classVariableBindings.size());
67 | 
68 | 		final List<TokenNameBinding> classVariableBindings2 = jabe
69 | 				.getNameBindings(classContent2);
70 | 		final List<TokenNameBinding> classVariableBindings2Exact = jbe
71 | 				.getNameBindings(classContent2);
72 | 
73 | 		assertEquals(9, classVariableBindings2.size());
74 | 
75 | 		allAreContained(classVariableBindingsExact, classVariableBindings);
76 | 		allAreContained(classVariableBindings2Exact, classVariableBindings2);
77 | 	}
78 | 
79 | 	@Test
80 | 	public void testMethodBinding() {
81 | 		final JavaApproximateVariableBindingExtractor jabe = new JavaApproximateVariableBindingExtractor();
82 | 		final List<TokenNameBinding> methodVariableBindings = jabe
83 | 				.getNameBindings(methodContent);
84 | 		BindingTester
85 | 				.checkAllBindings(methodVariableBindings);
86 | 		assertEquals(3, methodVariableBindings.size());
87 | 
88 | 	}
89 | 
90 | }
91 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/binding/JavaExactVariableBindingsExtractorTest.java:
--------------------------------------------------------------------------------
 1 | package codemining.java.codeutils.binding;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import java.io.File;
 6 | import java.io.IOException;
 7 | import java.util.List;
 8 | 
 9 | import org.junit.Before;
10 | import org.junit.Test;
11 | 
12 | import codemining.java.codeutils.JavaAstExtractorTest;
13 | import codemining.languagetools.bindings.TokenNameBinding;
14 | 
15 | /**
16 |  * Test the number of variable bindings that
17 |  * {@link JavaExactVariableBindingsExtractor} finds in the sample classes.
18 |  */
19 | public class JavaExactVariableBindingsExtractorTest {
20 | 
21 | 	File classContent;
22 | 
23 | 	File classContent2;
24 | 
25 | 	@Before
26 | 	public void setUp() throws IOException {
27 | 		classContent = new File(JavaAstExtractorTest.class.getClassLoader()
28 | 				.getResource("SampleClass.txt").getFile());
29 | 		classContent2 = new File(JavaAstExtractorTest.class.getClassLoader()
30 | 				.getResource("SampleClass2.txt").getFile());
31 | 	}
32 | 
33 | 	@Test
34 | 	public void testClassBindings() throws IOException {
35 | 		final JavaExactVariableBindingsExtractor jbe = new JavaExactVariableBindingsExtractor();
36 | 		final List<TokenNameBinding> classVariableBindings = jbe
37 | 				.getNameBindings(classContent);
38 | 		BindingTester.checkAllBindings(classVariableBindings);
39 | 		// Expected value first, so that a failure reports the real size as
40 | 		// the actual value.
41 | 		assertEquals(5, classVariableBindings.size());
42 | 
43 | 		final List<TokenNameBinding> classVariableBindings2 = jbe
44 | 				.getNameBindings(classContent2);
45 | 
46 | 		assertEquals(9, classVariableBindings2.size());
47 | 	}
48 | }
49 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/binding/JavaMethodBindingExtractorTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  */
 4 | package codemining.java.codeutils.binding;
 5 | 
 6 | import static org.junit.Assert.assertEquals;
 7 | 
 8 | import java.io.File;
 9 | import java.io.IOException;
10 | import java.util.List;
11 | 
12 | import org.apache.commons.io.FileUtils;
13 | import org.junit.Before;
14 | import org.junit.Test;
15 | 
16 | import codemining.java.codeutils.JavaAstExtractorTest;
17 | import codemining.languagetools.bindings.TokenNameBinding;
18 | 
19 | /**
20 |  * Test the number of bindings that
21 |  * {@link JavaMethodInvocationBindingExtractor} finds in the sample classes.
22 |  */
23 | public class JavaMethodBindingExtractorTest {
24 | 
25 | 	File classContent;
26 | 
27 | 	File classContent2;
28 | 
29 | 	String methodContent;
30 | 
31 | 	@Before
32 | 	public void setUp() throws IOException {
33 | 		classContent = new File(JavaAstExtractorTest.class.getClassLoader()
34 | 				.getResource("SampleClass.txt").getFile());
35 | 		classContent2 = new File(JavaAstExtractorTest.class.getClassLoader()
36 | 				.getResource("SampleClass2.txt").getFile());
37 | 
38 | 		methodContent = FileUtils.readFileToString(new File(
39 | 				JavaAstExtractorTest.class.getClassLoader()
40 | 						.getResource("SampleMethod.txt").getFile()));
41 | 	}
42 | 
43 | 	@Test
44 | 	public void testClassLevelBindings() throws IOException {
45 | 		final JavaMethodInvocationBindingExtractor jame = new JavaMethodInvocationBindingExtractor();
46 | 
47 | 		final List<TokenNameBinding> classMethodBindings = jame
48 | 				.getNameBindings(classContent);
49 | 
50 | 		BindingTester.checkAllBindings(classMethodBindings);
51 | 		// Expected value first, so that a failure reports the real size as
52 | 		// the actual value.
53 | 		assertEquals(7, classMethodBindings.size());
54 | 
55 | 		final List<TokenNameBinding> classMethodBindings2 = jame
56 | 				.getNameBindings(classContent2);
57 | 		BindingTester.checkAllBindings(classMethodBindings2);
58 | 
59 | 		assertEquals(6, classMethodBindings2.size());
60 | 	}
61 | 
62 | }
63 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/java/codeutils/binding/JavaTypeBindingExtractorTest.java:
--------------------------------------------------------------------------------
 1 | package codemining.java.codeutils.binding;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import java.io.File;
 6 | import java.io.IOException;
 7 | import java.util.List;
 8 | 
 9 | import org.apache.commons.io.FileUtils;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 | 
13 | import codemining.java.codeutils.JavaAstExtractorTest;
14 | import codemining.languagetools.bindings.TokenNameBinding;
15 | 
16 | /**
17 |  * Test the number of bindings that
18 |  * {@link JavaTypeDeclarationBindingExtractor} finds in the sample classes.
19 |  */
20 | public class JavaTypeBindingExtractorTest {
21 | 
22 | 	File classContent;
23 | 
24 | 	File classContent2;
25 | 
26 | 	String methodContent;
27 | 
28 | 	@Before
29 | 	public void setUp() throws IOException {
30 | 		classContent = new File(JavaAstExtractorTest.class.getClassLoader()
31 | 				.getResource("SampleClass.txt").getFile());
32 | 		classContent2 = new File(JavaAstExtractorTest.class.getClassLoader()
33 | 				.getResource("SampleClass2.txt").getFile());
34 | 
35 | 		methodContent = FileUtils.readFileToString(new File(
36 | 				JavaAstExtractorTest.class.getClassLoader()
37 | 						.getResource("SampleMethod.txt").getFile()));
38 | 	}
39 | 
40 | 	@Test
41 | 	public void testClassLevelBindings() throws IOException {
42 | 		final JavaTypeDeclarationBindingExtractor jame = new JavaTypeDeclarationBindingExtractor();
43 | 
44 | 		final List<TokenNameBinding> classTypeBindings = jame
45 | 				.getNameBindings(classContent);
46 | 
47 | 		BindingTester.checkAllBindings(classTypeBindings);
48 | 		// Expected value first, so that a failure reports the real size as
49 | 		// the actual value.
50 | 		assertEquals(1, classTypeBindings.size());
51 | 
52 | 		final List<TokenNameBinding> classTypeBindings2 = jame
53 | 				.getNameBindings(classContent2);
54 | 		BindingTester.checkAllBindings(classTypeBindings2);
55 | 
56 | 		assertEquals(1, classTypeBindings2.size());
57 | 	}
58 | 
59 | }
60 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/js/codeutils/JavascriptASTExtractorTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.js.codeutils;
 5 | 
 6 | import static org.junit.Assert.assertTrue;
 7 | 
 8 | import java.io.File;
 9 | import java.io.IOException;
10 | import java.util.List;
11 | 
12 | import org.apache.commons.io.FileUtils;
13 | import org.eclipse.wst.jsdt.core.dom.ASTNode;
14 | import org.junit.Before;
15 | import org.junit.Test;
16 | 
17 | import codemining.languagetools.ParseType;
18 | 
19 | /**
20 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
21 |  * 
22 |  */
23 | // FIXME Javascript AST parser is quite buggy: For SampleJavascript the === is
24 | // printed as + and for SampleJavascript2 it prints a random semicolon at line 7
25 | // Is this just a buggy toString method issue? Tests commented out until fixed.
26 | public class JavascriptASTExtractorTest {
27 | 
28 | 	/** Text of the {@code SampleJavascript2.txt} fixture, parsed as a compilation unit. */
29 | 	String classContent;
30 | 
31 | 	/** Text of the {@code SampleJavascript.txt} fixture, parsed as a method. */
32 | 	String methodContent;
33 | 
34 | 	/**
35 | 	 * Reads both JavaScript fixtures from the test classpath.
36 | 	 *
37 | 	 * @throws IOException
38 | 	 *             if a fixture cannot be read
39 | 	 */
40 | 	@Before
41 | 	public void setUp() throws IOException {
42 | 		classContent = FileUtils.readFileToString(new File(
43 | 				JavascriptASTExtractorTest.class.getClassLoader()
44 | 						.getResource("SampleJavascript2.txt").getFile()));
45 | 
46 | 		methodContent = FileUtils.readFileToString(new File(
47 | 				JavascriptASTExtractorTest.class.getClassLoader()
48 | 						.getResource("SampleJavascript.txt").getFile()));
49 | 	}
50 | 
51 | 	/**
52 | 	 * Smoke test for {@link JavascriptASTExtractor#getASTNode}: both fixtures
53 | 	 * must be non-empty and must be handed to the extractor without an
54 | 	 * exception being thrown. The token-level comparisons stay disabled; see
55 | 	 * the FIXME on this class.
56 | 	 */
57 | 	@Test
58 | 	public void testGetASTString() {
59 | 		final JavascriptASTExtractor ex = new JavascriptASTExtractor(false);
60 | 		assertTrue(classContent.length() > 0);
61 | 		final ASTNode classCU = ex.getASTNode(classContent,
62 | 				ParseType.COMPILATION_UNIT);
63 | 		// assertTrue(snippetMatchesAstTokens(classContent, classCU));
64 | 
65 | 		assertTrue(methodContent.length() > 0);
66 | 		final ASTNode methodCU = ex.getASTNode(methodContent, ParseType.METHOD);
67 | 		// assertTrue(snippetMatchesAstTokens(methodContent, methodCU));
68 | 	}
69 | 
70 | 	/**
71 | 	 * Tokenizes the snippet and the string form of the AST node with a
72 | 	 * {@link JavascriptTokenizer} and compares the two token lists. Only
73 | 	 * referenced by the disabled assertions in testGetASTString.
74 | 	 *
75 | 	 * @param snippetCode
76 | 	 *            the original source text
77 | 	 * @param node
78 | 	 *            the AST node produced for that text
79 | 	 * @return true when both token lists are equal
80 | 	 */
81 | 	private boolean snippetMatchesAstTokens(final String snippetCode,
82 | 			final ASTNode node) {
83 | 		final JavascriptTokenizer tokenizer = new JavascriptTokenizer();
84 | 		final List<String> snippetTokens = tokenizer
85 | 				.tokenListFromCode(snippetCode.toCharArray());
86 | 		final List<String> astTokens = tokenizer.tokenListFromCode(node
87 | 				.toString().toCharArray());
88 | 		return astTokens.equals(snippetTokens);
89 | 	}
90 | }
91 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/js/codeutils/TokenizeJavascriptCodeTest.java:
--------------------------------------------------------------------------------
  1 | package codemining.js.codeutils;
  2 | 
  3 | import static org.junit.Assert.assertEquals;
  4 | import static org.junit.Assert.assertTrue;
  5 | 
  6 | import java.util.List;
  7 | import java.util.Map;
  8 | 
  9 | import org.eclipse.wst.jsdt.core.compiler.ITerminalSymbols;
 10 | import org.junit.Test;
 11 | 
 12 | import codemining.languagetools.ITokenizer;
 13 | 
 14 | public class TokenizeJavascriptCodeTest {
 15 | 
 16 | 	private static final char[] CODE_SAMPLE1 = "var x=2;".toCharArray();
 17 | 
 18 | 	private static final String[] TOKENS_SAMPLE1 = { ITokenizer.SENTENCE_START,
 19 | 			"var", "x", "=", "2", ";", ITokenizer.SENTENCE_END };
 20 | 	private static final int[] TOKEN_POS_SAMPLE1 = { -1, 0, 4, 5, 6, 7,
 21 | 			Integer.MAX_VALUE };
 22 | 
 23 | 	private static final char[] CODE_SAMPLE2 = "if (y>0) {\n a += 2;\n}"
 24 | 			.toCharArray();
 25 | 
 26 | 	private static final String[] TOKENS_SAMPLE2 = { ITokenizer.SENTENCE_START,
 27 | 			"if", "(", "y", ">", "0", ")", "{", "a", "+=", "2", ";", "}",
 28 | 			ITokenizer.SENTENCE_END };
 29 | 
 30 | 	public static final char[] CODE_SAMPLE3 = "var x=2; // this is a test\n"
 31 | 			.toCharArray();
 32 | 
 33 | 	@Test
 34 | 	public void testSample1() {
 35 | 		final ITokenizer tokenizer = new JavascriptTokenizer();
 36 | 		testSample1(tokenizer);
 37 | 	}
 38 | 
 39 | 	/**
 40 | 	 * @param tokenizer
 41 | 	 */
 42 | 	protected void testSample1(final ITokenizer tokenizer) {
 43 | 		final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE1);
 44 | 		for (int i = 0; i < TOKENS_SAMPLE1.length; i++) {
 45 | 			assertEquals(tok.get(i), TOKENS_SAMPLE1[i]);
 46 | 		}
 47 | 		assertEquals(tok.size(), TOKENS_SAMPLE1.length);
 48 | 	}
 49 | 
 50 | 	/**
 51 | 	 * @param tokenizer
 52 | 	 */
 53 | 	protected void testSample1Position(final ITokenizer tokenizer) {
 54 | 		final Map<Integer, String> toks = tokenizer
 55 | 				.tokenListWithPos(CODE_SAMPLE1);
 56 | 		for (int i = 0; i < TOKEN_POS_SAMPLE1.length; i++) {
 57 | 			assertTrue(toks.containsKey(TOKEN_POS_SAMPLE1[i]));
 58 | 			assertEquals(toks.get(TOKEN_POS_SAMPLE1[i]), TOKENS_SAMPLE1[i]);
 59 | 		}
 60 | 		assertEquals(toks.size(), TOKENS_SAMPLE1.length);
 61 | 	}
 62 | 
 63 | 	@Test
 64 | 	public void testSample1postion() {
 65 | 		final ITokenizer tokenizer = new JavascriptTokenizer();
 66 | 		testSample1Position(tokenizer);
 67 | 	}
 68 | 
 69 | 	@Test
 70 | 	public void testSample2() {
 71 | 		final ITokenizer tokenizer = new JavascriptTokenizer();
 72 | 		testSample2(tokenizer);
 73 | 	}
 74 | 
 75 | 	/**
 76 | 	 * @param tokenizer
 77 | 	 */
 78 | 	protected void testSample2(final ITokenizer tokenizer) {
 79 | 		final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE2);
 80 | 
 81 | 		for (int i = 0; i < TOKENS_SAMPLE2.length; i++) {
 82 | 			assertEquals(tok.get(i), TOKENS_SAMPLE2[i]);
 83 | 		}
 84 | 	}
 85 | 
 86 | 	@Test
 87 | 	public void testSample3() {
 88 | 		final ITokenizer tokenizer = new JavascriptTokenizer();
 89 | 		testSample3(tokenizer);
 90 | 	}
 91 | 
 92 | 	/**
 93 | 	 * @param tokenizer
 94 | 	 */
 95 | 	protected void testSample3(final ITokenizer tokenizer) {
 96 | 		final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE3);
 97 | 		for (int i = 0; i < TOKENS_SAMPLE1.length; i++) {
 98 | 			assertEquals(tok.get(i), TOKENS_SAMPLE1[i]);
 99 | 		}
100 | 		assertEquals(tok.size(), TOKENS_SAMPLE1.length);
101 | 	}
102 | 
103 | 	@Test
104 | 	public void testTokenTypes() {
105 | 		final ITokenizer tokenizer = new JavascriptTokenizer();
106 | 		assertEquals(
107 | 				tokenizer.getTokenFromString("hello"),
108 | 				new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()));
109 | 		assertEquals(
110 | 				tokenizer.getTokenFromString("{"),
111 | 				new ITokenizer.FullToken("{", Integer
112 | 						.toString(ITerminalSymbols.TokenNameLBRACE)));
113 | 
114 | 	}
115 | }
116 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/js/codeutils/binding/JavascriptApproximateVariableBindingExtractorTest.java:
--------------------------------------------------------------------------------
 1 | package codemining.js.codeutils.binding;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | import static org.junit.Assert.assertTrue;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.Collection;
 9 | import java.util.List;
10 | 
11 | import org.apache.commons.io.FileUtils;
12 | import org.junit.Before;
13 | import org.junit.Test;
14 | 
15 | import codemining.java.codeutils.binding.BindingTester;
16 | import codemining.js.codeutils.JavascriptASTExtractorTest;
17 | import codemining.languagetools.bindings.TokenNameBinding;
18 | 
19 | /**
20 |  * Checks {@link JavascriptApproximateVariableBindingExtractor} on the
21 |  * JavaScript fixtures, including that its bindings contain every binding the
22 |  * exact extractor reports.
23 |  */
24 | public class JavascriptApproximateVariableBindingExtractorTest {
25 | 
26 | 	/** Asserts that every element of {@code collection} is also in {@code in}. */
27 | 	private static <T> void allAreContained(final Collection<T> collection,
28 | 			final Collection<T> in) {
29 | 		for (final T element : collection) {
30 | 			assertTrue("missing: " + element, in.contains(element));
31 | 		}
32 | 	}
33 | 
34 | 	File classContent;
35 | 
36 | 	File classContent2;
37 | 
38 | 	String methodContent;
39 | 
40 | 	/** Resolves the JavaScript fixtures through the class loader. */
41 | 	@Before
42 | 	public void setUp() throws IOException {
43 | 		classContent = new File(JavascriptASTExtractorTest.class
44 | 				.getClassLoader().getResource("SampleJavascript2.txt")
45 | 				.getFile());
46 | 		classContent2 = new File(JavascriptASTExtractorTest.class
47 | 				.getClassLoader().getResource("SampleJavascript3.txt")
48 | 				.getFile());
49 | 
50 | 		methodContent = FileUtils.readFileToString(new File(
51 | 				JavascriptASTExtractorTest.class.getClassLoader()
52 | 						.getResource("SampleJavascript.txt").getFile()));
53 | 	}
54 | 
55 | 	/**
56 | 	 * Expects 3 bindings for SampleJavascript2.txt and 13 for
57 | 	 * SampleJavascript3.txt, and each exact binding among the approximate ones.
58 | 	 */
59 | 	@Test
60 | 	public void testClassBindings() throws IOException {
61 | 		final JavascriptApproximateVariableBindingExtractor jabe = new JavascriptApproximateVariableBindingExtractor();
62 | 		final JavascriptExactVariableBindingsExtractor jbe = new JavascriptExactVariableBindingsExtractor();
63 | 
64 | 		final List<TokenNameBinding> classVariableBindings = jabe
65 | 				.getNameBindings(classContent);
66 | 		final List<TokenNameBinding> classVariableBindingsExact = jbe
67 | 				.getNameBindings(classContent);
68 | 
69 | 		BindingTester.checkAllBindings(classVariableBindings);
70 | 		// JUnit order is assertEquals(expected, actual).
71 | 		assertEquals(3, classVariableBindings.size());
72 | 
73 | 		final List<TokenNameBinding> classVariableBindings2 = jabe
74 | 				.getNameBindings(classContent2);
75 | 		final List<TokenNameBinding> classVariableBindings2Exact = jbe
76 | 				.getNameBindings(classContent2);
77 | 
78 | 		assertEquals(13, classVariableBindings2.size());
79 | 
80 | 		allAreContained(classVariableBindingsExact, classVariableBindings);
81 | 		allAreContained(classVariableBindings2Exact, classVariableBindings2);
82 | 	}
83 | 
84 | 	/** Expects a single binding for the SampleJavascript.txt function. */
85 | 	@Test
86 | 	public void testMethodBinding() {
87 | 		final JavascriptApproximateVariableBindingExtractor jabe = new JavascriptApproximateVariableBindingExtractor();
88 | 		final List<TokenNameBinding> methodVariableBindings = jabe
89 | 				.getNameBindings(methodContent);
90 | 		BindingTester.checkAllBindings(methodVariableBindings);
91 | 		assertEquals(1, methodVariableBindings.size());
92 | 	}
93 | 
94 | }
95 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/js/codeutils/binding/JavascriptExactVariableBindingsExtractorTest.java:
--------------------------------------------------------------------------------
 1 | package codemining.js.codeutils.binding;
 2 | 
 3 | import java.io.File;
 4 | import java.io.IOException;
 5 | import java.util.List;
 6 | 
 7 | import org.junit.Before;
 8 | import org.junit.Test;
 9 | 
10 | import codemining.java.codeutils.binding.BindingTester;
11 | import codemining.js.codeutils.JavascriptASTExtractorTest;
12 | import codemining.languagetools.bindings.TokenNameBinding;
13 | 
14 | // FIXME Tests commented out until binding resolution is fixed
15 | public class JavascriptExactVariableBindingsExtractorTest {
16 | 
17 | 	File classContent;
18 | 
19 | 	File classContent2;
20 | 
21 | 	/**
22 | 	 * Points both fields at their JavaScript fixtures on the test classpath.
23 | 	 */
24 | 	@Before
25 | 	public void setUp() throws IOException {
26 | 		classContent = fixture("SampleJavascript.txt");
27 | 		classContent2 = fixture("SampleJavascript2.txt");
28 | 	}
29 | 
30 | 	/**
31 | 	 * Runs the exact extractor over both fixtures. Only the first result is
32 | 	 * passed to BindingTester; the size assertions remain disabled.
33 | 	 */
34 | 	@Test
35 | 	public void testClassBindings() throws IOException {
36 | 		final JavascriptExactVariableBindingsExtractor extractor = new JavascriptExactVariableBindingsExtractor();
37 | 
38 | 		final List<TokenNameBinding> firstBindings = extractor
39 | 				.getNameBindings(classContent);
40 | 		BindingTester.checkAllBindings(firstBindings);
41 | 		// assertEquals(firstBindings.size(), 1);
42 | 
43 | 		final List<TokenNameBinding> secondBindings = extractor
44 | 				.getNameBindings(classContent2);
45 | 		// assertEquals(secondBindings.size(), 3);
46 | 	}
47 | 
48 | 	/**
49 | 	 * Resolves a test resource by name via the class loader of
50 | 	 * JavascriptASTExtractorTest.
51 | 	 */
52 | 	private static File fixture(final String resourceName) {
53 | 		final ClassLoader loader = JavascriptASTExtractorTest.class
54 | 				.getClassLoader();
55 | 		return new File(loader.getResource(resourceName).getFile());
56 | 	}
57 | }
58 | 


--------------------------------------------------------------------------------
/src/test/java/codemining/languagetools/TokenizerUtilsTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  */
 4 | package codemining.languagetools;
 5 | 
 6 | import static org.junit.Assert.assertEquals;
 7 | 
 8 | import java.io.File;
 9 | import java.io.IOException;
10 | 
11 | import org.apache.commons.io.FileUtils;
12 | import org.junit.Before;
13 | import org.junit.Test;
14 | 
15 | import codemining.java.codeutils.JavaAstExtractorTest;
16 | 
17 | /**
18 |  * @author Miltos Allamanis <m.allamanis@ed.ac.uk>
19 |  * 
20 |  */
21 | public class TokenizerUtilsTest {
22 | 
23 | 	/** Text of the {@code SampleClass.txt} fixture. */
24 | 	private String classContent;
25 | 
26 | 	/**
27 | 	 * Reads the fixture from the test classpath.
28 | 	 *
29 | 	 * @throws IOException
30 | 	 *             if the fixture cannot be read
31 | 	 */
32 | 	@Before
33 | 	public void setUp() throws IOException {
34 | 		classContent = FileUtils.readFileToString(new File(
35 | 				JavaAstExtractorTest.class.getClassLoader()
36 | 						.getResource("SampleClass.txt").getFile()));
37 | 	}
38 | 
39 | 	/**
40 | 	 * Checks getColumnOfPosition for three character offsets into the fixture.
41 | 	 * The expected columns depend on the exact text of SampleClass.txt.
42 | 	 */
43 | 	@Test
44 | 	public void testColumn() {
45 | 		// JUnit order is assertEquals(expected, actual).
46 | 		assertEquals(29, TokenizerUtils.getColumnOfPosition(classContent, 970));
47 | 		assertEquals(13, TokenizerUtils.getColumnOfPosition(classContent, 980));
48 | 		assertEquals(17, TokenizerUtils.getColumnOfPosition(classContent, 1565));
49 | 	}
50 | 
51 | }
52 | 


--------------------------------------------------------------------------------
/src/test/resources/SampleClass.txt:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2000, 2007 IBM Corporation and others.
 3 |  * All rights reserved. This program and the accompanying materials 
 4 |  * are made available under the terms of the Eclipse Public License v1.0
 5 |  * which accompanies this distribution, and is available at
 6 |  * http://www.eclipse.org/legal/epl-v10.html
 7 |  * 
 8 |  * Contributors:
 9 |  *     IBM Corporation - initial API and implementation
10 |  *******************************************************************************/
11 | package org.eclipse.jdt.jeview;
12 | 
13 | 
14 | import org.eclipse.core.runtime.Assert;
15 | 
16 | import org.eclipse.ui.IEditorInput;
17 | import org.eclipse.ui.IEditorPart;
18 | import org.eclipse.ui.IWorkbenchPage;
19 | import org.eclipse.ui.IWorkbenchWindow;
20 | import org.eclipse.ui.texteditor.ITextEditor;
21 | 
22 | import org.eclipse.jdt.core.IJavaElement;
23 | import org.eclipse.jdt.core.IOpenable;
24 | 
25 | import org.eclipse.jdt.ui.JavaUI;
26 | 
27 | /**
28 |  *
29 |  */
30 | public class EditorUtility {
31 | 	private EditorUtility() {
32 | 		super();
33 | 	}
34 | 
35 | 	public static IEditorPart getActiveEditor() {
36 | 		IWorkbenchWindow window= JEViewPlugin.getDefault().getWorkbench().getActiveWorkbenchWindow();
37 | 		if (window != null) {
38 | 			IWorkbenchPage page= window.getActivePage();
39 | 			if (page != null) {
40 | 				return page.getActiveEditor();
41 | 			}
42 | 		}
43 | 		return null;
44 | 	}
45 | 	
46 | 	
47 | 	public static IOpenable getJavaInput(IEditorPart part) {
48 | 		IEditorInput editorInput= part.getEditorInput();
49 | 		if (editorInput != null) {
50 | 			IJavaElement input= javaUIgetEditorInputJavaElement(editorInput);
51 | 			if (input instanceof IOpenable) {
52 | 				return (IOpenable) input;
53 | 			}
54 | 		}
55 | 		return null;	
56 | 	}
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/src/test/resources/SampleClass2.txt:
--------------------------------------------------------------------------------
 1 | package junit.framework;
 2 | 
 3 | import java.util.List;
 4 | import java.util.ArrayList;
 5 | 
 6 | /**
 7 |  * This is just a piece of code from JUnit. Not necessarily working.
 8 |  * A <code>TestResult</code> collects the results of executing
 9 |  * a test case. It is an instance of the Collecting Parameter pattern.
10 |  * The test framework distinguishes between <i>failures</i> and <i>errors</i>.
11 |  * A failure is anticipated and checked for with assertions. Errors are
12 |  * unanticipated problems like an {@link ArrayIndexOutOfBoundsException}.
13 |  *
14 |  * @see Test
15 |  */
16 | public class TestResult extends Object {
17 |     protected List<TestFailure> fFailures;
18 | 
19 |     public TestResult() {
20 |         fFailures = new ArrayList<TestFailure>();
21 |         fErrors = new ArrayList<TestFailure>();
22 |         fListeners = new ArrayList<TestListener>();
23 |         fRunTests = 0;
24 |         fStop = false;
25 |     }
26 | 
27 |     /**
28 |      * Adds an error to the list of errors. The passed in exception
29 |      * caused the error.
30 |      */
31 |     public synchronized void addError(Test test, Throwable t) {
32 |         fErrors.add(new TestFailure(test, t));
33 |         for (TestListener each : cloneListeners()) {
34 |             each.addError(test, t);
35 |         }
36 |     }
37 | 
38 | 
39 |     /**
40 |      * Runs a TestCase.
41 |      */
42 |     public void runProtected(final Test test, Protectable p) {
43 |         try {
44 |             p.protect();
45 |         } catch (AssertionFailedError e) {
46 |             addFailure(test, e);
47 |         } catch (ThreadDeath e) { // don't catch ThreadDeath by accident
48 |             throw e;
49 |         } catch (Throwable e) {
50 |             addError(test, e);
51 |         }
52 |     }
53 | 
54 |     /**
55 |      * Checks whether the test run should stop
56 |      */
57 |     public synchronized boolean shouldStop() {
58 |         return fStop;
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------
/src/test/resources/SampleClass2WhitespaceTokens.txt:
--------------------------------------------------------------------------------
  1 | <SENTENCE_START>
  2 | package
  3 | WS_s1t0
  4 | IDENTIFIER
  5 | .
  6 | IDENTIFIER
  7 | ;
  8 | WS_INDENTs0t0n2
  9 | import
 10 | WS_s1t0
 11 | IDENTIFIER
 12 | .
 13 | IDENTIFIER
 14 | .
 15 | IDENTIFIER
 16 | ;
 17 | WS_INDENTs0t0n1
 18 | import
 19 | WS_s1t0
 20 | IDENTIFIER
 21 | .
 22 | IDENTIFIER
 23 | .
 24 | IDENTIFIER
 25 | ;
 26 | WS_INDENTs0t0n2
 27 | COMMENT_JAVADOC
 28 | WS_INDENTs0t0n1
 29 | public
 30 | WS_s1t0
 31 | class
 32 | WS_s1t0
 33 | IDENTIFIER
 34 | WS_s1t0
 35 | extends
 36 | WS_s1t0
 37 | IDENTIFIER
 38 | WS_s1t0
 39 | {
 40 | WS_INDENTs4t0n1
 41 | protected
 42 | WS_s1t0
 43 | IDENTIFIER
 44 | <
 45 | IDENTIFIER
 46 | >
 47 | WS_s1t0
 48 | IDENTIFIER
 49 | ;
 50 | WS_INDENTs0t0n2
 51 | public
 52 | WS_s1t0
 53 | IDENTIFIER
 54 | (
 55 | )
 56 | WS_s1t0
 57 | {
 58 | WS_INDENTs4t0n1
 59 | IDENTIFIER
 60 | WS_s1t0
 61 | =
 62 | WS_s1t0
 63 | new
 64 | WS_s1t0
 65 | IDENTIFIER
 66 | <
 67 | IDENTIFIER
 68 | >
 69 | (
 70 | )
 71 | ;
 72 | WS_INDENTs0t0n1
 73 | IDENTIFIER
 74 | WS_s1t0
 75 | =
 76 | WS_s1t0
 77 | new
 78 | WS_s1t0
 79 | IDENTIFIER
 80 | <
 81 | IDENTIFIER
 82 | >
 83 | (
 84 | )
 85 | ;
 86 | WS_INDENTs0t0n1
 87 | IDENTIFIER
 88 | WS_s1t0
 89 | =
 90 | WS_s1t0
 91 | new
 92 | WS_s1t0
 93 | IDENTIFIER
 94 | <
 95 | IDENTIFIER
 96 | >
 97 | (
 98 | )
 99 | ;
100 | WS_INDENTs0t0n1
101 | IDENTIFIER
102 | WS_s1t0
103 | =
104 | WS_s1t0
105 | LITERAL
106 | ;
107 | WS_INDENTs0t0n1
108 | IDENTIFIER
109 | WS_s1t0
110 | =
111 | WS_s1t0
112 | false
113 | ;
114 | WS_DEDENTs4t0n1
115 | }
116 | WS_INDENTs0t0n2
117 | COMMENT_JAVADOC
118 | WS_INDENTs0t0n1
119 | public
120 | WS_s1t0
121 | synchronized
122 | WS_s1t0
123 | void
124 | WS_s1t0
125 | IDENTIFIER
126 | (
127 | IDENTIFIER
128 | WS_s1t0
129 | IDENTIFIER
130 | ,
131 | WS_s1t0
132 | IDENTIFIER
133 | WS_s1t0
134 | IDENTIFIER
135 | )
136 | WS_s1t0
137 | {
138 | WS_INDENTs4t0n1
139 | IDENTIFIER
140 | .
141 | IDENTIFIER
142 | (
143 | new
144 | WS_s1t0
145 | IDENTIFIER
146 | (
147 | IDENTIFIER
148 | ,
149 | WS_s1t0
150 | IDENTIFIER
151 | )
152 | )
153 | ;
154 | WS_INDENTs0t0n1
155 | for
156 | WS_s1t0
157 | (
158 | IDENTIFIER
159 | WS_s1t0
160 | IDENTIFIER
161 | WS_s1t0
162 | :
163 | WS_s1t0
164 | IDENTIFIER
165 | (
166 | )
167 | )
168 | WS_s1t0
169 | {
170 | WS_INDENTs4t0n1
171 | IDENTIFIER
172 | .
173 | IDENTIFIER
174 | (
175 | IDENTIFIER
176 | ,
177 | WS_s1t0
178 | IDENTIFIER
179 | )
180 | ;
181 | WS_DEDENTs4t0n1
182 | }
183 | WS_DEDENTs4t0n1
184 | }
185 | WS_INDENTs0t0n3
186 | COMMENT_JAVADOC
187 | WS_INDENTs0t0n1
188 | public
189 | WS_s1t0
190 | void
191 | WS_s1t0
192 | IDENTIFIER
193 | (
194 | final
195 | WS_s1t0
196 | IDENTIFIER
197 | WS_s1t0
198 | IDENTIFIER
199 | ,
200 | WS_s1t0
201 | IDENTIFIER
202 | WS_s1t0
203 | IDENTIFIER
204 | )
205 | WS_s1t0
206 | {
207 | WS_INDENTs4t0n1
208 | try
209 | WS_s1t0
210 | {
211 | WS_INDENTs4t0n1
212 | IDENTIFIER
213 | .
214 | IDENTIFIER
215 | (
216 | )
217 | ;
218 | WS_DEDENTs4t0n1
219 | }
220 | WS_s1t0
221 | catch
222 | WS_s1t0
223 | (
224 | IDENTIFIER
225 | WS_s1t0
226 | IDENTIFIER
227 | )
228 | WS_s1t0
229 | {
230 | WS_INDENTs4t0n1
231 | IDENTIFIER
232 | (
233 | IDENTIFIER
234 | ,
235 | WS_s1t0
236 | IDENTIFIER
237 | )
238 | ;
239 | WS_DEDENTs4t0n1
240 | }
241 | WS_s1t0
242 | catch
243 | WS_s1t0
244 | (
245 | IDENTIFIER
246 | WS_s1t0
247 | IDENTIFIER
248 | )
249 | WS_s1t0
250 | {
251 | WS_s1t0
252 | COMMENT_LINE
253 | WS_INDENTs4t0n1
254 | throw
255 | WS_s1t0
256 | IDENTIFIER
257 | ;
258 | WS_DEDENTs4t0n1
259 | }
260 | WS_s1t0
261 | catch
262 | WS_s1t0
263 | (
264 | IDENTIFIER
265 | WS_s1t0
266 | IDENTIFIER
267 | )
268 | WS_s1t0
269 | {
270 | WS_INDENTs4t0n1
271 | IDENTIFIER
272 | (
273 | IDENTIFIER
274 | ,
275 | WS_s1t0
276 | IDENTIFIER
277 | )
278 | ;
279 | WS_DEDENTs4t0n1
280 | }
281 | WS_DEDENTs4t0n1
282 | }
283 | WS_INDENTs0t0n2
284 | COMMENT_JAVADOC
285 | WS_INDENTs0t0n1
286 | public
287 | WS_s1t0
288 | synchronized
289 | WS_s1t0
290 | boolean
291 | WS_s1t0
292 | IDENTIFIER
293 | (
294 | )
295 | WS_s1t0
296 | {
297 | WS_INDENTs4t0n1
298 | return
299 | WS_s1t0
300 | IDENTIFIER
301 | ;
302 | WS_DEDENTs4t0n1
303 | }
304 | WS_DEDENTs4t0n1
305 | }
306 | WS_INDENTs0t0n1
307 | <SENTENCE_END/>
308 | 


--------------------------------------------------------------------------------
/src/test/resources/SampleClass3.txt:
--------------------------------------------------------------------------------
 1 | package my.pack;
 2 | 
 3 | import java.util.List;
 4 | import java.util.Map;
 5 | import java.util.HashMap;
 6 | import java.io.IOException;
 7 | import your.pack.Blah;
 8 | import your.pack2.ParamType;
 9 | import com.google.common.collect.Lists;
10 | 
11 | private class SomeName {
12 | 	SomeName anInstance;
13 | 	
14 | 	int[] arrayOfInt = new int[10];
15 | 	
16 | 	long aNumber = 7;
17 | 	
18 | 	Blah singleObject = new Blah();
19 | 	
20 | 	Blah[] arrayOfObjects = null;
21 | 	
22 | 	List<Integer> listOfInt = Lists.newArrayList();
23 | 	
24 | 	Map<Blah,Map<SomeNameInPkg,List<Double>>> complexParamType;
25 | 	
26 | 	ParamType<Blah> paraType = new ParamType<Blah>(listOfInt);
27 | 	
28 | 	ParamType<? extends Blah> lowerBoundPa = null;
29 | 	
30 | 	ParamType<? super Blah> upperBoundPa = null;
31 | 	
32 | 	ParamType<? super List<? super Blah>> upperBoundPa2 = null;
33 | 	
34 | 	void doSomething() {
35 | 		try {
36 | 			justDoIt();
37 | 		} catch (IOException | ArithmeticException e) {
38 | 			e.printStackTrace();
39 | 		}
40 | 	}
41 | 
42 | }


--------------------------------------------------------------------------------
/src/test/resources/SampleJavascript.txt:
--------------------------------------------------------------------------------
1 | function factorial(n) {
2 |     if (n === 0) {
3 |         return 1;
4 |     }
5 |     return n * factorial(n - 1);
6 | }


--------------------------------------------------------------------------------
/src/test/resources/SampleJavascript2.txt:
--------------------------------------------------------------------------------
1 | var sum = function() {
2 |     var i, x = 0;
3 |     for (i = 0; i < arguments.length; ++i) {
4 |         x += arguments[i];
5 |     }
6 |     return x;
7 | }
8 | sum(1, 2, 3); // returns 6


--------------------------------------------------------------------------------
/src/test/resources/SampleJavascript3.txt:
--------------------------------------------------------------------------------
 1 | /* Finds the lowest common multiple (LCM) of two numbers */
 2 | function LCMCalculator(x, y) { // constructor function
 3 |     var checkInt = function (x) { // inner function
 4 |         if (x % 1 !== 0) {
 5 |             throw new TypeError(x + " is not an integer"); // throw an exception
 6 |         }
 7 |         return x;
 8 |     };
 9 |     this.a = checkInt(x)
10 |     //   semicolons   ^^^^  are optional, a newline is enough
11 |     this.b = checkInt(y);
12 | }
13 | // The prototype of object instances created by a constructor is
14 | // that constructor's "prototype" property.
15 | LCMCalculator.prototype = { // object literal
16 |     constructor: LCMCalculator, // when reassigning a prototype, set the constructor property appropriately
17 |     gcd: function () { // method that calculates the greatest common divisor
18 |         // Euclidean algorithm:
19 |         var a = Math.abs(this.a), b = Math.abs(this.b), t;
20 |         if (a < b) {
21 |             // swap variables
22 |             t = b;
23 |             b = a;
24 |             a = t;
25 |         }
26 |         while (b !== 0) {
27 |             t = b;
28 |             b = a % b;
29 |             a = t;
30 |         }
31 |         // Only need to calculate GCD once, so "redefine" this method.
32 |         // (Actually not redefinition—it's defined on the instance itself,
33 |         // so that this.gcd refers to this "redefinition" instead of LCMCalculator.prototype.gcd.)
34 |         // Also, 'gcd' === "gcd", this['gcd'] === this.gcd
35 |         this['gcd'] = function () {
36 |             return a;
37 |         };
38 |         return a;
39 |     },
40 |     // Object property names can be specified by strings delimited by double (") or single (') quotes.
41 |     lcm : function () {
42 |         // Variable names don't collide with object properties, e.g. |lcm| is not |this.lcm|.
43 |         // not using |this.a * this.b| to avoid FP precision issues
44 |         var lcm = this.a / this.gcd() * this.b;
45 |         // Only need to calculate lcm once, so "redefine" this method.
46 |         this.lcm = function () {
47 |             return lcm;
48 |         };
49 |         return lcm;
50 |     },
51 |     toString: function () {
52 |         return "LCMCalculator: a = " + this.a + ", b = " + this.b;
53 |     }
54 | };
55 |  
56 | // Define generic output function; this implementation only works for web browsers
57 | function output(x) {
58 |     document.body.appendChild(document.createTextNode(x));
59 |     document.body.appendChild(document.createElement('br'));
60 | }
61 |  
62 | // Note: Array's map() and forEach() are defined in JavaScript 1.6.
63 | // They are used here to demonstrate JavaScript's inherent functional nature.
64 | [[25, 55], [21, 56], [22, 58], [28, 56]].map(function (pair) { // array literal + mapping function
65 |     return new LCMCalculator(pair[0], pair[1]);
66 | }).sort(function (a, b) { // sort with this comparative function
67 |     return a.lcm() - b.lcm();
68 | }).forEach(function (obj) {
69 |     output(obj + ", gcd = " + obj.gcd() + ", lcm = " + obj.lcm());
70 | });
71 | 


--------------------------------------------------------------------------------
/src/test/resources/SampleMethod.txt:
--------------------------------------------------------------------------------
 1 | 	public static IOpenable getJavaInput(IEditorPart part) {
 2 | 		IEditorInput editorInput= part.getEditorInput();
 3 | 		if (editorInput != null) {
 4 | 			IJavaElement input= javaUIgetEditorInputJavaElement(editorInput);
 5 | 			if (input instanceof IOpenable) {
 6 | 				return (IOpenable) input;
 7 | 			}
 8 | 		}
 9 | 		return null;	
10 | 	}
11 | 


--------------------------------------------------------------------------------