├── .classpath ├── .gitignore ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── LICENSE ├── README.md ├── pom.xml └── src ├── main └── java │ └── codemining │ ├── cpp │ └── codeutils │ │ ├── AbstractCdtASTAnnotatedTokenizer.java │ │ ├── AbstractCdtAstExtractor.java │ │ ├── CASTAnnotatedTokenizer.java │ │ ├── CAstExtractor.java │ │ ├── CDTTokenizer.java │ │ ├── CppASTAnnotatedTokenizer.java │ │ ├── CppASTExtractor.java │ │ ├── CppTokenTypeTokenizer.java │ │ ├── CppWhitespaceTokenizer.java │ │ └── package-info.java │ ├── java │ ├── codedata │ │ ├── MethodRetriever.java │ │ ├── PackageInfoExtractor.java │ │ ├── metrics │ │ │ ├── CyclomaticCalculator.java │ │ │ ├── IFileMetricRetriever.java │ │ │ ├── JavaMethodClassCounter.java │ │ │ ├── LinesOfCodeMetric.java │ │ │ └── package-info.java │ │ └── package-info.java │ ├── codeutils │ │ ├── IdentifierPerType.java │ │ ├── JavaASTExtractor.java │ │ ├── JavaApproximateTypeInferencer.java │ │ ├── JavaTypeHierarchyExtractor.java │ │ ├── MethodExtractor.java │ │ ├── MethodUtils.java │ │ ├── MethodsInClass.java │ │ ├── ProjectTypeInformation.java │ │ ├── UsagePointExtractor.java │ │ ├── binding │ │ │ ├── AbstractJavaNameBindingsExtractor.java │ │ │ ├── JavaApproximateVariableBindingExtractor.java │ │ │ ├── JavaExactVariableBindingsExtractor.java │ │ │ ├── JavaFeatureExtractor.java │ │ │ ├── JavaMethodDeclarationBindingExtractor.java │ │ │ ├── JavaMethodInvocationBindingExtractor.java │ │ │ ├── JavaTypeDeclarationBindingExtractor.java │ │ │ ├── JavaVariableFeatureExtractor.java │ │ │ └── tui │ │ │ │ ├── JavaBindingsPerFeatureTypeToJson.java │ │ │ │ └── JavaBindingsToJson.java │ │ ├── package-info.java │ │ └── scopes │ │ │ ├── AllScopeExtractor.java │ │ │ ├── MethodScopeExtractor.java │ │ │ ├── ScopedIdentifierRenaming.java │ │ │ ├── ScopesTUI.java │ │ │ ├── TypenameScopeExtractor.java │ │ │ ├── VariableScopeExtractor.java │ │ │ └── package-info.java │ └── tokenizers │ │ ├── JavaASTAnnotatedTokenizer.java │ │ ├── 
JavaIdentifierAnnotatedTokenizer.java │ │ ├── JavaTokenTypeTokenizer.java │ │ ├── JavaTokenizer.java │ │ ├── JavaTokenizerSomeTokens.java │ │ ├── JavaTypeTokenizer.java │ │ ├── JavaWhitespaceTokenizer.java │ │ └── JavaWidthAnnotatedWhitespaceTokenizer.java │ ├── js │ ├── codedata │ │ └── metrics │ │ │ ├── IJavascriptFileMetricRetriever.java │ │ │ ├── JavascriptCyclomaticCalculator.java │ │ │ └── JavascriptFunctionCounter.java │ └── codeutils │ │ ├── FunctionRetriever.java │ │ ├── JavascriptASTExtractor.java │ │ ├── JavascriptTokenizer.java │ │ ├── NodeFinder.java │ │ ├── binding │ │ ├── AbstractJavascriptNameBindingsExtractor.java │ │ ├── JavascriptApproximateVariableBindingExtractor.java │ │ └── JavascriptExactVariableBindingsExtractor.java │ │ └── package-info.java │ ├── langs │ └── codeutils │ │ ├── AbstractJygmentsTokenizer.java │ │ ├── CodeTokenizer.java │ │ ├── TokenTypeTokenizer.java │ │ └── package-info.java │ ├── languagetools │ ├── ClassHierarchy.java │ ├── CodePrinter.java │ ├── ColoredToken.java │ ├── FormattingTokenizer.java │ ├── IAstAnnotatedTokenizer.java │ ├── IFormattingTokenizer.java │ ├── IScopeExtractor.java │ ├── ITokenizer.java │ ├── ParseType.java │ ├── Scope.java │ ├── TokenizerUtils.java │ ├── bindings │ │ ├── AbstractNameBindingsExtractor.java │ │ ├── ResolvedSourceCode.java │ │ └── TokenNameBinding.java │ ├── package-info.java │ ├── tokenizers │ │ └── whitespace │ │ │ ├── WhitespaceToTokenConverter.java │ │ │ └── WhitespaceTokenConverter.java │ └── tui │ │ ├── DistinctTokenCount.java │ │ ├── TokenCounter.java │ │ ├── TokenizerTUI.java │ │ └── package-info.java │ └── python │ └── codeutils │ ├── AbstractPythonTokenizer.java │ ├── Python27Tokenizer.java │ └── Python30Tokenizer.java └── test ├── java └── codemining │ ├── java │ └── codeutils │ │ ├── JavaApproximateTypeInferencerTest.java │ │ ├── JavaAstExtractorTest.java │ │ ├── JavaWhitespaceTokenizerTest.java │ │ ├── TokenizeJavaCodeTest.java │ │ └── binding │ │ ├── BindingTester.java │ │ 
├── JavaApproximateVariableBindingExtractorTest.java │ │ ├── JavaExactVariableBindingsExtractorTest.java │ │ ├── JavaMethodBindingExtractorTest.java │ │ └── JavaTypeBindingExtractorTest.java │ ├── js │ └── codeutils │ │ ├── JavascriptASTExtractorTest.java │ │ ├── TokenizeJavascriptCodeTest.java │ │ └── binding │ │ ├── JavascriptApproximateVariableBindingExtractorTest.java │ │ └── JavascriptExactVariableBindingsExtractorTest.java │ └── languagetools │ └── TokenizerUtilsTest.java └── resources ├── SampleClass.txt ├── SampleClass2.txt ├── SampleClass2WhitespaceTokens.txt ├── SampleClass3.txt ├── SampleJavascript.txt ├── SampleJavascript2.txt ├── SampleJavascript3.txt └── SampleMethod.txt /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /target 3 | /target/ 4 | /target/ 5 | /target/ 6 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | codemining-core 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 4 | 
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=1.8 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 12 | org.eclipse.jdt.core.compiler.source=1.8 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, School of Informatics, University of Edinburgh 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of [project] nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | codemining-core 2 | =============== 3 | A set of tools for extracting tokens and ASTs from code. 4 | 5 | codemining-core depends on the [codemining-utils](https://github.com/mast-group/codemining-utils) package 6 | 7 | codemining-core is released under a BSD license. 8 | -------------------------------------------------------------------------------- /src/main/java/codemining/cpp/codeutils/AbstractCdtAstExtractor.java: -------------------------------------------------------------------------------- 1 | package codemining.cpp.codeutils; 2 | 3 | import java.util.Map; 4 | 5 | import org.eclipse.cdt.core.dom.ast.IASTTranslationUnit; 6 | import org.eclipse.cdt.core.index.IIndex; 7 | import org.eclipse.cdt.core.model.ILanguage; 8 | import org.eclipse.cdt.core.parser.DefaultLogService; 9 | import org.eclipse.cdt.core.parser.FileContent; 10 | import org.eclipse.cdt.core.parser.IParserLogService; 11 | import org.eclipse.cdt.core.parser.IScannerInfo; 12 | import org.eclipse.cdt.core.parser.IncludeFileContentProvider; 13 | import org.eclipse.cdt.core.parser.ScannerInfo; 14 | import org.eclipse.core.runtime.CoreException; 15 | 16 | import com.google.common.collect.Maps; 17 | 18 | /** 19 | * Interface for all classes that are able to retrieve a CDT-compatible AST. 
20 | * Macros and inclusions are not resolved, unless in the same file. 21 | * 22 | * @author Miltos Allamanis 23 | * 24 | */ 25 | public abstract class AbstractCdtAstExtractor { 26 | 27 | /** 28 | * Return an AST for the following CDT-compatible code; 29 | * 30 | * @param code 31 | * @return 32 | * @throws CoreException 33 | */ 34 | public final IASTTranslationUnit getAST(final char[] code, 35 | final String baseIncludePath) throws CoreException { 36 | final FileContent fc = FileContent.create(baseIncludePath, code); 37 | final Map macroDefinitions = Maps.newHashMap(); 38 | final String[] includeSearchPaths = new String[0]; 39 | final IScannerInfo si = new ScannerInfo(macroDefinitions, 40 | includeSearchPaths); 41 | final IncludeFileContentProvider ifcp = IncludeFileContentProvider 42 | .getEmptyFilesProvider(); 43 | final IIndex idx = null; 44 | final int options = ILanguage.OPTION_IS_SOURCE_UNIT; 45 | final IParserLogService log = new DefaultLogService(); 46 | return getAstForLanguage(fc, si, ifcp, idx, options, log); 47 | } 48 | 49 | /** 50 | * To be overridden for each language. 51 | * 52 | * @param fc 53 | * @param si 54 | * @param ifcp 55 | * @param idx 56 | * @param options 57 | * @param log 58 | * @return 59 | * @throws CoreException 60 | */ 61 | protected abstract IASTTranslationUnit getAstForLanguage(FileContent fc, 62 | IScannerInfo si, IncludeFileContentProvider ifcp, IIndex idx, 63 | int options, IParserLogService log) throws CoreException; 64 | 65 | } -------------------------------------------------------------------------------- /src/main/java/codemining/cpp/codeutils/CASTAnnotatedTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.cpp.codeutils; 5 | 6 | import codemining.languagetools.ITokenizer; 7 | 8 | /** 9 | * A C AST annotated tokenizer. 
10 | * 11 | * @author Miltos Allamanis 12 | * 13 | */ 14 | public class CASTAnnotatedTokenizer extends AbstractCdtASTAnnotatedTokenizer { 15 | 16 | private static final long serialVersionUID = 6395574519739472995L; 17 | 18 | /** 19 | * @param extractorClass 20 | */ 21 | public CASTAnnotatedTokenizer() { 22 | super(CAstExtractor.class, ""); 23 | } 24 | 25 | /** 26 | * @param base 27 | * @param extractorClass 28 | */ 29 | public CASTAnnotatedTokenizer(final ITokenizer base) { 30 | super(base, CAstExtractor.class, ""); 31 | } 32 | 33 | public CASTAnnotatedTokenizer(final ITokenizer base, 34 | final String codeBasePath) { 35 | super(base, CAstExtractor.class, codeBasePath); 36 | } 37 | 38 | public CASTAnnotatedTokenizer(final String codeBasePath) { 39 | super(CAstExtractor.class, codeBasePath); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/codemining/cpp/codeutils/CAstExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.cpp.codeutils; 5 | 6 | import org.eclipse.cdt.core.dom.ast.IASTTranslationUnit; 7 | import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage; 8 | import org.eclipse.cdt.core.index.IIndex; 9 | import org.eclipse.cdt.core.parser.FileContent; 10 | import org.eclipse.cdt.core.parser.IParserLogService; 11 | import org.eclipse.cdt.core.parser.IScannerInfo; 12 | import org.eclipse.cdt.core.parser.IncludeFileContentProvider; 13 | import org.eclipse.core.runtime.CoreException; 14 | 15 | /** 16 | * A C AST extractor. 
17 | * 18 | * @author Miltos Allamanis 19 | * 20 | */ 21 | public class CAstExtractor extends AbstractCdtAstExtractor { 22 | 23 | @Override 24 | protected IASTTranslationUnit getAstForLanguage(final FileContent fc, 25 | final IScannerInfo si, final IncludeFileContentProvider ifcp, 26 | final IIndex idx, final int options, final IParserLogService log) 27 | throws CoreException { 28 | return GCCLanguage.getDefault().getASTTranslationUnit(fc, si, ifcp, 29 | idx, options, log); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/codemining/cpp/codeutils/CDTTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.cpp.codeutils; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Collection; 9 | import java.util.List; 10 | import java.util.SortedMap; 11 | 12 | import org.apache.commons.io.FileUtils; 13 | import org.apache.commons.io.filefilter.AbstractFileFilter; 14 | import org.apache.commons.io.filefilter.RegexFileFilter; 15 | import org.apache.commons.lang.NotImplementedException; 16 | import org.eclipse.cdt.internal.formatter.scanner.Scanner; 17 | import org.eclipse.cdt.internal.formatter.scanner.Token; 18 | 19 | import codemining.languagetools.ITokenizer; 20 | 21 | import com.google.common.collect.Lists; 22 | import com.google.common.collect.Maps; 23 | 24 | /** 25 | * A C/C++ tokenizer provided by the Eclipse CDT. 26 | * 27 | * @author Miltos Allamanis 28 | * 29 | */ 30 | public class CDTTokenizer implements ITokenizer { 31 | 32 | private static final long serialVersionUID = 3954406410244227404L; 33 | 34 | /** 35 | * A filter for the files being tokenized. 
36 | */ 37 | public static final RegexFileFilter C_CODE_TOKENIZER = new RegexFileFilter( 38 | ".*\\.(c|cc|cpp|h)$"); 39 | 40 | /* 41 | * (non-Javadoc) 42 | * 43 | * @see codemining.languagetools.ITokenizer#fullTokenListWithPos(char[]) 44 | */ 45 | @Override 46 | public SortedMap fullTokenListWithPos(final char[] code) { 47 | final SortedMap tokens = Maps.newTreeMap(); 48 | tokens.put(-1, new FullToken(SENTENCE_START, SENTENCE_START)); 49 | tokens.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END, SENTENCE_END)); 50 | 51 | final Scanner scanner = new Scanner(); 52 | scanner.setSource(code); 53 | do { 54 | final int token = scanner.getNextToken(); 55 | if (token == Token.tWHITESPACE) { 56 | continue; 57 | } 58 | final String nxtToken = new String(scanner.getCurrentTokenSource()); 59 | tokens.put(scanner.getCurrentPosition(), new FullToken(nxtToken, 60 | Integer.toString(token))); 61 | } while (!scanner.atEnd()); 62 | return tokens; 63 | } 64 | 65 | /* 66 | * (non-Javadoc) 67 | * 68 | * @see codemining.languagetools.ITokenizer#getFileFilter() 69 | */ 70 | @Override 71 | public AbstractFileFilter getFileFilter() { 72 | return C_CODE_TOKENIZER; 73 | } 74 | 75 | /* 76 | * (non-Javadoc) 77 | * 78 | * @see codemining.languagetools.ITokenizer#getIdentifierType() 79 | */ 80 | @Override 81 | public String getIdentifierType() { 82 | return Integer.toString(Token.tIDENTIFIER); 83 | } 84 | 85 | @Override 86 | public Collection getKeywordTypes() { 87 | throw new NotImplementedException(); 88 | } 89 | 90 | @Override 91 | public Collection getLiteralTypes() { 92 | throw new NotImplementedException(); 93 | } 94 | 95 | /* 96 | * (non-Javadoc) 97 | * 98 | * @see 99 | * codemining.languagetools.ITokenizer#getTokenFromString(java.lang.String) 100 | */ 101 | @Override 102 | public FullToken getTokenFromString(final String token) { 103 | if (token.equals(ITokenizer.SENTENCE_START)) { 104 | return new FullToken(ITokenizer.SENTENCE_START, 105 | ITokenizer.SENTENCE_START); 106 | } 107 | 108 
| if (token.equals(ITokenizer.SENTENCE_END)) { 109 | return new FullToken(ITokenizer.SENTENCE_END, 110 | ITokenizer.SENTENCE_END); 111 | } 112 | return getTokenListFromCode(token.toCharArray()).get(1); 113 | } 114 | 115 | /* 116 | * (non-Javadoc) 117 | * 118 | * @see codemining.languagetools.ITokenizer#getTokenListFromCode(char[]) 119 | */ 120 | @Override 121 | public List getTokenListFromCode(final char[] code) { 122 | final List tokens = Lists.newArrayList(); 123 | tokens.add(new FullToken(SENTENCE_START, SENTENCE_START)); 124 | 125 | final Scanner scanner = new Scanner(); 126 | scanner.setSource(code); 127 | 128 | do { 129 | final int token = scanner.getNextToken(); 130 | if (token == Token.tWHITESPACE) { 131 | continue; 132 | } 133 | final String nxtToken = new String(scanner.getCurrentTokenSource()); 134 | tokens.add(new FullToken(nxtToken, Integer.toString(token))); 135 | } while (!scanner.atEnd()); 136 | 137 | tokens.add(new FullToken(SENTENCE_END, SENTENCE_END)); 138 | return tokens; 139 | } 140 | 141 | @Override 142 | public List getTokenListFromCode(final File codeFile) 143 | throws IOException { 144 | return getTokenListFromCode(FileUtils.readFileToString(codeFile) 145 | .toCharArray()); 146 | } 147 | 148 | /* 149 | * (non-Javadoc) 150 | * 151 | * @see codemining.languagetools.ITokenizer#tokenListFromCode(char[]) 152 | */ 153 | @Override 154 | public List tokenListFromCode(final char[] code) { 155 | final List tokens = Lists.newArrayList(); 156 | tokens.add(SENTENCE_START); 157 | 158 | final Scanner scanner = new Scanner(); 159 | scanner.setSource(code); 160 | 161 | do { 162 | final int token = scanner.getNextToken(); 163 | if (token == Token.tWHITESPACE) { 164 | continue; 165 | } 166 | final String nxtToken = new String(scanner.getCurrentTokenSource()); 167 | tokens.add(nxtToken); 168 | } while (!scanner.atEnd()); 169 | 170 | tokens.add(SENTENCE_END); 171 | return tokens; 172 | } 173 | 174 | @Override 175 | public List tokenListFromCode(final File 
codeFile) 176 | throws IOException { 177 | return tokenListFromCode(FileUtils.readFileToString(codeFile) 178 | .toCharArray()); 179 | } 180 | 181 | /* 182 | * (non-Javadoc) 183 | * 184 | * @see codemining.languagetools.ITokenizer#tokenListWithPos(char[]) 185 | */ 186 | @Override 187 | public SortedMap tokenListWithPos(final char[] code) { 188 | final SortedMap tokens = Maps.newTreeMap(); 189 | tokens.put(-1, SENTENCE_START); 190 | tokens.put(Integer.MAX_VALUE, SENTENCE_END); 191 | 192 | final Scanner scanner = new Scanner(); 193 | scanner.setSource(code); 194 | do { 195 | final int token = scanner.getNextToken(); 196 | if (token == Token.tWHITESPACE) { 197 | continue; 198 | } 199 | final String nxtToken = new String(scanner.getCurrentTokenSource()); 200 | tokens.put(scanner.getCurrentPosition(), nxtToken); 201 | } while (!scanner.atEnd()); 202 | return tokens; 203 | } 204 | 205 | @Override 206 | public SortedMap tokenListWithPos(final File file) 207 | throws IOException { 208 | return fullTokenListWithPos(FileUtils.readFileToString(file) 209 | .toCharArray()); 210 | } 211 | 212 | } 213 | -------------------------------------------------------------------------------- /src/main/java/codemining/cpp/codeutils/CppASTAnnotatedTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.cpp.codeutils; 5 | 6 | import codemining.languagetools.ITokenizer; 7 | 8 | /** 9 | * A C++ AST Annotated Tokenizer 10 | * 11 | * @author Miltos Allamanis 12 | * 13 | */ 14 | public class CppASTAnnotatedTokenizer extends AbstractCdtASTAnnotatedTokenizer { 15 | 16 | private static final long serialVersionUID = -8016456170070671980L; 17 | 18 | /** 19 | * 20 | */ 21 | public CppASTAnnotatedTokenizer() { 22 | super(CppASTExtractor.class, ""); 23 | } 24 | 25 | /** 26 | * @param base 27 | */ 28 | public CppASTAnnotatedTokenizer(final ITokenizer base) { 29 | super(base, CppASTExtractor.class, ""); 30 | } 31 | 32 | 
public CppASTAnnotatedTokenizer(final ITokenizer base, 33 | final String codeBasePath) { 34 | super(base, CppASTExtractor.class, codeBasePath); 35 | } 36 | 37 | public CppASTAnnotatedTokenizer(final String codeBasePath) { 38 | super(CppASTExtractor.class, codeBasePath); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/codemining/cpp/codeutils/CppASTExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.cpp.codeutils; 5 | 6 | import org.eclipse.cdt.core.dom.ast.IASTTranslationUnit; 7 | import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage; 8 | import org.eclipse.cdt.core.index.IIndex; 9 | import org.eclipse.cdt.core.parser.FileContent; 10 | import org.eclipse.cdt.core.parser.IParserLogService; 11 | import org.eclipse.cdt.core.parser.IScannerInfo; 12 | import org.eclipse.cdt.core.parser.IncludeFileContentProvider; 13 | import org.eclipse.core.runtime.CoreException; 14 | 15 | /** 16 | * A C++ AST Extractor. 
17 | * 18 | * For more look here 19 | * http://www.inf.unibz.it/~gsucci/publications/full%20text/full 20 | * %20text/OSS12.pdf 21 | * 22 | * @author Miltos Allamanis 23 | * 24 | */ 25 | public class CppASTExtractor extends AbstractCdtAstExtractor { 26 | 27 | @Override 28 | protected IASTTranslationUnit getAstForLanguage(final FileContent fc, 29 | final IScannerInfo si, final IncludeFileContentProvider ifcp, 30 | final IIndex idx, final int options, final IParserLogService log) 31 | throws CoreException { 32 | return GPPLanguage.getDefault().getASTTranslationUnit(fc, si, ifcp, 33 | idx, options, log); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/codemining/cpp/codeutils/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Code utilities related to C/C++ 3 | */ 4 | package codemining.cpp.codeutils; -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/MethodRetriever.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codedata; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Map; 9 | 10 | import org.eclipse.jdt.core.dom.ASTNode; 11 | import org.eclipse.jdt.core.dom.ASTVisitor; 12 | import org.eclipse.jdt.core.dom.CompilationUnit; 13 | import org.eclipse.jdt.core.dom.MethodDeclaration; 14 | 15 | import codemining.java.codeutils.JavaASTExtractor; 16 | 17 | import com.google.common.collect.Maps; 18 | 19 | /** 20 | * A utility class that retrieves the methods (as AST Nodes) of a file. 
21 | * 22 | * @author Miltos Allamanis 23 | * 24 | */ 25 | public final class MethodRetriever extends ASTVisitor { 26 | 27 | public static Map getMethodNodes(final File file) 28 | throws IOException { 29 | final JavaASTExtractor astExtractor = new JavaASTExtractor(false); 30 | final MethodRetriever m = new MethodRetriever(); 31 | final CompilationUnit cu = astExtractor.getAST(file); 32 | cu.accept(m); 33 | return m.methods; 34 | } 35 | 36 | public static Map getMethodNodes( 37 | final String file) throws Exception { 38 | final JavaASTExtractor astExtractor = new JavaASTExtractor(false); 39 | final MethodRetriever m = new MethodRetriever(); 40 | final ASTNode cu = astExtractor.getBestEffortAstNode(file); 41 | cu.accept(m); 42 | return m.methods; 43 | } 44 | 45 | private final Map methods = Maps.newTreeMap(); 46 | 47 | private MethodRetriever() { 48 | 49 | } 50 | 51 | @Override 52 | public boolean visit(final MethodDeclaration node) { 53 | methods.put(node.getName().toString(), node); 54 | return super.visit(node); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/PackageInfoExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codedata; 5 | 6 | import java.util.Collections; 7 | import java.util.List; 8 | 9 | import org.eclipse.jdt.core.dom.ASTVisitor; 10 | import org.eclipse.jdt.core.dom.CompilationUnit; 11 | import org.eclipse.jdt.core.dom.ImportDeclaration; 12 | import org.eclipse.jdt.core.dom.PackageDeclaration; 13 | 14 | import com.google.common.collect.Lists; 15 | 16 | /** 17 | * Get package information from Java source code. 
18 | * 19 | * @author Miltos Allamanis 20 | * 21 | */ 22 | public class PackageInfoExtractor extends ASTVisitor { 23 | 24 | private String packageName; 25 | private final List packageImports; 26 | 27 | private final CompilationUnit cu; 28 | 29 | public PackageInfoExtractor(final CompilationUnit cu) { 30 | this.cu = cu; 31 | packageImports = Lists.newArrayList(); 32 | cu.accept(this); 33 | } 34 | 35 | public List getImports() { 36 | return Collections.unmodifiableList(packageImports); 37 | } 38 | 39 | public String getPackageName() { 40 | return packageName; 41 | } 42 | 43 | @Override 44 | public boolean visit(ImportDeclaration node) { 45 | packageImports.add(node.getName().getFullyQualifiedName()); 46 | return false; 47 | } 48 | 49 | @Override 50 | public boolean visit(PackageDeclaration node) { 51 | packageName = node.getName().toString(); 52 | return false; 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/metrics/CyclomaticCalculator.java: -------------------------------------------------------------------------------- 1 | package codemining.java.codedata.metrics; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.logging.Logger; 6 | 7 | import org.eclipse.jdt.core.dom.ASTNode; 8 | import org.eclipse.jdt.core.dom.ASTVisitor; 9 | import org.eclipse.jdt.core.dom.CatchClause; 10 | import org.eclipse.jdt.core.dom.ConditionalExpression; 11 | import org.eclipse.jdt.core.dom.DoStatement; 12 | import org.eclipse.jdt.core.dom.EnhancedForStatement; 13 | import org.eclipse.jdt.core.dom.ForStatement; 14 | import org.eclipse.jdt.core.dom.IfStatement; 15 | import org.eclipse.jdt.core.dom.MethodDeclaration; 16 | import org.eclipse.jdt.core.dom.SwitchCase; 17 | import org.eclipse.jdt.core.dom.WhileStatement; 18 | 19 | import codemining.java.codeutils.JavaASTExtractor; 20 | 21 | /** 22 | * Compute McCabe's Cyclomatic Complexity. 
23 | * 24 | * @author Miltos Allamanis 25 | * 26 | */ 27 | public class CyclomaticCalculator implements IFileMetricRetriever { 28 | 29 | /** 30 | * Visit all "junctions" in an AST and increment complexity. 31 | * 32 | */ 33 | private static class JunctionVisitor extends ASTVisitor { 34 | int complexity = 0; 35 | 36 | @Override 37 | public boolean visit(final CatchClause arg0) { 38 | complexity++; 39 | return super.visit(arg0); 40 | } 41 | 42 | @Override 43 | public boolean visit(final ConditionalExpression arg0) { 44 | complexity++; 45 | return super.visit(arg0); 46 | } 47 | 48 | @Override 49 | public boolean visit(final DoStatement arg0) { 50 | complexity++; 51 | return super.visit(arg0); 52 | } 53 | 54 | @Override 55 | public boolean visit(final EnhancedForStatement arg0) { 56 | complexity++; 57 | return super.visit(arg0); 58 | } 59 | 60 | @Override 61 | public boolean visit(final ForStatement arg0) { 62 | complexity++; 63 | return super.visit(arg0); 64 | } 65 | 66 | @Override 67 | public boolean visit(final IfStatement arg0) { 68 | complexity++; 69 | return super.visit(arg0); 70 | } 71 | 72 | @Override 73 | public boolean visit(final MethodDeclaration arg0) { 74 | /* 75 | * if (isConcrete(arg0)) { complexity.startMethod(); return 76 | * super.visit(arg0); } return false; 77 | */ 78 | complexity++; // TODO: Not exactly true, but we'll use that 79 | return super.visit(arg0); 80 | } 81 | 82 | @Override 83 | public boolean visit(final SwitchCase arg0) { 84 | complexity++; 85 | return super.visit(arg0); 86 | } 87 | 88 | @Override 89 | public boolean visit(final WhileStatement arg0) { 90 | complexity++; 91 | return super.visit(arg0); 92 | } 93 | } 94 | 95 | private static final Logger LOGGER = Logger 96 | .getLogger(CyclomaticCalculator.class.getName()); 97 | 98 | public int getComplexity(final File file) throws IOException { 99 | final JavaASTExtractor ast = new JavaASTExtractor(false); 100 | final JunctionVisitor visitor = new JunctionVisitor(); 101 | 
ast.getAST(file).accept(visitor); 102 | return visitor.complexity; 103 | } 104 | 105 | @Override 106 | public double getMetricForASTNode(final ASTNode node) { 107 | final JunctionVisitor visitor = new JunctionVisitor(); 108 | node.accept(visitor); 109 | return visitor.complexity; 110 | } 111 | 112 | @Override 113 | public double getMetricForFile(final File file) throws IOException { 114 | return getComplexity(file); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/metrics/IFileMetricRetriever.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codedata.metrics; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | 9 | import org.eclipse.jdt.core.dom.ASTNode; 10 | 11 | /** 12 | * An interface for all the classes that can return a metric 13 | * 14 | * @author Miltos Allamanis 15 | * 16 | */ 17 | public interface IFileMetricRetriever { 18 | double getMetricForASTNode(final ASTNode node); 19 | 20 | double getMetricForFile(final File file) throws IOException; 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/metrics/JavaMethodClassCounter.java: -------------------------------------------------------------------------------- 1 | package codemining.java.codedata.metrics; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.List; 6 | 7 | import org.apache.commons.io.FileUtils; 8 | import org.eclipse.jdt.core.dom.ASTNode; 9 | import org.eclipse.jdt.core.dom.ASTVisitor; 10 | import org.eclipse.jdt.core.dom.CompilationUnit; 11 | 12 | import codemining.java.codeutils.JavaASTExtractor; 13 | 14 | public final class JavaMethodClassCounter { 15 | 16 | public static class MethodClassCountVisitor extends ASTVisitor { 17 | 18 | public int noMethods = 0; 19 | public int noClasses = 0; 20 | 
21 | @Override 22 | public void postVisit(final ASTNode node) { 23 | 24 | if (node.getNodeType() == ASTNode.METHOD_DECLARATION) 25 | noMethods++; 26 | 27 | if (node.getNodeType() == ASTNode.TYPE_DECLARATION 28 | || node.getNodeType() == ASTNode.ENUM_DECLARATION) 29 | noClasses++; 30 | } 31 | 32 | } 33 | 34 | public static void main(final String[] args) throws IOException { 35 | if (args.length != 1) { 36 | System.err.println("Usage "); 37 | System.exit(-1); 38 | } 39 | final File directory = new File(args[0]); 40 | countMethodsClasses(directory); 41 | } 42 | 43 | public static void countMethodsClasses(final File projectDir) 44 | throws IOException { 45 | 46 | System.out.println("\n===== Project " + projectDir); 47 | final MethodClassCountVisitor mccv = new MethodClassCountVisitor(); 48 | final JavaASTExtractor astExtractor = new JavaASTExtractor(false); 49 | 50 | final List files = (List) FileUtils.listFiles(projectDir, 51 | new String[] { "java" }, true); 52 | 53 | int count = 0; 54 | for (final File file : files) { 55 | 56 | final CompilationUnit cu = astExtractor.getAST(file); 57 | cu.accept(mccv); 58 | 59 | if (count % 1000 == 0) 60 | System.out.println("At file " + count + " of " + files.size()); 61 | count++; 62 | } 63 | 64 | System.out.println("Project " + projectDir); 65 | System.out.println("No. *.java files " + files.size()); 66 | System.out.println("No. Methods: " + mccv.noMethods); 67 | System.out.println("No. 
Classes: " + mccv.noClasses); 68 | } 69 | 70 | private JavaMethodClassCounter() { 71 | 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/metrics/LinesOfCodeMetric.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codedata.metrics; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | 9 | import org.apache.commons.io.FileUtils; 10 | import org.eclipse.jdt.core.dom.ASTNode; 11 | 12 | /** 13 | * Find how many lines of code there are in the given file. 14 | * 15 | * Note that if you give a file it returns the length including the contents, 16 | * while giving an AST Node ignores them. 17 | * 18 | * @author Miltos Allamanis 19 | * 20 | */ 21 | public class LinesOfCodeMetric implements IFileMetricRetriever { 22 | 23 | /* 24 | * (non-Javadoc) 25 | * 26 | * @see 27 | * uk.ac.ed.inf.codedataextractors.IFileMetricRetriever#getMetricForASTNode 28 | * (org.eclipse.jdt.core.dom.ASTNode) 29 | */ 30 | @Override 31 | public double getMetricForASTNode(final ASTNode node) { 32 | return node.toString().split(System.getProperty("line.separator")).length; 33 | } 34 | 35 | /* 36 | * (non-Javadoc) 37 | * 38 | * @see 39 | * uk.ac.ed.inf.codedataextractors.IFileMetricRetriever#getMetricForFile 40 | * (java.io.File) 41 | */ 42 | @Override 43 | public double getMetricForFile(File file) throws IOException { 44 | final String fileContents = FileUtils.readFileToString(file); 45 | // This returns the real lines, while the other returns without the 46 | // comments. 
47 | return fileContents.split(System.getProperty("line.separator")).length; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/metrics/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Java Code Metrics 3 | */ 4 | package codemining.java.codedata.metrics; -------------------------------------------------------------------------------- /src/main/java/codemining/java/codedata/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Package providing information and datasets from Java files. 3 | */ 4 | package codemining.java.codedata; 5 | 6 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/MethodExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | import java.util.Stack; 11 | 12 | import org.apache.commons.lang.exception.ExceptionUtils; 13 | import org.eclipse.jdt.core.dom.ASTVisitor; 14 | import org.eclipse.jdt.core.dom.CompilationUnit; 15 | import org.eclipse.jdt.core.dom.ImportDeclaration; 16 | import org.eclipse.jdt.core.dom.MethodDeclaration; 17 | import org.eclipse.jdt.core.dom.TypeDeclaration; 18 | 19 | import com.google.common.collect.Lists; 20 | 21 | /** 22 | * Extract all methods in a class 23 | * 24 | * @author Miltos Allamanis 25 | * 26 | */ 27 | public class MethodExtractor { 28 | 29 | private static class MethodVisitor extends ASTVisitor { 30 | 31 | final List allMethods = Lists.newArrayList(); 32 | 33 | Stack className = new Stack(); 34 | 35 | private String currentPackageName; 36 | 37 | private final ProjectTypeInformation pti; 38 | 39 | 
public MethodVisitor(final ProjectTypeInformation pti) { 40 | this.pti = pti; 41 | } 42 | 43 | @Override 44 | public void endVisit(final TypeDeclaration node) { 45 | className.pop(); 46 | super.endVisit(node); 47 | } 48 | 49 | /** 50 | * @param node 51 | * @return 52 | */ 53 | public boolean isOverride(final MethodDeclaration node) { 54 | try { 55 | final boolean hasAnnotation = MethodUtils.hasOverrideAnnotation(node); 56 | if (pti == null || hasAnnotation) { 57 | return hasAnnotation; 58 | } 59 | 60 | final boolean isOverride = pti.isMethodOverride(className.peek(), node); 61 | return hasAnnotation || isOverride; 62 | } catch (final Throwable e) { 63 | System.err.println(e + ":" + node.toString()); 64 | return false; 65 | } 66 | } 67 | 68 | @Override 69 | public boolean visit(final CompilationUnit node) { 70 | if (node.getPackage() != null) { 71 | currentPackageName = node.getPackage().getName().getFullyQualifiedName(); 72 | } else { 73 | currentPackageName = ""; 74 | } 75 | return super.visit(node); 76 | } 77 | 78 | @Override 79 | public boolean visit(final ImportDeclaration node) { 80 | // Don't visit. It's boring 81 | return false; 82 | } 83 | 84 | @Override 85 | public boolean visit(final MethodDeclaration node) { 86 | if (node.isConstructor()) { 87 | return super.visit(node); 88 | } else if (isOverride(node)) { 89 | return super.visit(node); 90 | } 91 | allMethods.add(node); 92 | return super.visit(node); 93 | } 94 | 95 | @Override 96 | public boolean visit(final TypeDeclaration node) { 97 | if (className.isEmpty()) { 98 | className.push(currentPackageName + "." + node.getName().getIdentifier()); 99 | } else { 100 | className.push(className.peek() + "." 
+ node.getName().getIdentifier()); 101 | } 102 | return super.visit(node); 103 | } 104 | 105 | } 106 | 107 | public static List getMethods(final File file) throws IOException { 108 | return getMethods(file, null); 109 | } 110 | 111 | public static List getMethods(final File file, final ProjectTypeInformation pti) 112 | throws IOException { 113 | try { 114 | final JavaASTExtractor ex = new JavaASTExtractor(false); 115 | final MethodVisitor mv = new MethodVisitor(pti); 116 | final CompilationUnit cu = ex.getAST(file); 117 | cu.accept(mv); 118 | return mv.allMethods; 119 | } catch (Exception e) { 120 | System.err.println(ExceptionUtils.getFullStackTrace(e)); 121 | } 122 | return new ArrayList<>(); 123 | } 124 | 125 | private MethodExtractor() { 126 | 127 | } 128 | 129 | } 130 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/MethodUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils; 5 | 6 | import java.util.List; 7 | 8 | import org.eclipse.jdt.core.dom.Annotation; 9 | import org.eclipse.jdt.core.dom.IExtendedModifier; 10 | import org.eclipse.jdt.core.dom.MethodDeclaration; 11 | import org.eclipse.jdt.core.dom.SingleVariableDeclaration; 12 | 13 | /** 14 | * A set of utility methods for Java Methods. 
15 | * 16 | * @author Miltos Allamanis 17 | * 18 | */ 19 | public final class MethodUtils { 20 | 21 | /** 22 | * @param node 23 | * @return 24 | */ 25 | public static String getMethodType(final MethodDeclaration node) { 26 | final StringBuffer typeSb = new StringBuffer(); 27 | if (node.getReturnType2() != null) { 28 | typeSb.append(node.getReturnType2().toString()).append("("); 29 | } else if (node.isConstructor()) { 30 | typeSb.append("constructor("); 31 | } else { 32 | typeSb.append("void("); 33 | } 34 | for (final Object svd : node.parameters()) { 35 | final SingleVariableDeclaration decl = (SingleVariableDeclaration) svd; 36 | typeSb.append(decl.getType().toString()); 37 | typeSb.append(","); 38 | } 39 | typeSb.append(")"); 40 | 41 | final String methodType = typeSb.toString(); 42 | return methodType; 43 | } 44 | 45 | public static boolean hasOverrideAnnotation(final MethodDeclaration node) { 46 | final List modifiers = node.modifiers(); 47 | for (final Object mod : modifiers) { 48 | final IExtendedModifier modifier = (IExtendedModifier) mod; 49 | if (modifier.isAnnotation()) { 50 | final Annotation annotation = (Annotation) modifier; 51 | if (annotation.getTypeName().toString().equals("Override")) { 52 | return true; 53 | } 54 | } 55 | } 56 | return false; 57 | } 58 | 59 | private MethodUtils() { 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/MethodsInClass.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils; 5 | 6 | import java.io.File; 7 | import java.util.Collection; 8 | import java.util.Stack; 9 | import java.util.logging.Logger; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.apache.commons.io.filefilter.DirectoryFileFilter; 13 | import org.eclipse.jdt.core.dom.ASTVisitor; 14 | import org.eclipse.jdt.core.dom.CompilationUnit; 15 | import 
org.eclipse.jdt.core.dom.EnumDeclaration; 16 | import org.eclipse.jdt.core.dom.ImportDeclaration; 17 | import org.eclipse.jdt.core.dom.MethodDeclaration; 18 | import org.eclipse.jdt.core.dom.TypeDeclaration; 19 | 20 | import codemining.java.tokenizers.JavaTokenizer; 21 | 22 | import com.google.common.collect.HashMultimap; 23 | import com.google.common.collect.Multimap; 24 | 25 | /** 26 | * Retrieve all the methods contained in a given class. 27 | * 28 | * @author Miltos Allamanis 29 | * 30 | */ 31 | public class MethodsInClass { 32 | 33 | private class MethodExtractor extends ASTVisitor { 34 | 35 | Stack className = new Stack(); 36 | 37 | private String currentPackageName; 38 | 39 | @Override 40 | public void endVisit(final EnumDeclaration node) { 41 | className.pop(); 42 | super.endVisit(node); 43 | } 44 | 45 | /* 46 | * (non-Javadoc) 47 | * 48 | * @see 49 | * org.eclipse.jdt.core.dom.ASTVisitor#endVisit(org.eclipse.jdt.core 50 | * .dom.TypeDeclaration) 51 | */ 52 | @Override 53 | public void endVisit(final TypeDeclaration node) { 54 | className.pop(); 55 | super.endVisit(node); 56 | } 57 | 58 | /* 59 | * (non-Javadoc) 60 | * 61 | * @see 62 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 63 | * .CompilationUnit) 64 | */ 65 | @Override 66 | public boolean visit(final CompilationUnit node) { 67 | if (node.getPackage() != null) { 68 | currentPackageName = node.getPackage().getName() 69 | .getFullyQualifiedName(); 70 | } 71 | return super.visit(node); 72 | } 73 | 74 | @Override 75 | public boolean visit(final EnumDeclaration node) { 76 | if (className.isEmpty()) { 77 | className.push(currentPackageName + "." 78 | + node.getName().getIdentifier()); 79 | } else { 80 | className.push(className.peek() + "." 
81 | + node.getName().getIdentifier()); 82 | } 83 | return super.visit(node); 84 | } 85 | 86 | /* 87 | * (non-Javadoc) 88 | * 89 | * @see 90 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 91 | * .ImportDeclaration) 92 | */ 93 | @Override 94 | public boolean visit(final ImportDeclaration node) { 95 | // Don't visit. It's boring 96 | return false; 97 | } 98 | 99 | /* 100 | * (non-Javadoc) 101 | * 102 | * @see 103 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 104 | * .MethodDeclaration) 105 | */ 106 | @Override 107 | public boolean visit(final MethodDeclaration node) { 108 | final String methodType = MethodUtils.getMethodType(node); 109 | methodsForClasses.put(className.peek(), node.getName() 110 | .getIdentifier() + ":" + methodType); 111 | return false; 112 | } 113 | 114 | /* 115 | * (non-Javadoc) 116 | * 117 | * @see 118 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 119 | * .TypeDeclaration) 120 | */ 121 | @Override 122 | public boolean visit(final TypeDeclaration node) { 123 | if (className.isEmpty()) { 124 | className.push(currentPackageName + "." 125 | + node.getName().getIdentifier()); 126 | } else { 127 | className.push(className.peek() + "." 
128 | + node.getName().getIdentifier()); 129 | } 130 | return super.visit(node); 131 | } 132 | 133 | } 134 | 135 | public static void main(final String[] args) { 136 | if (args.length != 1) { 137 | System.err.println("Usage "); 138 | System.exit(-1); 139 | } 140 | 141 | final MethodsInClass mic = new MethodsInClass(); 142 | mic.scan(FileUtils 143 | .listFiles(new File(args[0]), JavaTokenizer.javaCodeFileFilter, 144 | DirectoryFileFilter.DIRECTORY)); 145 | System.out.println(mic); 146 | } 147 | 148 | /** 149 | * Class -> MethodName 150 | */ 151 | private final Multimap methodsForClasses = HashMultimap 152 | .create(); 153 | 154 | private static final Logger LOGGER = Logger.getLogger(MethodsInClass.class 155 | .getName()); 156 | 157 | public MethodsInClass() { 158 | methodsForClasses.put("java.lang.Object", "toString:String()"); 159 | methodsForClasses.put("java.lang.Object", "equals:boolean(Object,)"); 160 | methodsForClasses.put("java.lang.Object", "hashCode:int()"); 161 | methodsForClasses.put("java.lang.Runnable", "run:void()"); 162 | } 163 | 164 | public Collection getMethodsForClass(final String classname) { 165 | return methodsForClasses.get(classname); 166 | } 167 | 168 | public void scan(final Collection files) { 169 | final MethodExtractor me = new MethodExtractor(); 170 | final JavaASTExtractor jEx = new JavaASTExtractor(false); 171 | for (final File f : files) { 172 | try { 173 | final CompilationUnit cu = jEx.getAST(f); 174 | cu.accept(me); 175 | } catch (final Throwable e) { 176 | LOGGER.warning("Failed to get methods from " + f); 177 | } 178 | } 179 | } 180 | 181 | /* 182 | * (non-Javadoc) 183 | * 184 | * @see java.lang.Object#toString() 185 | */ 186 | @Override 187 | public String toString() { 188 | return methodsForClasses.toString(); 189 | } 190 | 191 | } 192 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/ProjectTypeInformation.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils; 5 | 6 | import java.io.File; 7 | import java.util.Collection; 8 | 9 | import org.apache.commons.io.FileUtils; 10 | import org.apache.commons.io.filefilter.DirectoryFileFilter; 11 | import org.eclipse.jdt.core.dom.MethodDeclaration; 12 | 13 | import codemining.java.tokenizers.JavaTokenizer; 14 | import codemining.languagetools.ClassHierarchy; 15 | import codemining.languagetools.ClassHierarchy.Type; 16 | 17 | import com.google.common.base.Optional; 18 | 19 | /** 20 | * Collect information about classes and their implementing methods. 21 | * 22 | * @author Miltos Allamanis 23 | * 24 | */ 25 | public class ProjectTypeInformation { 26 | 27 | private final File projectDirectory; 28 | private final MethodsInClass methodInformation = new MethodsInClass(); 29 | private ClassHierarchy hierarchy = null; 30 | 31 | public ProjectTypeInformation(final File projectDirectory) { 32 | this.projectDirectory = projectDirectory; 33 | } 34 | 35 | public void collect() { 36 | final Collection allFiles = FileUtils 37 | .listFiles(projectDirectory, JavaTokenizer.javaCodeFileFilter, 38 | DirectoryFileFilter.DIRECTORY); 39 | methodInformation.scan(allFiles); 40 | final JavaTypeHierarchyExtractor hierarchyExtractor = new JavaTypeHierarchyExtractor(); 41 | hierarchyExtractor.addFilesToCorpus(allFiles); 42 | hierarchy = hierarchyExtractor.getHierarchy(); 43 | } 44 | 45 | public boolean isMethodOverride(final String fullyQualifiedNameOfClass, 46 | final MethodDeclaration method) { 47 | final String methodSignature = method.getName().getIdentifier() + ":" 48 | + MethodUtils.getMethodType(method); 49 | if (!methodInformation.getMethodsForClass(fullyQualifiedNameOfClass) 50 | .contains(methodSignature)) { 51 | return false; 52 | } 53 | final Optional type = hierarchy 54 | .getTypeForName(fullyQualifiedNameOfClass); 55 | if (!type.isPresent()) { 56 | return 
false; 57 | } 58 | for (final Type implementor : type.get().getImplementingTypesClosure()) { 59 | if (methodInformation.getMethodsForClass( 60 | implementor.fullQualifiedName).contains(methodSignature)) { 61 | return true; 62 | } 63 | } 64 | return false; 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/UsagePointExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.List; 9 | import java.util.Set; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.apache.commons.io.filefilter.DirectoryFileFilter; 13 | import org.eclipse.jdt.core.dom.ASTNode; 14 | import org.eclipse.jdt.core.dom.ASTVisitor; 15 | import org.eclipse.jdt.core.dom.FieldDeclaration; 16 | import org.eclipse.jdt.core.dom.ImportDeclaration; 17 | import org.eclipse.jdt.core.dom.SingleVariableDeclaration; 18 | import org.eclipse.jdt.core.dom.VariableDeclarationExpression; 19 | import org.eclipse.jdt.core.dom.VariableDeclarationStatement; 20 | 21 | import codemining.java.tokenizers.JavaTokenizer; 22 | 23 | import com.google.common.collect.Lists; 24 | import com.google.common.collect.Sets; 25 | 26 | /** 27 | * Given a Java file and a fully qualified name of a class, find those blocks 28 | * that use the class in question. 29 | * 30 | * @author Miltos Allamanis 31 | * 32 | */ 33 | public class UsagePointExtractor { 34 | 35 | private static final class UsageExtractor extends ASTVisitor { 36 | /** 37 | * Return the imported class. 
38 | * 39 | * @param qName 40 | * @return 41 | */ 42 | private static String getImportedClass(final String qName) { 43 | return qName.substring(qName.lastIndexOf('.') + 1); 44 | } 45 | 46 | final List interestingNodes = Lists.newArrayList(); 47 | 48 | final String fullyQualifiedName; 49 | final Set className = Sets.newTreeSet(); 50 | 51 | /* 52 | * (non-Javadoc) 53 | * 54 | * @see 55 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 56 | * .ImportDeclaration) 57 | */ 58 | UsageExtractor(final String fullyQualifiedName) { 59 | this.fullyQualifiedName = fullyQualifiedName; 60 | // Add the fully qualified name in the rare case where 61 | // no import is needed (i.e. in java.lang.) 62 | className.add(fullyQualifiedName); 63 | } 64 | 65 | /* 66 | * (non-Javadoc) 67 | * 68 | * @see 69 | * org.eclipse.jdt.core.dom.ASTVisitor#preVisit2(org.eclipse.jdt.core 70 | * .dom.ASTNode) 71 | */ 72 | @Override 73 | public boolean preVisit2(final ASTNode node) { 74 | return !interestingNodes.contains(node); 75 | } 76 | 77 | /* 78 | * (non-Javadoc) 79 | * 80 | * @see 81 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 82 | * .FieldDeclaration) 83 | */ 84 | @Override 85 | public boolean visit(final FieldDeclaration node) { 86 | if (className.contains(node.getType().toString())) { 87 | interestingNodes.add(node.getParent()); 88 | } 89 | return false; 90 | } 91 | 92 | @Override 93 | public boolean visit(final ImportDeclaration node) { 94 | final String qualifiedName = node.getName().getFullyQualifiedName(); 95 | if (qualifiedName.startsWith(fullyQualifiedName)) { 96 | className.add(getImportedClass(qualifiedName)); 97 | className.add(qualifiedName); 98 | } 99 | return false; 100 | } 101 | 102 | /* 103 | * (non-Javadoc) 104 | * 105 | * @see 106 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 107 | * .SingleVariableDeclaration) 108 | */ 109 | @Override 110 | public boolean visit(final SingleVariableDeclaration node) { 111 | 
if (className.contains(node.getType().toString())) { 112 | interestingNodes.add(node.getParent()); 113 | } 114 | return false; 115 | } 116 | 117 | /* 118 | * (non-Javadoc) 119 | * 120 | * @see 121 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 122 | * .VariableDeclarationExpression) 123 | */ 124 | @Override 125 | public boolean visit(final VariableDeclarationExpression node) { 126 | if (className.contains(node.getType().toString())) { 127 | interestingNodes.add(node.getParent()); 128 | } 129 | return false; 130 | } 131 | 132 | /* 133 | * (non-Javadoc) 134 | * 135 | * @see 136 | * org.eclipse.jdt.core.dom.ASTVisitor#visit(org.eclipse.jdt.core.dom 137 | * .VariableDeclarationStatement) 138 | */ 139 | @Override 140 | public boolean visit(final VariableDeclarationStatement node) { 141 | if (className.contains(node.getType().toString())) { 142 | interestingNodes.add(node.getParent()); 143 | } 144 | return false; 145 | } 146 | 147 | } 148 | 149 | /** 150 | * @param args 151 | */ 152 | public static void main(final String[] args) { 153 | if (args.length != 2) { 154 | System.err.println("Usage "); 155 | System.exit(-1); 156 | } 157 | 158 | final File directory = new File(args[1]); 159 | final String qualifiedClass = args[0]; 160 | 161 | for (final File fi : FileUtils 162 | .listFiles(directory, JavaTokenizer.javaCodeFileFilter, 163 | DirectoryFileFilter.DIRECTORY)) { 164 | try { 165 | final List usages = usagePoints(qualifiedClass, fi); 166 | if (!usages.isEmpty()) { 167 | System.out.println(fi.getAbsolutePath()); 168 | for (final ASTNode node : usages) { 169 | System.out 170 | .println("----------------------------------------------"); 171 | System.out.println(node); 172 | } 173 | } 174 | } catch (final Exception e) { 175 | System.err.println("Error processing " + fi.getName()); 176 | } 177 | 178 | } 179 | 180 | } 181 | 182 | /** 183 | * 184 | * @param qualifiedName 185 | * the fully qualified name of the class or the package 186 | * @param f 187 | 
* @return 188 | * @throws IOException 189 | */ 190 | public static List usagePoints(final String qualifiedName, 191 | final File f) throws IOException { 192 | final JavaASTExtractor ex = new JavaASTExtractor(false); 193 | final UsageExtractor usageExtractor = new UsageExtractor(qualifiedName); 194 | ex.getAST(f).accept(usageExtractor); 195 | return usageExtractor.interestingNodes; 196 | } 197 | 198 | } 199 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/binding/JavaMethodInvocationBindingExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.binding; 5 | 6 | import static com.google.common.base.Preconditions.checkArgument; 7 | 8 | import java.util.Collection; 9 | import java.util.Map.Entry; 10 | import java.util.Set; 11 | 12 | import org.eclipse.jdt.core.dom.ASTNode; 13 | import org.eclipse.jdt.core.dom.ASTVisitor; 14 | import org.eclipse.jdt.core.dom.MethodInvocation; 15 | 16 | import codemining.java.tokenizers.JavaTokenizer; 17 | import codemining.languagetools.ITokenizer; 18 | 19 | import com.google.common.collect.HashMultimap; 20 | import com.google.common.collect.Multimap; 21 | import com.google.common.collect.Sets; 22 | 23 | /** 24 | * Extract Java method bindings. Each method call or definition is used by 25 | * itself 26 | * 27 | * @author Miltos Allamanis 28 | * 29 | */ 30 | public class JavaMethodInvocationBindingExtractor extends 31 | AbstractJavaNameBindingsExtractor { 32 | 33 | public static enum AvailableFeatures { 34 | IMPLEMENTOR_VOCABULARY, ANCESTRY, NUMBER_ARGUMENTS 35 | } 36 | 37 | private static class MethodBindings extends ASTVisitor { 38 | /** 39 | * A map from the method name to the position. 
40 | */ 41 | final Multimap methodNamePostions = HashMultimap 42 | .create(); 43 | 44 | @Override 45 | public boolean visit(final MethodInvocation node) { 46 | final String name = node.getName().toString(); 47 | methodNamePostions.put(name, node.getName()); 48 | return super.visit(node); 49 | } 50 | } 51 | 52 | private final Set activeFeatures = Sets 53 | .newHashSet(AvailableFeatures.values()); 54 | 55 | public JavaMethodInvocationBindingExtractor() { 56 | super(new JavaTokenizer()); 57 | } 58 | 59 | public JavaMethodInvocationBindingExtractor(final ITokenizer tokenizer) { 60 | super(tokenizer); 61 | } 62 | 63 | @Override 64 | public Set getAvailableFeatures() { 65 | return Sets.newHashSet(AvailableFeatures.values()); 66 | } 67 | 68 | @Override 69 | protected Set getFeatures(final Set boundNodes) { 70 | checkArgument(boundNodes.size() == 1); 71 | final ASTNode method = boundNodes.iterator().next().getParent(); 72 | final Set features = Sets.newHashSet(); 73 | checkArgument(method instanceof MethodInvocation); 74 | final MethodInvocation mi = (MethodInvocation) method; 75 | if (activeFeatures.contains(AvailableFeatures.NUMBER_ARGUMENTS)) { 76 | features.add("nArgs:" + mi.arguments().size()); 77 | } 78 | if (activeFeatures.contains(AvailableFeatures.IMPLEMENTOR_VOCABULARY)) { 79 | JavaFeatureExtractor.addImplementorVocab(mi, features); 80 | } 81 | if (activeFeatures.contains(AvailableFeatures.ANCESTRY)) { 82 | JavaFeatureExtractor.addAstAncestryFeatures(features, method); 83 | } 84 | return features; 85 | } 86 | 87 | @Override 88 | public Set> getNameBindings(final ASTNode node) { 89 | final MethodBindings mb = new MethodBindings(); 90 | node.accept(mb); 91 | 92 | final Set> nameBindings = Sets.newHashSet(); 93 | for (final Entry entry : mb.methodNamePostions 94 | .entries()) { 95 | final Set boundNodes = Sets.newIdentityHashSet(); 96 | boundNodes.add(entry.getValue()); 97 | nameBindings.add(boundNodes); 98 | } 99 | return nameBindings; 100 | } 101 | 102 | @Override 
103 | public void setActiveFeatures(final Set activeFeatures) { 104 | this.activeFeatures.clear(); 105 | this.activeFeatures 106 | .addAll((Collection) activeFeatures); 107 | } 108 | 109 | } 110 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/binding/JavaVariableFeatureExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.binding; 5 | 6 | import java.util.Collection; 7 | import java.util.List; 8 | import java.util.Set; 9 | 10 | import org.eclipse.jdt.core.dom.ASTNode; 11 | import org.eclipse.jdt.core.dom.FieldDeclaration; 12 | import org.eclipse.jdt.core.dom.SingleVariableDeclaration; 13 | import org.eclipse.jdt.core.dom.Type; 14 | import org.eclipse.jdt.core.dom.VariableDeclaration; 15 | import org.eclipse.jdt.core.dom.VariableDeclarationExpression; 16 | import org.eclipse.jdt.core.dom.VariableDeclarationFragment; 17 | import org.eclipse.jdt.core.dom.VariableDeclarationStatement; 18 | 19 | import com.google.common.collect.Sets; 20 | 21 | /** 22 | * Utility class to extract features from a variable. 
23 | * 24 | * @author Miltos Allamanis 25 | * 26 | */ 27 | public class JavaVariableFeatureExtractor { 28 | 29 | public static enum AvailableFeatures { 30 | IMPLEMENTOR_VOCABULARY, TYPE, MODIFIERS, ANCESTRY 31 | } 32 | 33 | private Set activeFeatures = Sets 34 | .newHashSet(AvailableFeatures.values()); 35 | 36 | public JavaVariableFeatureExtractor() { 37 | } 38 | 39 | /** 40 | * @param features 41 | * @param declarationPoint 42 | */ 43 | private void getDeclarationFeatures(final Set features, 44 | final ASTNode declarationPoint) { 45 | final Type variableType; 46 | final List modifiers; 47 | final ASTNode ancestryFrom; 48 | if (declarationPoint.getParent() instanceof SingleVariableDeclaration) { 49 | final SingleVariableDeclaration declaration = (SingleVariableDeclaration) declarationPoint 50 | .getParent(); 51 | variableType = declaration.getType(); 52 | modifiers = declaration.modifiers(); 53 | ancestryFrom = declaration; 54 | } else if (declarationPoint.getParent() instanceof VariableDeclarationStatement) { 55 | final VariableDeclarationStatement declaration = (VariableDeclarationStatement) declarationPoint 56 | .getParent(); 57 | variableType = declaration.getType(); 58 | modifiers = declaration.modifiers(); 59 | ancestryFrom = declaration; 60 | } else if (declarationPoint.getParent() instanceof VariableDeclarationFragment) { 61 | if (declarationPoint.getParent().getParent() instanceof VariableDeclarationStatement) { 62 | final VariableDeclarationStatement declaration = (VariableDeclarationStatement) declarationPoint 63 | .getParent().getParent(); 64 | variableType = declaration.getType(); 65 | modifiers = declaration.modifiers(); 66 | ancestryFrom = declaration; 67 | } else if (declarationPoint.getParent().getParent() instanceof FieldDeclaration) { 68 | final FieldDeclaration declaration = (FieldDeclaration) declarationPoint 69 | .getParent().getParent(); 70 | variableType = declaration.getType(); 71 | modifiers = declaration.modifiers(); 72 | ancestryFrom = 
declaration; 73 | } else if (declarationPoint.getParent().getParent() instanceof VariableDeclarationExpression) { 74 | final VariableDeclarationExpression declaration = (VariableDeclarationExpression) declarationPoint 75 | .getParent().getParent(); 76 | variableType = declaration.getType(); 77 | modifiers = declaration.modifiers(); 78 | ancestryFrom = declaration; 79 | } else { 80 | return; 81 | } 82 | } else { 83 | throw new IllegalStateException("Should not reach this"); 84 | } 85 | 86 | if (activeFeatures.contains(AvailableFeatures.TYPE)) { 87 | JavaFeatureExtractor.addTypeFeatures(variableType, features); 88 | } 89 | if (activeFeatures.contains(AvailableFeatures.MODIFIERS)) { 90 | JavaFeatureExtractor.addModifierFeatures(features, modifiers); 91 | } 92 | if (activeFeatures.contains(AvailableFeatures.ANCESTRY)) { 93 | JavaFeatureExtractor.addAstAncestryFeatures(features, ancestryFrom); 94 | } 95 | } 96 | 97 | public void setActiveFeatures(final Collection features) { 98 | activeFeatures = Sets.newHashSet(features); 99 | } 100 | 101 | public Set variableFeatures(final Set boundNodesOfVariable) { 102 | // Find the declaration and extract features 103 | final Set features = Sets.newHashSet(); 104 | for (final ASTNode node : boundNodesOfVariable) { 105 | if (!(node.getParent() instanceof VariableDeclaration)) { 106 | continue; 107 | } 108 | getDeclarationFeatures(features, node); 109 | if (activeFeatures 110 | .contains(AvailableFeatures.IMPLEMENTOR_VOCABULARY)) { 111 | JavaFeatureExtractor.addImplementorVocab(node, features); 112 | } 113 | break; 114 | } 115 | return features; 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/binding/tui/JavaBindingsPerFeatureTypeToJson.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.binding.tui; 5 | 6 | import java.io.File; 7 | import 
java.io.IOException; 8 | import java.util.Collections; 9 | 10 | import codemining.java.codeutils.binding.AbstractJavaNameBindingsExtractor; 11 | 12 | import com.google.common.collect.Sets; 13 | import com.google.gson.JsonIOException; 14 | 15 | /** 16 | * Extract bindings for a given type, including one type of feature per time. 17 | * 18 | * @author Miltos Allamanis 19 | * 20 | */ 21 | public class JavaBindingsPerFeatureTypeToJson { 22 | 23 | /** 24 | * @param args 25 | */ 26 | public static void main(final String[] args) { 27 | if (args.length != 3) { 28 | System.err 29 | .println("Usage variables|methodinvocations|" 30 | + "methodinvocations_typegram|methoddeclarations|methoddeclarations_nooverride" 31 | + "methoddeclarations_typegram|types "); 32 | System.exit(-1); 33 | } 34 | 35 | final File inputFolder = new File(args[0]); 36 | final String outputFolderAndPrefix = args[2]; 37 | final AbstractJavaNameBindingsExtractor bindingExtractor = JavaBindingsToJson 38 | .getExtractorForName(args[1], inputFolder); 39 | 40 | for (final Object featureType : bindingExtractor.getAvailableFeatures()) { 41 | try { 42 | System.out.println("Using only " + featureType + " feature"); 43 | bindingExtractor 44 | .setActiveFeatures(Sets.newHashSet(featureType)); 45 | final File outputFile = new File(outputFolderAndPrefix 46 | + featureType.toString() + ".json"); 47 | System.out.println("Generating at " + outputFile); 48 | JavaBindingsToJson.extractBindings(inputFolder, outputFile, 49 | bindingExtractor); 50 | } catch (JsonIOException | IOException e) { 51 | e.printStackTrace(); 52 | } 53 | } 54 | 55 | try { 56 | System.out.println("Using no features"); 57 | bindingExtractor.setActiveFeatures(Collections.EMPTY_SET); 58 | final File outputFile = new File(outputFolderAndPrefix 59 | + "NO_FEAT.json"); 60 | System.out.println("Generating at " + outputFile); 61 | JavaBindingsToJson.extractBindings(inputFolder, outputFile, 62 | bindingExtractor); 63 | } catch (JsonIOException | IOException 
e) { 64 | e.printStackTrace(); 65 | } 66 | } 67 | 68 | private JavaBindingsPerFeatureTypeToJson() { 69 | // No instantiations. 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Java Code Utilities for tokenizing and retrieving 3 | * Java AST. 4 | */ 5 | package codemining.java.codeutils; 6 | 7 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/scopes/AllScopeExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.scopes; 5 | 6 | import static com.google.common.base.Preconditions.checkArgument; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.List; 11 | 12 | import org.eclipse.jdt.core.dom.ASTNode; 13 | 14 | import codemining.languagetools.IScopeExtractor; 15 | import codemining.languagetools.ParseType; 16 | import codemining.languagetools.Scope; 17 | 18 | import com.google.common.collect.Lists; 19 | import com.google.common.collect.Multimap; 20 | import com.google.common.collect.TreeMultimap; 21 | 22 | /** 23 | * Aggregate all extractors. 
24 | * 25 | * @author Miltos Allamanis 26 | * 27 | */ 28 | public class AllScopeExtractor { 29 | 30 | public static final class AllScopeSnippetExtractor implements 31 | IScopeExtractor { 32 | 33 | public AllScopeSnippetExtractor() { 34 | allExtractors = Lists.newArrayList(); 35 | allExtractors 36 | .add(new VariableScopeExtractor.VariableScopeSnippetExtractor()); 37 | allExtractors 38 | .add(new MethodScopeExtractor.MethodScopeSnippetExtractor( 39 | true)); 40 | allExtractors 41 | .add(new TypenameScopeExtractor.TypenameSnippetExtractor( 42 | true)); 43 | } 44 | 45 | public AllScopeSnippetExtractor(final boolean variables, 46 | final boolean methods, final boolean types) { 47 | allExtractors = Lists.newArrayList(); 48 | checkArgument(variables | methods | types, 49 | "At least one option must be set"); 50 | if (variables) { 51 | allExtractors 52 | .add(new VariableScopeExtractor.VariableScopeSnippetExtractor()); 53 | } 54 | if (methods) { 55 | allExtractors 56 | .add(new MethodScopeExtractor.MethodScopeSnippetExtractor( 57 | true)); 58 | } 59 | if (types) { 60 | allExtractors 61 | .add(new TypenameScopeExtractor.TypenameSnippetExtractor( 62 | true)); 63 | } 64 | } 65 | 66 | private final List allExtractors; 67 | 68 | @Override 69 | public Multimap getFromFile(final File file) 70 | throws IOException { 71 | final Multimap scopes = TreeMultimap.create(); 72 | for (final IScopeExtractor extractor : allExtractors) { 73 | scopes.putAll(extractor.getFromFile(file)); 74 | } 75 | return scopes; 76 | } 77 | 78 | @Override 79 | public Multimap getFromNode(ASTNode node) { 80 | final Multimap scopes = TreeMultimap.create(); 81 | for (final IScopeExtractor extractor : allExtractors) { 82 | scopes.putAll(extractor.getFromNode(node)); 83 | } 84 | return scopes; 85 | } 86 | 87 | @Override 88 | public Multimap getFromString(final String file, 89 | final ParseType parseType) { 90 | final Multimap scopes = TreeMultimap.create(); 91 | for (final IScopeExtractor extractor : 
allExtractors) { 92 | scopes.putAll(extractor.getFromString(file, parseType)); 93 | } 94 | return scopes; 95 | } 96 | } 97 | 98 | /** 99 | * 100 | */ 101 | private AllScopeExtractor() { 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/scopes/MethodScopeExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.scopes; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Map.Entry; 9 | import java.util.logging.Logger; 10 | 11 | import org.eclipse.jdt.core.dom.ASTNode; 12 | import org.eclipse.jdt.core.dom.ASTVisitor; 13 | import org.eclipse.jdt.core.dom.MethodDeclaration; 14 | import org.eclipse.jdt.core.dom.MethodInvocation; 15 | import org.eclipse.jdt.core.dom.TypeDeclaration; 16 | 17 | import codemining.java.codeutils.JavaASTExtractor; 18 | import codemining.languagetools.IScopeExtractor; 19 | import codemining.languagetools.ParseType; 20 | import codemining.languagetools.Scope; 21 | import codemining.languagetools.Scope.ScopeType; 22 | 23 | import com.google.common.base.Objects; 24 | import com.google.common.collect.HashMultimap; 25 | import com.google.common.collect.Multimap; 26 | import com.google.common.collect.TreeMultimap; 27 | 28 | /** 29 | * Extract method names from a scope. 
30 | * 31 | * @author Miltos Allamanis 32 | * 33 | */ 34 | public class MethodScopeExtractor { 35 | 36 | public static class Method { 37 | 38 | public final String name; 39 | public final ScopeType type; 40 | 41 | public Method(final String name, final ScopeType type) { 42 | this.name = name; 43 | this.type = type; 44 | } 45 | 46 | @Override 47 | public boolean equals(Object obj) { 48 | if (!(obj instanceof Method)) { 49 | return false; 50 | } 51 | Method other = (Method) obj; 52 | return name.equals(other.name) && type == other.type; 53 | } 54 | 55 | @Override 56 | public int hashCode() { 57 | return Objects.hashCode(name, type); 58 | } 59 | } 60 | 61 | public static final class MethodScopeSnippetExtractor implements 62 | IScopeExtractor { 63 | 64 | final boolean methodAsRoots; 65 | 66 | public MethodScopeSnippetExtractor(final boolean useMethodsAsRoots) { 67 | methodAsRoots = useMethodsAsRoots; 68 | } 69 | 70 | @Override 71 | public final Multimap getFromFile(final File f) { 72 | try { 73 | return getScopeSnippets(f, methodAsRoots); 74 | } catch (IOException e) { 75 | LOGGER.severe("Unable to extract method scope snippets from file " 76 | + f.getName()); 77 | throw new IllegalArgumentException( 78 | "Unable to extract method scope snippets from file"); 79 | } 80 | } 81 | 82 | @Override 83 | public Multimap getFromNode(final ASTNode node) { 84 | return getScopeSnippets(node, methodAsRoots); 85 | } 86 | 87 | @Override 88 | public final Multimap getFromString(final String code, 89 | final ParseType parseType) { 90 | return getScopeSnippets(code, methodAsRoots, parseType); 91 | } 92 | } 93 | 94 | private static class ScopeFinder extends ASTVisitor { 95 | 96 | final Multimap methods = HashMultimap.create(); 97 | 98 | ASTNode classNode = null; 99 | ASTNode currentMethodNode = null; 100 | final boolean methodAsRoot; 101 | 102 | public ScopeFinder(final boolean methodAsRoots) { 103 | methodAsRoot = methodAsRoots; 104 | } 105 | 106 | @Override 107 | public void 
endVisit(MethodDeclaration node) { 108 | if (currentMethodNode == node) { 109 | currentMethodNode = null; 110 | } 111 | super.endVisit(node); 112 | } 113 | 114 | @Override 115 | public boolean visit(MethodDeclaration node) { 116 | if (currentMethodNode == null) { 117 | currentMethodNode = node; 118 | } 119 | if (node.isConstructor()) 120 | return super.visit(node); 121 | final String name = node.getName().toString(); 122 | 123 | final Method mth = new Method(name, ScopeType.SCOPE_CLASS); 124 | if (classNode != null) { 125 | methods.put(classNode, mth); 126 | } 127 | return super.visit(node); 128 | } 129 | 130 | @Override 131 | public boolean visit(MethodInvocation node) { 132 | final String name = node.getName().toString(); 133 | 134 | if (methodAsRoot && currentMethodNode != null) { 135 | final Method mth = new Method(name, ScopeType.SCOPE_METHOD); 136 | methods.put(currentMethodNode, mth); 137 | } else { 138 | final Method mth = new Method(name, ScopeType.SCOPE_CLASS); 139 | methods.put(classNode, mth); 140 | } 141 | return super.visit(node); 142 | } 143 | 144 | @Override 145 | public boolean visit(TypeDeclaration node) { 146 | if (classNode == null) { 147 | classNode = node; 148 | } 149 | return super.visit(node); 150 | } 151 | } 152 | 153 | public static final String METHOD_CALL = "%MethodCall%"; 154 | 155 | private static final Logger LOGGER = Logger 156 | .getLogger(MethodScopeExtractor.class.getName()); 157 | 158 | public static Multimap getScopeSnippets(final ASTNode node, 159 | final boolean methodAsRoots) { 160 | final ScopeFinder scopeFinder = new ScopeFinder(methodAsRoots); 161 | node.accept(scopeFinder); 162 | 163 | final Multimap scopes = TreeMultimap.create(); 164 | for (final Entry method : scopeFinder.methods 165 | .entries()) { 166 | scopes.put(new Scope(method.getKey().toString(), 167 | method.getValue().type, METHOD_CALL, 0, 0), method 168 | .getValue().name); 169 | } 170 | 171 | return scopes; 172 | 173 | } 174 | 175 | public static Multimap 
getScopeSnippets(final File file, 176 | final boolean methodAsRoots) throws IOException { 177 | final JavaASTExtractor ex = new JavaASTExtractor(false); 178 | return getScopeSnippets(ex.getAST(file), methodAsRoots); 179 | } 180 | 181 | public static Multimap getScopeSnippets(final String code, 182 | final boolean methodAsRoots, final ParseType parseType) { 183 | final JavaASTExtractor ex = new JavaASTExtractor(false); 184 | return getScopeSnippets(ex.getAST(code, parseType), methodAsRoots); 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/scopes/ScopedIdentifierRenaming.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.scopes; 5 | 6 | import java.util.Collections; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | import codemining.java.codeutils.JavaASTExtractor; 11 | import codemining.java.tokenizers.JavaTokenizer; 12 | import codemining.languagetools.IScopeExtractor; 13 | import codemining.languagetools.ITokenizer; 14 | import codemining.languagetools.ParseType; 15 | import codemining.languagetools.Scope; 16 | 17 | import com.google.common.collect.Maps; 18 | import com.google.common.collect.Multimap; 19 | 20 | /** 21 | * Rename an identifer given a scope. 
22 | * 23 | * @author Miltos Allamanis 24 | * 25 | */ 26 | public class ScopedIdentifierRenaming { 27 | 28 | final IScopeExtractor scopeExtractor; 29 | 30 | final ITokenizer tokenizer = new JavaTokenizer(); 31 | 32 | final ParseType parseKindToUseOnOriginal; 33 | 34 | public ScopedIdentifierRenaming(final IScopeExtractor scopeExtractor, 35 | final ParseType parseType) { 36 | this.scopeExtractor = scopeExtractor; 37 | parseKindToUseOnOriginal = parseType; 38 | } 39 | 40 | public String getFormattedRenamedCode(final String originalScopeCode, 41 | final String from, final String to, final String wholeFile) { 42 | final String code = getRenamedCode(originalScopeCode, from, to, 43 | wholeFile); 44 | final JavaASTExtractor ex = new JavaASTExtractor(false); 45 | return ex.getASTNode(code, parseKindToUseOnOriginal).toString(); 46 | } 47 | 48 | /** 49 | * @param originalScopeCode 50 | * @param from 51 | * @param to 52 | * @param wholeFile 53 | * @return 54 | */ 55 | public String getRenamedCode(final String originalScopeCode, 56 | final String from, final String to, final String wholeFile) { 57 | final Map varMapping = Maps.newTreeMap(); 58 | varMapping.put(from, to); 59 | return getRenamedCode(originalScopeCode, wholeFile, varMapping); 60 | } 61 | 62 | /** 63 | * @param originalScopeCode 64 | * @param wholeFile 65 | * @param varMapping 66 | * @return 67 | */ 68 | public String getRenamedCode(final String originalScopeCode, 69 | final String wholeFile, final Map varMapping) { 70 | final JavaASTExtractor ex = new JavaASTExtractor(false); 71 | final String originalCode = renameVariableInSnippet( 72 | ex.getASTNode(wholeFile, parseKindToUseOnOriginal).toString(), 73 | Collections.EMPTY_MAP); 74 | final String snippetToBeReplaced = renameVariableInSnippet( 75 | originalScopeCode, varMapping); 76 | 77 | final String code = originalCode.replace( 78 | renameVariableInSnippet(originalScopeCode, 79 | Collections.EMPTY_MAP), snippetToBeReplaced); 80 | return code; 81 | } 82 | 83 | 
public Multimap getRenamedScopes(final Scope originalScope, 84 | final String from, final String to, final String wholeFile) { 85 | return getRenamedScopes(originalScope.code, from, to, wholeFile); 86 | } 87 | 88 | public Multimap getRenamedScopes( 89 | final String originalScopeCode, final String from, final String to, 90 | final String wholeFile) { 91 | final String code = getRenamedCode(originalScopeCode, from, to, 92 | wholeFile); 93 | return scopeExtractor.getFromString(code, parseKindToUseOnOriginal); 94 | } 95 | 96 | /** 97 | * Crudely rename the name of an identifier by searching for similarly named 98 | * tokens. 99 | * 100 | * @param snippet 101 | * @param variableMapping 102 | * from, to 103 | * @return 104 | */ 105 | private String renameVariableInSnippet(final String snippet, 106 | final Map variableMapping) { 107 | final List tokens = tokenizer.tokenListFromCode(snippet 108 | .toCharArray()); 109 | 110 | final StringBuffer bf = new StringBuffer(); 111 | for (final String token : tokens) { 112 | if (variableMapping.containsKey(token)) { 113 | bf.append(variableMapping.get(token)); 114 | } else if (token.equals(ITokenizer.SENTENCE_START) 115 | || token.equals(ITokenizer.SENTENCE_END)) { 116 | continue; 117 | } else { 118 | bf.append(token); 119 | } 120 | bf.append(" "); 121 | } 122 | return bf.toString(); 123 | 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/scopes/ScopesTUI.java: -------------------------------------------------------------------------------- 1 | package codemining.java.codeutils.scopes; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | 6 | import codemining.java.codeutils.scopes.AllScopeExtractor.AllScopeSnippetExtractor; 7 | import codemining.languagetools.IScopeExtractor; 8 | 9 | public class ScopesTUI { 10 | 11 | /** 12 | * @param name 13 | * @return 14 | * @throws UnsupportedOperationException 15 | */ 16 | public 
static IScopeExtractor getScopeExtractorByName(final String name) 17 | throws UnsupportedOperationException { 18 | final IScopeExtractor scopeExtractor; 19 | if (name.equals("variable")) { 20 | scopeExtractor = new VariableScopeExtractor.VariableScopeSnippetExtractor(); 21 | } else if (name.equals("method")) { 22 | scopeExtractor = new MethodScopeExtractor.MethodScopeSnippetExtractor( 23 | true); 24 | } else if (name.equals("type")) { 25 | scopeExtractor = new TypenameScopeExtractor.TypenameSnippetExtractor( 26 | true); 27 | } else if (name.equals("all")) { 28 | scopeExtractor = new AllScopeSnippetExtractor(); 29 | } else { 30 | throw new UnsupportedOperationException( 31 | "Unknown type of identifier."); 32 | } 33 | return scopeExtractor; 34 | } 35 | 36 | /** 37 | * @param args 38 | * @throws IOException 39 | */ 40 | public static void main(String[] args) throws IOException { 41 | if (args.length < 2) { 42 | System.err.println("Usage all|variable|method|type"); 43 | return; 44 | } 45 | final String name = args[1]; 46 | final IScopeExtractor scopeExtractor = getScopeExtractorByName(name); 47 | 48 | System.out.println(scopeExtractor.getFromFile(new File(args[0]))); 49 | 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/codeutils/scopes/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Code that has to do with extracting data from scopes. 
3 | * @author Miltos Allamanis 4 | * 5 | */ 6 | package codemining.java.codeutils.scopes; -------------------------------------------------------------------------------- /src/main/java/codemining/java/tokenizers/JavaIdentifierAnnotatedTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.tokenizers; 5 | 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.SortedMap; 9 | import java.util.logging.Logger; 10 | 11 | import org.apache.commons.lang.exception.ExceptionUtils; 12 | import org.eclipse.jdt.core.compiler.ITerminalSymbols; 13 | import org.eclipse.jdt.core.compiler.InvalidInputException; 14 | import org.eclipse.jdt.internal.core.util.PublicScanner; 15 | 16 | import codemining.java.codeutils.IdentifierPerType; 17 | 18 | import com.google.common.collect.Lists; 19 | import com.google.common.collect.Maps; 20 | import com.google.common.collect.Range; 21 | import com.google.common.collect.RangeSet; 22 | 23 | /** 24 | * A Java tokenizer that annotates the type of the identifier tokens. 
25 | * 26 | * @author Miltos Allamanis 27 | * 28 | */ 29 | public class JavaIdentifierAnnotatedTokenizer extends JavaTokenizer { 30 | 31 | private static class IdentifierTypeRetriever { 32 | 33 | final Map> variables; 34 | final Map> methods; 35 | final Map> types; 36 | 37 | public IdentifierTypeRetriever(final char[] code) throws Exception { 38 | variables = IdentifierPerType.getVariableIdentifiersRanges(code); 39 | methods = IdentifierPerType.getMethodIdentifiersRanges(code); 40 | types = IdentifierPerType.getTypeIdentifiersRanges(code); 41 | } 42 | 43 | public String getIdentifierType(final PublicScanner scanner) { 44 | final int startPos = scanner.getCurrentTokenStartPosition(); 45 | final int endPos = scanner.getCurrentTokenEndPosition(); 46 | final Range tokenRange = Range 47 | .closedOpen(startPos, endPos); 48 | 49 | final String tokenName = scanner.getCurrentTokenString(); 50 | 51 | // TODO: Find the tightest of all 52 | if (isInSet(tokenName, tokenRange, variables)) { 53 | return IDENTIFIER_PREFIX + "_VAR"; 54 | } else if (isInSet(tokenName, tokenRange, methods)) { 55 | return IDENTIFIER_PREFIX + "_METHOD"; 56 | } else if (isInSet(tokenName, tokenRange, types)) { 57 | return IDENTIFIER_PREFIX + "_TYPE"; 58 | } 59 | return IDENTIFIER_PREFIX + "_UNK"; 60 | } 61 | 62 | private boolean isInSet(final String token, 63 | final Range tokenRange, 64 | final Map> set) { 65 | if (!set.containsKey(token)) { 66 | return false; 67 | } 68 | // TODO: Check if in scope 69 | return true; 70 | } 71 | } 72 | 73 | private static final long serialVersionUID = -4779695380807928575L; 74 | 75 | private static final Logger LOGGER = Logger 76 | .getLogger(JavaIdentifierAnnotatedTokenizer.class.getName()); 77 | 78 | public static final String IDENTIFIER_PREFIX = "IDENTIFIER"; 79 | 80 | public static final String LITERAL = "LITERAL"; 81 | 82 | public JavaIdentifierAnnotatedTokenizer() { 83 | super(); 84 | } 85 | 86 | public JavaIdentifierAnnotatedTokenizer(final boolean 
tokenizeComments) { 87 | super(tokenizeComments); 88 | } 89 | 90 | @Override 91 | public SortedMap fullTokenListWithPos(final char[] code) { 92 | IdentifierTypeRetriever idRetriever; 93 | try { 94 | idRetriever = new IdentifierTypeRetriever(code); 95 | } catch (final Exception e) { 96 | throw new IllegalArgumentException(e); 97 | } 98 | 99 | final PublicScanner scanner = prepareScanner(); 100 | final SortedMap tokens = Maps.newTreeMap(); 101 | tokens.put(-1, new FullToken(SENTENCE_START, SENTENCE_START)); 102 | tokens.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END, SENTENCE_END)); 103 | scanner.setSource(code); 104 | while (!scanner.atEnd()) { 105 | do { 106 | try { 107 | final int token = scanner.getNextToken(); 108 | if (token == ITerminalSymbols.TokenNameEOF) { 109 | break; 110 | } 111 | final String nxtToken = transformToken(token, 112 | scanner.getCurrentTokenString()); 113 | final String tokenType = getTokenType(token, scanner, 114 | idRetriever); 115 | 116 | final int position = scanner.getCurrentTokenStartPosition(); 117 | tokens.put(position, new FullToken(nxtToken, tokenType)); 118 | } catch (final InvalidInputException e) { 119 | LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); 120 | } 121 | } while (!scanner.atEnd()); 122 | 123 | } 124 | return tokens; 125 | } 126 | 127 | @Override 128 | public String getIdentifierType() { 129 | throw new UnsupportedOperationException( 130 | "There is no single indentifier type for this tokenizer."); 131 | } 132 | 133 | @Override 134 | public FullToken getTokenFromString(final String token) { 135 | throw new UnsupportedOperationException( 136 | "Cannot compute token from just a string using this tokenizer."); 137 | } 138 | 139 | @Override 140 | public List getTokenListFromCode(final char[] code) { 141 | IdentifierTypeRetriever idRetriever; 142 | try { 143 | idRetriever = new IdentifierTypeRetriever(code); 144 | } catch (final Exception e) { 145 | throw new IllegalArgumentException(e); 146 | } 147 | 148 | final 
List tokens = Lists.newArrayList(); 149 | tokens.add(new FullToken(SENTENCE_START, SENTENCE_START)); 150 | final PublicScanner scanner = prepareScanner(); 151 | scanner.setSource(code); 152 | do { 153 | try { 154 | final int token = scanner.getNextToken(); 155 | if (token == ITerminalSymbols.TokenNameEOF) { 156 | break; 157 | } 158 | final String nxtToken = transformToken(token, 159 | scanner.getCurrentTokenString()); 160 | 161 | final String tokenType = getTokenType(token, scanner, 162 | idRetriever); 163 | tokens.add(new FullToken(stripTokenIfNeeded(nxtToken), 164 | tokenType)); 165 | } catch (final InvalidInputException e) { 166 | LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); 167 | } catch (final StringIndexOutOfBoundsException e) { 168 | LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); 169 | } 170 | } while (!scanner.atEnd()); 171 | tokens.add(new FullToken(SENTENCE_END, SENTENCE_END)); 172 | return tokens; 173 | } 174 | 175 | private final String getTokenType(final int tokenType, 176 | final PublicScanner scanner, final IdentifierTypeRetriever retriever) { 177 | if (tokenType == ITerminalSymbols.TokenNameIdentifier) { 178 | return retriever.getIdentifierType(scanner); 179 | } else if (JavaTokenTypeTokenizer.isLiteralToken(tokenType)) { 180 | return LITERAL; 181 | } else { 182 | return scanner.getCurrentTokenString(); 183 | } 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/tokenizers/JavaTokenizerSomeTokens.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.tokenizers; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.Set; 9 | import java.util.SortedMap; 10 | import java.util.TreeMap; 11 | import java.util.logging.Logger; 12 | 13 | import org.apache.commons.lang.exception.ExceptionUtils; 14 | import org.eclipse.jdt.core.compiler.ITerminalSymbols; 15 
| 16 | import codemining.java.codeutils.IdentifierPerType; 17 | import codemining.util.SettingsLoader; 18 | 19 | /** 20 | * @author Miltos Allamanis 21 | * 22 | */ 23 | public class JavaTokenizerSomeTokens extends JavaTokenizer { 24 | public static final String GENERIC_IDENTIFIER = "%IDENTIFIER%"; 25 | 26 | private static final long serialVersionUID = -8566029315110514304L; 27 | 28 | private static final Logger LOGGER = Logger 29 | .getLogger(JavaTokenizerSomeTokens.class.getName()); 30 | 31 | private Set methodIds; 32 | private Set typeIds; 33 | private Set varIds; 34 | 35 | private final boolean REMOVE_METHOD_IDENTIFIERS = SettingsLoader 36 | .getBooleanSetting("removeMethodIdentifiers", false); 37 | private final boolean REMOVE_VAR_IDENTIFIERS = SettingsLoader 38 | .getBooleanSetting("removeVariableIdentifiers", false); 39 | private final boolean REMOVE_TYPE_IDENTIFIERS = SettingsLoader 40 | .getBooleanSetting("removeTypeIdentifiers", false); 41 | 42 | private void generateValidTokList(final char[] code) throws Exception { 43 | methodIds = IdentifierPerType.getMethodIdentifiers(code); 44 | typeIds = IdentifierPerType.getTypeIdentifiers(code); 45 | varIds = IdentifierPerType.getVariableIdentifiers(code); 46 | } 47 | 48 | @Override 49 | public List tokenListFromCode(final char[] code) { 50 | try { 51 | generateValidTokList(code); 52 | return super.tokenListFromCode(code); 53 | } catch (final Exception e) { 54 | LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); 55 | } 56 | return new ArrayList(); 57 | } 58 | 59 | @Override 60 | public SortedMap tokenListWithPos(final char[] code) { 61 | try { 62 | generateValidTokList(code); 63 | return super.tokenListWithPos(code); 64 | } catch (final Exception e) { 65 | LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); 66 | } 67 | return new TreeMap(); 68 | } 69 | 70 | @Override 71 | protected String transformToken(final int tokenType, final String token) { 72 | if (tokenType != ITerminalSymbols.TokenNameIdentifier) { 73 | 
return token; 74 | } 75 | if (methodIds.contains(token) && REMOVE_METHOD_IDENTIFIERS) { 76 | return GENERIC_IDENTIFIER; 77 | } else if (varIds.contains(token) && REMOVE_VAR_IDENTIFIERS) { 78 | return GENERIC_IDENTIFIER; 79 | } else if (typeIds.contains(token) && REMOVE_TYPE_IDENTIFIERS) { 80 | return GENERIC_IDENTIFIER; 81 | } 82 | return token; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/codemining/java/tokenizers/JavaWidthAnnotatedWhitespaceTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.tokenizers; 5 | 6 | import java.util.List; 7 | import java.util.Map.Entry; 8 | import java.util.SortedMap; 9 | 10 | import codemining.util.SettingsLoader; 11 | 12 | import com.google.common.collect.Lists; 13 | import com.google.common.collect.Maps; 14 | 15 | /** 16 | * A Java whitespace tokenizer that annotates its non-whitespace tokens with 17 | * width data. 18 | * 19 | * @author Miltos Allamanis 20 | * 21 | */ 22 | public class JavaWidthAnnotatedWhitespaceTokenizer extends 23 | JavaWhitespaceTokenizer { 24 | 25 | private static final long serialVersionUID = -3365546393414164809L; 26 | 27 | /** 28 | * Constant for getting the size quantization of width, column of the 29 | * annotation. 
30 | */ 31 | public static final int SIZE_QUANTIAZATION = (int) SettingsLoader 32 | .getNumericSetting("sizeQuantization", 20); 33 | 34 | /** 35 | * @param token 36 | * @return 37 | */ 38 | protected String annotatedTokenToString(final WhitespaceAnnotatedToken token) { 39 | final int columnQ = token.column / SIZE_QUANTIAZATION; 40 | final int widthQ = token.width / SIZE_QUANTIAZATION; 41 | final String annotatedToken = token.token + "_" + columnQ + "_" 42 | + widthQ; 43 | return annotatedToken; 44 | } 45 | 46 | @Override 47 | public SortedMap fullTokenListWithPos(final char[] code) { 48 | final TokenizerImplementation tok = new TokenizerImplementation(); 49 | final SortedMap annotatedTokens = tok 50 | .tokenListWithPosAndWidth(code); 51 | final SortedMap tokens = Maps.newTreeMap(); 52 | 53 | for (final Entry entry : annotatedTokens 54 | .entrySet()) { 55 | tokens.put(entry.getKey(), new FullToken( 56 | annotatedTokenToString(entry.getValue()), 57 | entry.getValue().tokenType)); 58 | } 59 | return tokens; 60 | } 61 | 62 | @Override 63 | public FullToken getTokenFromString(final String token) { 64 | throw new UnsupportedOperationException(); 65 | } 66 | 67 | @Override 68 | public List getTokenListFromCode(final char[] code) { 69 | final TokenizerImplementation tok = new TokenizerImplementation(); 70 | final List annotatedTokens = tok 71 | .getTokensWithWidthData(code); 72 | final List tokens = Lists.newArrayList(); 73 | for (final WhitespaceAnnotatedToken token : annotatedTokens) { 74 | if (token.token.startsWith("WS_")) { 75 | tokens.add(new FullToken(token.token, token.tokenType)); 76 | } else { 77 | final String annotatedToken = annotatedTokenToString(token); 78 | tokens.add(new FullToken(annotatedToken, token.tokenType)); 79 | } 80 | } 81 | return tokens; 82 | } 83 | 84 | @Override 85 | public List tokenListFromCode(final char[] code) { 86 | final TokenizerImplementation tok = new TokenizerImplementation(); 87 | final List annotatedTokens = tok 88 | 
.getTokensWithWidthData(code); 89 | final List tokens = Lists.newArrayList(); 90 | for (final WhitespaceAnnotatedToken token : annotatedTokens) { 91 | if (token.token.startsWith("WS_")) { 92 | tokens.add(token.token); 93 | } else { 94 | tokens.add(annotatedTokenToString(token)); 95 | } 96 | } 97 | return tokens; 98 | } 99 | 100 | @Override 101 | public SortedMap tokenListWithPos(final char[] code) { 102 | final TokenizerImplementation tok = new TokenizerImplementation(); 103 | final SortedMap annotatedTokens = tok 104 | .tokenListWithPosAndWidth(code); 105 | final SortedMap tokens = Maps.newTreeMap(); 106 | 107 | for (final Entry entry : annotatedTokens 108 | .entrySet()) { 109 | tokens.put(entry.getKey(), annotatedTokenToString(entry.getValue())); 110 | } 111 | return tokens; 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/codemining/js/codedata/metrics/IJavascriptFileMetricRetriever.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.js.codedata.metrics; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | 9 | import org.eclipse.wst.jsdt.core.dom.ASTNode; 10 | 11 | /** 12 | * An interface for all the classes that can return a metric 13 | * 14 | * @author Miltos Allamanis 15 | * 16 | */ 17 | public interface IJavascriptFileMetricRetriever { 18 | double getMetricForASTNode(final ASTNode node); 19 | 20 | double getMetricForFile(final File file) throws IOException; 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/codemining/js/codedata/metrics/JavascriptCyclomaticCalculator.java: -------------------------------------------------------------------------------- 1 | package codemining.js.codedata.metrics; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.logging.Logger; 6 | 7 | import 
org.eclipse.wst.jsdt.core.dom.ASTNode; 8 | import org.eclipse.wst.jsdt.core.dom.ASTVisitor; 9 | import org.eclipse.wst.jsdt.core.dom.CatchClause; 10 | import org.eclipse.wst.jsdt.core.dom.ConditionalExpression; 11 | import org.eclipse.wst.jsdt.core.dom.DoStatement; 12 | import org.eclipse.wst.jsdt.core.dom.EnhancedForStatement; 13 | import org.eclipse.wst.jsdt.core.dom.ForStatement; 14 | import org.eclipse.wst.jsdt.core.dom.FunctionDeclaration; 15 | import org.eclipse.wst.jsdt.core.dom.IfStatement; 16 | import org.eclipse.wst.jsdt.core.dom.SwitchCase; 17 | import org.eclipse.wst.jsdt.core.dom.WhileStatement; 18 | 19 | import codemining.js.codeutils.JavascriptASTExtractor; 20 | 21 | /** 22 | * Compute McCabe's Cyclomatic Complexity. 23 | * 24 | * @author Miltos Allamanis 25 | * 26 | */ 27 | public class JavascriptCyclomaticCalculator implements 28 | IJavascriptFileMetricRetriever { 29 | 30 | /** 31 | * Visit all "junctions" in an AST and increment complexity. 32 | * 33 | */ 34 | private static class JunctionVisitor extends ASTVisitor { 35 | int complexity = 0; 36 | 37 | @Override 38 | public boolean visit(final CatchClause arg0) { 39 | complexity++; 40 | return super.visit(arg0); 41 | } 42 | 43 | @Override 44 | public boolean visit(final ConditionalExpression arg0) { 45 | complexity++; 46 | return super.visit(arg0); 47 | } 48 | 49 | @Override 50 | public boolean visit(final DoStatement arg0) { 51 | complexity++; 52 | return super.visit(arg0); 53 | } 54 | 55 | @Override 56 | public boolean visit(final EnhancedForStatement arg0) { 57 | complexity++; 58 | return super.visit(arg0); 59 | } 60 | 61 | @Override 62 | public boolean visit(final ForStatement arg0) { 63 | complexity++; 64 | return super.visit(arg0); 65 | } 66 | 67 | @Override 68 | public boolean visit(final IfStatement arg0) { 69 | complexity++; 70 | return super.visit(arg0); 71 | } 72 | 73 | @Override 74 | public boolean visit(final FunctionDeclaration arg0) { 75 | /* 76 | * if (isConcrete(arg0)) { 
complexity.startMethod(); return 77 | * super.visit(arg0); } return false; 78 | */ 79 | complexity++; // TODO: Not exactly true, but we'll use that 80 | return super.visit(arg0); 81 | } 82 | 83 | @Override 84 | public boolean visit(final SwitchCase arg0) { 85 | complexity++; 86 | return super.visit(arg0); 87 | } 88 | 89 | @Override 90 | public boolean visit(final WhileStatement arg0) { 91 | complexity++; 92 | return super.visit(arg0); 93 | } 94 | } 95 | 96 | private static final Logger LOGGER = Logger 97 | .getLogger(JavascriptCyclomaticCalculator.class.getName()); 98 | 99 | public int getComplexity(final File file) throws IOException { 100 | final JavascriptASTExtractor ast = new JavascriptASTExtractor(false); 101 | final JunctionVisitor visitor = new JunctionVisitor(); 102 | ast.getAST(file).accept(visitor); 103 | return visitor.complexity; 104 | } 105 | 106 | @Override 107 | public double getMetricForASTNode(final ASTNode node) { 108 | final JunctionVisitor visitor = new JunctionVisitor(); 109 | node.accept(visitor); 110 | return visitor.complexity; 111 | } 112 | 113 | @Override 114 | public double getMetricForFile(final File file) throws IOException { 115 | return getComplexity(file); 116 | } 117 | } 118 |
-------------------------------------------------------------------------------- /src/main/java/codemining/js/codedata/metrics/JavascriptFunctionCounter.java: --------------------------------------------------------------------------------
package codemining.js.codedata.metrics;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.eclipse.wst.jsdt.core.dom.ASTNode;
import org.eclipse.wst.jsdt.core.dom.ASTVisitor;
import org.eclipse.wst.jsdt.core.dom.JavaScriptUnit;

import codemining.js.codeutils.JavascriptASTExtractor;

/**
 * Command-line utility that counts the function declarations found in all
 * {@code *.js} files below a directory and prints the totals to standard
 * output.
 */
public final class JavascriptFunctionCounter {

    /**
     * AST visitor that increments {@link #noFunctions} once for every visited
     * node whose type is {@link ASTNode#FUNCTION_DECLARATION}.
     */
    public static class MethodClassCountVisitor extends ASTVisitor {

        /** Number of function declarations seen so far by this visitor. */
        public int noFunctions = 0;

        @Override
        public void postVisit(final ASTNode node) {
            if (node.getNodeType() == ASTNode.FUNCTION_DECLARATION) {
                noFunctions++;
            }
        }

    }

    /**
     * Entry point. Expects exactly one argument, the directory to scan;
     * otherwise prints a usage line to standard error and exits with status -1.
     *
     * @param args
     *            the command-line arguments
     * @throws IOException
     *             if a source file cannot be read
     */
    public static void main(final String[] args) throws IOException {
        if (args.length != 1) {
            System.err.println("Usage <directory>");
            System.exit(-1);
        }
        final File directory = new File(args[0]);
        countFunctions(directory);
    }

    /**
     * Parses every {@code *.js} file found recursively under
     * {@code projectDir}, counts the function declarations with a single
     * shared {@link MethodClassCountVisitor} and prints progress and totals to
     * standard output.
     *
     * @param projectDir
     *            the directory to scan recursively
     * @throws IOException
     *             if a source file cannot be read
     */
    public static void countFunctions(final File projectDir) throws IOException {

        System.out.println("\n===== Project " + projectDir);
        final MethodClassCountVisitor mccv = new MethodClassCountVisitor();
        final JavascriptASTExtractor astExtractor = new JavascriptASTExtractor(
                false);

        // FileUtils.listFiles is declared to return a Collection; copy it
        // rather than downcasting to List, which relies on the concrete
        // collection the library happens to return.
        final List<File> files = new ArrayList<File>(FileUtils.listFiles(
                projectDir, new String[] { "js" }, true));

        int count = 0;
        for (final File file : files) {

            final JavaScriptUnit cu = astExtractor.getAST(file);
            cu.accept(mccv);

            // Progress line every 1000 files (including the very first one).
            if (count % 1000 == 0) {
                System.out.println("At file " + count + " of " + files.size());
            }
            count++;
        }

        System.out.println("Project " + projectDir);
        System.out.println("No. *.js files " + files.size());
        System.out.println("No. Functions: " + mccv.noFunctions);
    }

    /** Utility class; not instantiable. */
    private JavascriptFunctionCounter() {

    }

}
-------------------------------------------------------------------------------- /src/main/java/codemining/js/codeutils/FunctionRetriever.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.js.codeutils; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Map; 9 | 10 | import org.eclipse.wst.jsdt.core.dom.ASTNode; 11 | import org.eclipse.wst.jsdt.core.dom.ASTVisitor; 12 | import org.eclipse.wst.jsdt.core.dom.FunctionDeclaration; 13 | import org.eclipse.wst.jsdt.core.dom.JavaScriptUnit; 14 | 15 | import com.google.common.collect.Maps; 16 | 17 | /** 18 | * A utility class that retrieves the methods (as AST Nodes) of a file. 19 | * 20 | * @author Miltos Allamanis 21 | * 22 | */ 23 | public final class FunctionRetriever extends ASTVisitor { 24 | 25 | public static Map getFunctionNodes( 26 | final File file) throws IOException { 27 | final JavascriptASTExtractor astExtractor = new JavascriptASTExtractor( 28 | false); 29 | final FunctionRetriever m = new FunctionRetriever(); 30 | final JavaScriptUnit cu = astExtractor.getAST(file); 31 | cu.accept(m); 32 | return m.functions; 33 | } 34 | 35 | public static Map getFunctionNodes( 36 | final String file) throws Exception { 37 | final JavascriptASTExtractor astExtractor = new JavascriptASTExtractor( 38 | false); 39 | final FunctionRetriever m = new FunctionRetriever(); 40 | final ASTNode cu = astExtractor.getCompilationUnitAstNode(file); 41 | cu.accept(m); 42 | return m.functions; 43 | } 44 | 45 | private final Map functions = Maps 46 | .newTreeMap(); 47 | 48 | private FunctionRetriever() { 49 | 50 | } 51 | 52 | @Override 53 | public boolean visit(final FunctionDeclaration node) { 54 | functions.put(node.getName().toString(), node); 55 | return super.visit(node); 56 | } 57 | 58 | } 59 |
-------------------------------------------------------------------------------- /src/main/java/codemining/js/codeutils/binding/JavascriptExactVariableBindingsExtractor.java: --------------------------------------------------------------------------------
/**
 *
 */
package codemining.js.codeutils.binding;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.lang.NotImplementedException;
import org.eclipse.wst.jsdt.core.dom.*;

import codemining.js.codeutils.JavascriptASTExtractor;
import codemining.languagetools.bindings.TokenNameBinding;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

/**
 * Retrieve the variable bindings, given an ASTNode. This finds exact bindings
 * to the detriment of recall. Partial code snippets are not supported.
 *
 * @author Miltos Allamanis
 *
 */
public class JavascriptExactVariableBindingsExtractor extends
        AbstractJavascriptNameBindingsExtractor {

    /**
     * This class looks for declarations of variables and the references to
     * them.
     *
     */
    private static class VariableBindingFinder extends ASTVisitor {
        /**
         * Map of variables (represented as bindings, compared by identity) to
         * all name nodes where the variable is referenced.
         */
        Map<IVariableBinding, List<ASTNode>> variableScope = Maps
                .newIdentityHashMap();

        /**
         * Registers a declared variable with an empty reference list.
         * Unresolved (null) bindings are ignored, so every declaration visitor
         * treats them the same way and no spurious empty entry is created for
         * the null key.
         *
         * @param binding
         *            the resolved binding of the declaration, or null when it
         *            could not be resolved
         */
        private void addBinding(final IVariableBinding binding) {
            if (binding == null) {
                return; // Unresolved declaration: nothing to track.
            }
            variableScope.put(binding, Lists.<ASTNode> newArrayList());
        }

        /**
         * Records {@code nameNode} as a reference to an already registered
         * variable.
         *
         * @param binding
         *            the binding the name resolves to; ignored when null
         * @param nameNode
         *            the node that references the variable
         * @throws NullPointerException
         *             if the binding was never registered through
         *             {@link #addBinding(IVariableBinding)}
         */
        private void addBindingData(final IVariableBinding binding,
                final ASTNode nameNode) {
            if (binding == null) {
                return; // Sorry, cannot do anything.
            }
            final List<ASTNode> thisVarBindings = checkNotNull(
                    variableScope.get(binding),
                    "Binding was not previously found");
            thisVarBindings.add(nameNode);
        }

        /**
         * Registers the binding of every {@link VariableDeclarationFragment}
         * in the given fragment list.
         *
         * @param fragments
         *            the fragments of a declaration node
         */
        private void addFragmentBindings(final List<?> fragments) {
            for (final Object fragment : fragments) {
                final VariableDeclarationFragment frag = (VariableDeclarationFragment) fragment;
                addBinding(frag.resolveBinding());
            }
        }

        /**
         * Looks for field declarations (i.e. class member variables).
         */
        @Override
        public boolean visit(final FieldDeclaration node) {
            addFragmentBindings(node.fragments());
            return true;
        }

        /**
         * Visits {@link SimpleName} AST nodes. Resolves the binding of the
         * simple name and looks for it in the {@link #variableScope} map. If
         * the binding is found, this is a reference to a variable.
         *
         * @param node
         *            the node to visit
         */
        @Override
        public boolean visit(final SimpleName node) {
            final IBinding binding = node.resolveBinding();
            if (variableScope.containsKey(binding)) {
                addBindingData((IVariableBinding) binding, node);
            }
            return true;
        }

        /**
         * Looks for Method Parameters.
         */
        @Override
        public boolean visit(final SingleVariableDeclaration node) {
            addBinding(node.resolveBinding());
            return true;
        }

        /**
         * Looks for variables declared in for loops.
         */
        @Override
        public boolean visit(final VariableDeclarationExpression node) {
            addFragmentBindings(node.fragments());
            return true;
        }

        /**
         * Looks for local variable declarations. Every resolvable declared
         * variable is registered in the {@link #variableScope} map.
         *
         * @param node
         *            the node to visit
         */
        @Override
        public boolean visit(final VariableDeclarationStatement node) {
            addFragmentBindings(node.fragments());
            return true;
        }
    }

    @Override
    protected JavascriptASTExtractor createExtractor() {
        return new JavascriptASTExtractor(true);
    }

    // NOTE(review): the element types of the raw Set/List signatures below
    // are not visible in this file - confirm against the superclass.
    @Override
    public Set getAvailableFeatures() {
        return Collections.emptySet();
    }

    /**
     * Collects, for every variable declared under {@code node}, the set of
     * name nodes that refer to it.
     *
     * @param node
     *            the root of the AST to analyse
     * @return one identity-based set of referencing nodes per declared
     *         variable
     */
    @Override
    public Set<Set<ASTNode>> getNameBindings(final ASTNode node) {
        final VariableBindingFinder bindingFinder = new VariableBindingFinder();
        node.accept(bindingFinder);

        final Set<Set<ASTNode>> nameBindings = Sets.newHashSet();
        for (final Entry<IVariableBinding, List<ASTNode>> variableBindings : bindingFinder.variableScope
                .entrySet()) {
            final Set<ASTNode> boundNodes = Sets.newIdentityHashSet();
            boundNodes.addAll(variableBindings.getValue());
            nameBindings.add(boundNodes);
        }
        return nameBindings;
    }

    /**
     * Not supported by this extractor.
     *
     * @throws UnsupportedOperationException
     *             always
     */
    @Override
    public List getNameBindings(final String code) {
        throw new UnsupportedOperationException(
                "Partial snippets cannot be resolved due to the "
                        + "lack of support from Eclipse JSDT. Consider using the approximate binding extractor.");
    }

    /**
     * Not implemented by this extractor.
     *
     * @throws NotImplementedException
     *             always
     */
    @Override
    public void setActiveFeatures(final Set activeFeatures) {
        throw new NotImplementedException();
    }
}
-------------------------------------------------------------------------------- /src/main/java/codemining/js/codeutils/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * JavaScript code 3 | */ 4 | package codemining.js.codeutils; -------------------------------------------------------------------------------- /src/main/java/codemining/langs/codeutils/AbstractJygmentsTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.langs.codeutils; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.List; 9 | import java.util.SortedMap; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.apache.commons.io.filefilter.AbstractFileFilter; 13 | import org.apache.commons.io.filefilter.RegexFileFilter; 14 | 15 | import codemining.languagetools.ITokenizer; 16 | 17 | import com.google.common.collect.Lists; 18 | import com.google.common.collect.Maps; 19 | import com.threecrickets.jygments.ResolutionException; 20 | import com.threecrickets.jygments.grammar.Lexer; 21 | import com.threecrickets.jygments.grammar.Token; 22 | import com.threecrickets.jygments.grammar.TokenType; 23 | 24 | /** 25 | * Tokenize all languages 26 | * 27 | * @author Miltos Allamanis 28 | * 29 | */ 30 | public abstract class AbstractJygmentsTokenizer implements ITokenizer { 31 | 32 | final Lexer lexer; 33 | 34 | private final RegexFileFilter codeFilter; 35 | 36 | private static final long serialVersionUID = 8826779180772076954L; 37 | 38 | public AbstractJygmentsTokenizer(final String fileSuffix) 39 | throws ResolutionException { 40 | lexer = Lexer.getForFileName("sample."
+ fileSuffix); 41 | // lexer.setStripAll(true); 42 | // lexer.setStripNewLines(true); 43 | // lexer.setTabSize(1); 44 | codeFilter = new RegexFileFilter(".*\\." + fileSuffix + "$"); 45 | } 46 | 47 | /* 48 | * (non-Javadoc) 49 | * 50 | * @see codemining.languagetools.ITokenizer#getFileFilter() 51 | */ 52 | @Override 53 | public AbstractFileFilter getFileFilter() { 54 | return codeFilter; 55 | } 56 | 57 | @Override 58 | public List getTokenListFromCode(final File codeFile) 59 | throws IOException { 60 | return getTokenListFromCode(FileUtils.readFileToString(codeFile) 61 | .toCharArray()); 62 | } 63 | 64 | public abstract String getTokenString(final Token tok); 65 | 66 | /** 67 | * @param tok 68 | * @return 69 | */ 70 | protected boolean isProgramToken(final Token tok) { 71 | final TokenType tokenType = tok.getType(); 72 | return tokenType == TokenType.Comment 73 | || tokenType == TokenType.Comment_Multiline 74 | || tokenType == TokenType.Comment_Single 75 | || tokenType == TokenType.Comment_Special 76 | || tokenType == TokenType.Comment_Preproc 77 | || tokenType == TokenType.Text || tok.getValue().equals(" ") 78 | || tok.getValue().equals("\n") || tok.getValue().equals("\t"); 79 | } 80 | 81 | /* 82 | * (non-Javadoc) 83 | * 84 | * @see codemining.languagetools.ITokenizer#tokenListFromCode(char[]) 85 | */ 86 | @Override 87 | public List tokenListFromCode(final char[] code) { 88 | final Iterable tokens = lexer.getTokens(new String(code)); 89 | final List toks = Lists.newArrayList(); 90 | toks.add(SENTENCE_START); 91 | for (final Token tok : tokens) { 92 | if (isProgramToken(tok)) { 93 | continue; 94 | } 95 | toks.add(getTokenString(tok)); 96 | } 97 | toks.add(SENTENCE_END); 98 | return toks; 99 | } 100 | 101 | @Override 102 | public List tokenListFromCode(final File codeFile) 103 | throws IOException { 104 | return tokenListFromCode(FileUtils.readFileToString(codeFile) 105 | .toCharArray()); 106 | } 107 | 108 | /* 109 | * (non-Javadoc) 110 | * 111 | * @see 
codemining.languagetools.ITokenizer#tokenListWithPos(char[]) 112 | */ 113 | @Override 114 | public SortedMap tokenListWithPos(final char[] code) { 115 | final Iterable tokens = lexer.getTokens(new String(code)); 116 | final SortedMap tokensWithPos = Maps.newTreeMap(); 117 | tokensWithPos.put(-1, SENTENCE_START); 118 | tokensWithPos.put(Integer.MAX_VALUE, SENTENCE_END); 119 | for (final Token tok : tokens) { 120 | if (isProgramToken(tok)) { 121 | continue; 122 | } 123 | tokensWithPos.put(tok.getPos(), getTokenString(tok)); 124 | } 125 | return tokensWithPos; 126 | } 127 | 128 | } 129 |
-------------------------------------------------------------------------------- /src/main/java/codemining/langs/codeutils/CodeTokenizer.java: --------------------------------------------------------------------------------
/**
 *
 */
package codemining.langs.codeutils;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.SortedMap;

import org.apache.commons.io.FileUtils;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.threecrickets.jygments.ResolutionException;
import com.threecrickets.jygments.grammar.Token;
import com.threecrickets.jygments.grammar.TokenType;

/**
 * Tokenize the code using the real tokens.
 *
 * @author Miltos Allamanis
 *
 */
public class CodeTokenizer extends AbstractJygmentsTokenizer {

    private static final long serialVersionUID = -981980819807626795L;

    /**
     * @param fileSuffix
     *            the file suffix handed to the superclass constructor
     * @throws ResolutionException
     *             propagated from the superclass constructor
     */
    public CodeTokenizer(final String fileSuffix) throws ResolutionException {
        super(fileSuffix);
    }

    /**
     * Tokenizes {@code code} and maps each kept token's position to a
     * {@link FullToken} holding its trimmed text and its token type name.
     * The map additionally holds SENTENCE_START at key -1 and SENTENCE_END at
     * key {@link Integer#MAX_VALUE}. Tokens for which
     * {@code isProgramToken} returns true are left out.
     */
    @Override
    public SortedMap<Integer, FullToken> fullTokenListWithPos(final char[] code) {
        final Iterable<Token> tokens = lexer.getTokens(new String(code));
        final SortedMap<Integer, FullToken> tokensWithPos = Maps.newTreeMap();
        tokensWithPos.put(-1, new FullToken(SENTENCE_START, SENTENCE_START));
        tokensWithPos.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END,
                SENTENCE_END));
        for (final Token tok : tokens) {
            if (isProgramToken(tok)) {
                continue;
            }
            tokensWithPos.put(tok.getPos(), new FullToken(getTokenString(tok),
                    tok.getType().getName()));
        }
        return tokensWithPos;
    }

    @Override
    public String getIdentifierType() {
        return TokenType.Name.getName();
    }

    @Override
    public Collection<String> getKeywordTypes() {
        return Lists.newArrayList(TokenType.Keyword.getName());
    }

    @Override
    public Collection<String> getLiteralTypes() {
        return Lists.newArrayList(TokenType.Literal.getName());
    }

    /**
     * Tokenizes {@code token} and returns the element at index 1 of the
     * resulting list, i.e. the first element after the SENTENCE_START marker.
     * When the input yields no kept token, that element is the SENTENCE_END
     * marker.
     */
    @Override
    public FullToken getTokenFromString(final String token) {
        return getTokenListFromCode(token.toCharArray()).get(1);
    }

    /**
     * Tokenizes {@code code} into a list that starts with a SENTENCE_START
     * token and ends with a SENTENCE_END token. Tokens for which
     * {@code isProgramToken} returns true are left out.
     */
    @Override
    public List<FullToken> getTokenListFromCode(final char[] code) {
        final Iterable<Token> tokens = lexer.getTokens(new String(code));
        final List<FullToken> toks = Lists.newArrayList();
        toks.add(new FullToken(SENTENCE_START, SENTENCE_START));
        for (final Token tok : tokens) {
            if (isProgramToken(tok)) {
                continue;
            }
            toks.add(new FullToken(getTokenString(tok), tok.getType().getName()));
        }
        toks.add(new FullToken(SENTENCE_END, SENTENCE_END));
        return toks;
    }

    @Override
    public List<FullToken> getTokenListFromCode(final File codeFile)
            throws IOException {
        return getTokenListFromCode(FileUtils.readFileToString(codeFile)
                .toCharArray());
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * codemining.langs.codeutils.AbstractJygmentsTokenizer#getTokenString(com.
     * threecrickets.jygments.grammar.Token)
     */
    @Override
    public String getTokenString(final Token tok) {
        return tok.getValue().trim();
    }

    @Override
    public SortedMap<Integer, FullToken> tokenListWithPos(final File file)
            throws IOException {
        return fullTokenListWithPos(FileUtils.readFileToString(file)
                .toCharArray());
    }

}
-------------------------------------------------------------------------------- /src/main/java/codemining/langs/codeutils/TokenTypeTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.langs.codeutils; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Collection; 9 | import java.util.List; 10 | import java.util.SortedMap; 11 | 12 | import org.apache.commons.io.FileUtils; 13 | 14 | import com.google.common.collect.Lists; 15 | import com.google.common.collect.Maps; 16 | import com.threecrickets.jygments.ResolutionException; 17 | import com.threecrickets.jygments.grammar.Token; 18 | 19 | /** 20 | * Tokenize the code but return only the token types.
21 | * 22 | * @author Miltos Allamanis 23 | * 24 | */ 25 | public class TokenTypeTokenizer extends AbstractJygmentsTokenizer { 26 | 27 | private static final long serialVersionUID = 5822480321022420348L; 28 | 29 | /** 30 | * @param fileSuffix 31 | * @throws ResolutionException 32 | */ 33 | public TokenTypeTokenizer(final String fileSuffix) 34 | throws ResolutionException { 35 | super(fileSuffix); 36 | } 37 | 38 | @Override 39 | public SortedMap fullTokenListWithPos(final char[] code) { 40 | final Iterable tokens = lexer.getTokens(new String(code)); 41 | final SortedMap tokensWithPos = Maps.newTreeMap(); 42 | tokensWithPos.put(-1, new FullToken(SENTENCE_START, SENTENCE_START)); 43 | tokensWithPos.put(Integer.MAX_VALUE, new FullToken(SENTENCE_END, 44 | SENTENCE_END)); 45 | for (final Token tok : tokens) { 46 | if (isProgramToken(tok)) { 47 | continue; 48 | } 49 | tokensWithPos.put(tok.getPos(), new FullToken(getTokenString(tok), 50 | "")); 51 | } 52 | return tokensWithPos; 53 | } 54 | 55 | @Override 56 | public String getIdentifierType() { 57 | throw new IllegalArgumentException("Token types may not be computed"); 58 | } 59 | 60 | @Override 61 | public Collection getKeywordTypes() { 62 | throw new IllegalArgumentException("Token types may not be computed"); 63 | } 64 | 65 | @Override 66 | public Collection getLiteralTypes() { 67 | throw new IllegalArgumentException("Token types may not be computed"); 68 | } 69 | 70 | @Override 71 | public FullToken getTokenFromString(final String token) { 72 | return new FullToken(token, ""); 73 | } 74 | 75 | @Override 76 | public List getTokenListFromCode(final char[] code) { 77 | final Iterable tokens = lexer.getTokens(new String(code)); 78 | final List toks = Lists.newArrayList(); 79 | toks.add(new FullToken(SENTENCE_START, SENTENCE_START)); 80 | for (final Token tok : tokens) { 81 | if (isProgramToken(tok)) { 82 | continue; 83 | } 84 | toks.add(new FullToken(tok.getType().getName(), "")); 85 | } 86 | toks.add(new 
FullToken(SENTENCE_END, SENTENCE_END)); 87 | return toks; 88 | } 89 | 90 | /* 91 | * (non-Javadoc) 92 | * 93 | * @see codemining.langs.codeutils.AbstractCodeTokenizer#getTokenString(com. 94 | * threecrickets.jygments.grammar.Token) 95 | */ 96 | @Override 97 | public String getTokenString(final Token tok) { 98 | return tok.getType().getName(); 99 | } 100 | 101 | @Override 102 | public SortedMap tokenListWithPos(final File file) 103 | throws IOException { 104 | return fullTokenListWithPos(FileUtils.readFileToString(file) 105 | .toCharArray()); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/codemining/langs/codeutils/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * A package containing code utilitis for all programming languages. 3 | * 4 | * @author Miltos Allamanis 5 | * 6 | */ 7 | package codemining.langs.codeutils; -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/ClassHierarchy.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | 3 | import static com.google.common.base.Preconditions.checkArgument; 4 | 5 | import java.io.Serializable; 6 | import java.util.Collection; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | import com.google.common.base.Optional; 11 | import com.google.common.collect.ImmutableList; 12 | import com.google.common.collect.Maps; 13 | import com.google.common.collect.Sets; 14 | 15 | /** 16 | * A class hierarchy contains all the implemented types of a single type. 17 | * 18 | * The type hierarchy makes an effort to be as compact as possible, removing 19 | * redundant is-a relationships. 20 | * 21 | * Not thread safe. 
22 | * 23 | */ 24 | public class ClassHierarchy implements Serializable { 25 | 26 | public static final class Type implements Serializable { 27 | 28 | public final String fullQualifiedName; 29 | 30 | private final Set implementingTypes = Sets.newIdentityHashSet(); 31 | 32 | /** 33 | * A closure of the implementing types. 34 | */ 35 | private final Set implementingTypesClosure = Sets 36 | .newIdentityHashSet(); 37 | 38 | private final Set childTypes = Sets.newIdentityHashSet(); 39 | /** 40 | * A closure of the child Types 41 | */ 42 | private final Set childTypesClosure = Sets.newIdentityHashSet(); 43 | 44 | private static final long serialVersionUID = -4245298170285828934L; 45 | 46 | public Type(final String fqName) { 47 | fullQualifiedName = fqName; 48 | } 49 | 50 | /** 51 | * Add a childType for the given type, only if it does not belong to its 52 | * transitive closure. 53 | * 54 | * @param childType 55 | */ 56 | private final void addChildType(final Type childType) { 57 | checkArgument(!implementingTypesClosure.contains(childType)); 58 | if (childTypesClosure.contains(childType)) { 59 | return; 60 | } 61 | 62 | // If the given type is already a child of a parent type, 63 | // we need to remove it from its implementing type. 64 | implementingTypesClosure.forEach(t -> t.childTypes 65 | .remove(childType)); 66 | 67 | childTypes.add(childType); 68 | 69 | // Update parents closures 70 | implementingTypesClosure.forEach(t -> t.childTypesClosure 71 | .add(childType)); 72 | } 73 | 74 | /** 75 | * Add an implementing type of this type only if it does not belong to 76 | * its transitive closure. 
77 | * 78 | * @param implementingType 79 | */ 80 | private final void addImplementingType(final Type implementingType) { 81 | checkArgument(!childTypesClosure.contains(implementingType)); 82 | if (implementingTypesClosure.contains(implementingType)) { 83 | return; 84 | } 85 | 86 | // If the type is already an implementing type of a child, 87 | // we need to remove it from its child types 88 | childTypesClosure.forEach(t -> t.implementingTypes 89 | .remove(implementingType)); 90 | 91 | implementingTypes.add(implementingType); 92 | 93 | // Update children closures 94 | childTypesClosure.forEach(t -> t.implementingTypesClosure 95 | .add(implementingType)); 96 | } 97 | 98 | public Collection getImplementingTypesClosure() { 99 | return new ImmutableList.Builder() 100 | .addAll(implementingTypesClosure).addAll(implementingTypes) 101 | .build(); 102 | } 103 | 104 | @Override 105 | public String toString() { 106 | final StringBuilder sb = new StringBuilder(); 107 | sb.append(fullQualifiedName); 108 | if (!implementingTypes.isEmpty()) { 109 | sb.append("[ implements "); 110 | implementingTypes.forEach(t -> sb.append(t.fullQualifiedName 111 | + " ")); 112 | } 113 | if (!childTypes.isEmpty()) { 114 | sb.append(" isimplementedby "); 115 | childTypes.forEach(t -> sb.append(t.fullQualifiedName + " ")); 116 | } 117 | sb.append("]"); 118 | return sb.toString(); 119 | } 120 | 121 | } 122 | 123 | private static final long serialVersionUID = 8866244164953568827L; 124 | 125 | private final Map nameToType = Maps.newTreeMap(); 126 | 127 | /** 128 | * Add a type relationship. 
129 | * 130 | * @param type 131 | * @param parentTypeFqn 132 | */ 133 | public void addParentToType(final String type, final String parentTypeFqn) { 134 | final Type childType = getTypeOrNew(type); 135 | final Type parentType = getTypeOrNew(parentTypeFqn); 136 | childType.addImplementingType(parentType); 137 | parentType.addChildType(childType); 138 | } 139 | 140 | public Optional getTypeForName(final String fqName) { 141 | if (nameToType.containsKey(fqName)) { 142 | return Optional.of(nameToType.get(fqName)); 143 | } 144 | return Optional.absent(); 145 | } 146 | 147 | /** 148 | * Get a type that already exists or a create a new type. 149 | * 150 | * @param fqName 151 | */ 152 | private Type getTypeOrNew(final String fqName) { 153 | final Type type; 154 | if (nameToType.containsKey(fqName)) { 155 | type = nameToType.get(fqName); 156 | } else { 157 | type = new Type(fqName); 158 | nameToType.put(fqName, type); 159 | } 160 | return type; 161 | } 162 | 163 | @Override 164 | public String toString() { 165 | final StringBuilder sb = new StringBuilder(); 166 | for (final Type type : nameToType.values()) { 167 | sb.append(type.toString()); 168 | sb.append(System.lineSeparator()); 169 | } 170 | return sb.toString(); 171 | } 172 | 173 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/CodePrinter.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | 3 | import java.awt.Color; 4 | import java.io.File; 5 | import java.io.IOException; 6 | import java.util.List; 7 | import java.util.Map.Entry; 8 | import java.util.SortedMap; 9 | 10 | import org.apache.commons.io.FileUtils; 11 | import org.apache.commons.lang.StringEscapeUtils; 12 | 13 | import codemining.languagetools.ITokenizer.FullToken; 14 | import codemining.util.SettingsLoader; 15 | 16 | /** 17 | * Output Java code to HTML with optional coloring. Not thread-safe. 
18 | * 19 | * @author Miltos Allamanis 20 | * 21 | */ 22 | public class CodePrinter { 23 | 24 | /** 25 | * The tokenizer used to tokenize code. 26 | */ 27 | final ITokenizer tokenizer; 28 | 29 | /** 30 | * The background Color of the output HTML document. 31 | */ 32 | final Color documentBackgroundColor; 33 | 34 | int lineNumber = 1; 35 | 36 | private final boolean ignoreTokBG = SettingsLoader.getBooleanSetting( 37 | "ignoreTokenBackground", true); 38 | 39 | public static final String CSS_STYLE = ""; 44 | 45 | public CodePrinter(final ITokenizer tokenizer, 46 | final Color documentBackgroundColor) { 47 | this.tokenizer = tokenizer; 48 | this.documentBackgroundColor = documentBackgroundColor; 49 | } 50 | 51 | private void addSlack(final String substring, final StringBuffer buf) { 52 | for (final char c : StringEscapeUtils.escapeHtml(substring) 53 | .toCharArray()) { 54 | if (c == '\n') { 55 | appendLineDiv(buf, true); 56 | } else { 57 | buf.append(c); 58 | } 59 | } 60 | 61 | } 62 | 63 | private void appendLineDiv(final StringBuffer buf, 64 | final boolean closePrevious) { 65 | if (closePrevious) { 66 | buf.append("
\n"); 67 | } 68 | buf.append("
"); 69 | lineNumber++; 70 | } 71 | 72 | /** 73 | * Return a StringBuffer with colored tokens as specified from the 74 | * coloredTokens. There should be one-to-one correspondence with the actual 75 | * tokens. 76 | */ 77 | public StringBuffer getHTMLwithColors( 78 | final List coloredTokens, final File codeFile) 79 | throws IOException, InstantiationException, IllegalAccessException { 80 | final String code = FileUtils.readFileToString(codeFile); 81 | lineNumber = 1; 82 | 83 | final StringBuffer buf = new StringBuffer(); 84 | 85 | final SortedMap toks = tokenizer 86 | .fullTokenListWithPos(code.toCharArray()); 87 | 88 | int i = 0; 89 | int prevPos = 0; 90 | buf.append("\n\n\n"); 91 | buf.append(CSS_STYLE); 92 | buf.append("\n"); 96 | appendLineDiv(buf, false); 97 | for (final Entry entry : toks.entrySet()) { 98 | if (i == 0 || entry.getKey() == Integer.MAX_VALUE) { 99 | i++; 100 | continue; 101 | } 102 | addSlack(code.substring(prevPos, entry.getKey()), buf); 103 | final ColoredToken tok = coloredTokens.get(i); 104 | 105 | buf.append("" 111 | + StringEscapeUtils.escapeHtml(entry.getValue().token) 112 | + ""); 113 | i++; 114 | prevPos = entry.getKey() + entry.getValue().token.length(); 115 | } 116 | buf.append("
"); 117 | return buf; 118 | 119 | } 120 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/ColoredToken.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | 3 | import java.awt.Color; 4 | 5 | /** 6 | * Struct class representing a colored token. 7 | * 8 | */ 9 | public final class ColoredToken { 10 | public Color fontColor; 11 | public final Color bgColor; 12 | public final String token; 13 | public String extraStyle; 14 | 15 | /** 16 | * Construct with default bgColor white. 17 | * 18 | * @param token 19 | * @param fontColor 20 | */ 21 | public ColoredToken(final String token, final Color fontColor) { 22 | this.token = token; 23 | this.fontColor = fontColor; 24 | bgColor = Color.WHITE; 25 | extraStyle = ""; 26 | } 27 | 28 | public ColoredToken(final String token, final Color fontColor, 29 | final Color bgColor, final String extraStyle) { 30 | this.token = token; 31 | this.fontColor = fontColor; 32 | this.bgColor = bgColor; 33 | this.extraStyle = extraStyle; 34 | } 35 | 36 | public void setColor(final Color fontColor) { 37 | this.fontColor = fontColor; 38 | } 39 | 40 | public void setStyle(final String extraStyle) { 41 | this.extraStyle = extraStyle; 42 | } 43 | 44 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/FormattingTokenizer.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.Collection; 6 | import java.util.List; 7 | import java.util.Map.Entry; 8 | import java.util.SortedMap; 9 | 10 | import org.apache.commons.io.FileUtils; 11 | import org.apache.commons.io.filefilter.AbstractFileFilter; 12 | 13 | import com.google.common.collect.Lists; 14 | import com.google.common.collect.Maps; 15 
| 16 | /** 17 | * Wraps a base tokenizer and inserts a WS_NO_SPACE token between two adjacent 18 | * tokens when neither of them is a whitespace (WS_-prefixed) token. 19 | */ 20 | public class FormattingTokenizer implements IFormattingTokenizer { 21 | 22 | public static final String WS_NO_SPACE = "WS_NO_SPACE"; 23 | private static final long serialVersionUID = -1736507313790110846L; 24 | final ITokenizer baseTokenizer; 25 | 26 | public FormattingTokenizer(final ITokenizer baseTokenizer) { 27 | this.baseTokenizer = baseTokenizer; 28 | } 29 | /* Token positions are not supported by this wrapper: this method always throws. */ 30 | @Override 31 | public SortedMap fullTokenListWithPos(final char[] code) { 32 | throw new IllegalArgumentException("Cannot be implemented"); 33 | } 34 | 35 | public ITokenizer getBaseTokenizer() { 36 | return baseTokenizer; 37 | } 38 | 39 | @Override 40 | public AbstractFileFilter getFileFilter() { 41 | return baseTokenizer.getFileFilter(); 42 | } 43 | 44 | @Override 45 | public String getIdentifierType() { 46 | return baseTokenizer.getIdentifierType(); 47 | } 48 | 49 | @Override 50 | public Collection getKeywordTypes() { 51 | return baseTokenizer.getKeywordTypes(); 52 | } 53 | 54 | @Override 55 | public Collection getLiteralTypes() { 56 | return baseTokenizer.getLiteralTypes(); 57 | } 58 | 59 | @Override 60 | public FullToken getTokenFromString(final String token) { 61 | return baseTokenizer.getTokenFromString(token); 62 | } 63 | /* Copies the base tokens, adding a WS_NO_SPACE full token (with an empty token type) after each token whose successor is also non-whitespace. NOTE(review): if the base tokenizer returns an empty list, the final original.get(original.size() - 1) is get(-1) and throws. */ 64 | @Override 65 | public List getTokenListFromCode(final char[] code) { 66 | final List list = Lists.newArrayList(); 67 | final List original = baseTokenizer 68 | .getTokenListFromCode(code); 69 | for (int i = 0; i < original.size() - 1; i++) { 70 | final FullToken currentToken = original.get(i); 71 | list.add(currentToken); 72 | final FullToken nextToken = original.get(i + 1); 73 | if (!currentToken.token.startsWith("WS_") 74 | && !nextToken.token.startsWith("WS_")) { 75 | list.add(new FullToken(WS_NO_SPACE, "")); 76 | } 77 | } 78 | list.add(original.get(original.size() - 1)); 79 | return list; 80 | } 81 | 82 | @Override 83 | public List getTokenListFromCode(final File codeFile) 
84 | throws IOException { 85 | return getTokenListFromCode(FileUtils.readFileToString(codeFile) 86 | .toCharArray()); 87 | } 88 | /* String-token variant of getTokenListFromCode: adds WS_NO_SPACE after each token whose successor is also non-whitespace. NOTE(review): an empty list from the base tokenizer makes the final original.get(original.size() - 1) throw. */ 89 | @Override 90 | public List tokenListFromCode(final char[] code) { 91 | // TODO Duplicate 92 | final List list = Lists.newArrayList(); 93 | final List original = baseTokenizer.tokenListFromCode(code); 94 | for (int i = 0; i < original.size() - 1; i++) { 95 | final String currentToken = original.get(i); 96 | list.add(currentToken); 97 | final String nextToken = original.get(i + 1); 98 | if (!currentToken.startsWith("WS_") && !nextToken.startsWith("WS_")) { 99 | list.add(WS_NO_SPACE); 100 | } 101 | } 102 | list.add(original.get(original.size() - 1)); 103 | return list; 104 | } 105 | 106 | @Override 107 | public List tokenListFromCode(final File codeFile) 108 | throws IOException { 109 | return tokenListFromCode(FileUtils.readFileToString(codeFile) 110 | .toCharArray()); 111 | } 112 | 113 | @Override 114 | public SortedMap tokenListWithPos(final char[] code) { 115 | throw new IllegalArgumentException("Cannot be implemented"); 116 | } 117 | /* NOTE(review): this overload delegates to fullTokenListWithPos rather than tokenListWithPos(char[]); both currently throw IllegalArgumentException after the file has been read. */ 118 | @Override 119 | public SortedMap tokenListWithPos(final File file) 120 | throws IOException { 121 | return fullTokenListWithPos(FileUtils.readFileToString(file) 122 | .toCharArray()); 123 | } 124 | 125 | /** 126 | * Return the position of just the whitespaces in the code. 
127 | * 128 | * @param code 129 | * @return 130 | */ 131 | public SortedMap whitespaceTokenPositions(final char[] code) { 132 | final SortedMap wsPositions = Maps.newTreeMap(); 133 | final SortedMap originalPositions = baseTokenizer 134 | .tokenListWithPos(code); 135 | /* Whitespace tokens are copied with their key. A non-whitespace token that directly follows another non-whitespace token gets a WS_NO_SPACE entry under its own key. Sentence start/end markers are skipped without touching the flag. The flag starts as true, so the first real token never gets a WS_NO_SPACE entry. */ 136 | boolean previousWasWhitespace = true; 137 | for (final Entry tokenEntry : originalPositions 138 | .entrySet()) { 139 | if (tokenEntry.getValue().startsWith(ITokenizer.SENTENCE_START) 140 | || tokenEntry.getValue() 141 | .startsWith(ITokenizer.SENTENCE_END)) { 142 | continue; 143 | } 144 | if (tokenEntry.getValue().startsWith("WS_")) { 145 | wsPositions.put(tokenEntry.getKey(), tokenEntry.getValue()); 146 | previousWasWhitespace = true; 147 | } else if (!previousWasWhitespace) { 148 | wsPositions.put(tokenEntry.getKey(), WS_NO_SPACE); 149 | previousWasWhitespace = false; 150 | } else { 151 | previousWasWhitespace = false; 152 | } 153 | } 154 | 155 | return wsPositions; 156 | } 157 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/IAstAnnotatedTokenizer.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.Serializable; 6 | import java.util.List; 7 | import java.util.SortedMap; 8 | 9 | import com.google.common.base.Function; 10 | import com.google.common.base.Objects; 11 | 12 | /** 13 | * A tokenizer interface that returns AST annotated tokens. 14 | * 15 | * @author Miltos Allamanis 16 | * 17 | */ 18 | public interface IAstAnnotatedTokenizer extends ITokenizer { 19 | 20 | /** 21 | * A struct class for representing AST annotated tokens. 
22 | */ 23 | public static class AstAnnotatedToken implements Serializable { 24 | 25 | private static final long serialVersionUID = -8505721476537620929L; 26 | /* Flattens an annotated token into a FullToken. When both AST node labels are non-null the token text becomes token->in{tokenAstNode->parentTokenAstNode} and the original token type is kept; otherwise the plain token is copied. */ 27 | public static final Function TOKEN_FLATTEN_FUNCTION = new Function() { 28 | @Override 29 | public FullToken apply(final AstAnnotatedToken input) { 30 | if (input.tokenAstNode != null 31 | && input.parentTokenAstNode != null) { 32 | return new FullToken(input.token.token + "->in{" 33 | + input.tokenAstNode + "->" 34 | + input.parentTokenAstNode + "}", 35 | input.token.tokenType); 36 | } else { 37 | return new FullToken(input.token); 38 | } 39 | } 40 | }; 41 | 42 | public final FullToken token; 43 | public final String tokenAstNode; 44 | public final String parentTokenAstNode; 45 | 46 | public AstAnnotatedToken(final FullToken token, 47 | final String tokenAstNode, final String parentTokenAstNode) { 48 | this.token = token; 49 | this.tokenAstNode = tokenAstNode; 50 | this.parentTokenAstNode = parentTokenAstNode; 51 | } 52 | /* Null-safe comparison of all three fields; only instances of exactly the same class are equal. */ 53 | @Override 54 | public boolean equals(final Object obj) { 55 | if (this == obj) { 56 | return true; 57 | } 58 | if (obj == null) { 59 | return false; 60 | } 61 | if (getClass() != obj.getClass()) { 62 | return false; 63 | } 64 | final AstAnnotatedToken other = (AstAnnotatedToken) obj; 65 | return Objects.equal(other.token, token) 66 | && Objects.equal(other.tokenAstNode, tokenAstNode) 67 | && Objects.equal(other.parentTokenAstNode, 68 | parentTokenAstNode); 69 | } 70 | 71 | @Override 72 | public int hashCode() { 73 | return Objects.hashCode(token, tokenAstNode, parentTokenAstNode); 74 | } 75 | /* The string form is that of the flattened FullToken. */ 76 | @Override 77 | public String toString() { 78 | return TOKEN_FLATTEN_FUNCTION.apply(this).toString(); 79 | } 80 | 81 | } 82 | 83 | public abstract List getAnnotatedTokenListFromCode( 84 | char[] code); 85 | 86 | public abstract List getAnnotatedTokenListFromCode( 87 | File codeFile) throws IOException; 88 | 89 | /** 90 | * @param code the source code to tokenize 91 | * @return the annotated tokens in a sorted map (presumably keyed by position in the code; confirm against implementations) 92 | */ 93 | public abstract SortedMap 
getAnnotatedTokens( 94 | char[] code); 95 | 96 | /** 97 | * Return the base tokenizer whose tokens are annotated. 98 | * 99 | * @return the base tokenizer whose tokens are annotated 100 | */ 101 | public ITokenizer getBaseTokenizer(); 102 | 103 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/IFormattingTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools; 5 | 6 | /** 7 | * A marker tokenizer interface (it declares no members of its own), suggesting that all implementors produce 8 | * whitespace tokens. 9 | * 10 | * @author Miltos Allamanis 11 | * 12 | */ 13 | public interface IFormattingTokenizer extends ITokenizer { 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/IScopeExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | 9 | import org.eclipse.jdt.core.dom.ASTNode; 10 | 11 | import com.google.common.collect.Multimap; 12 | 13 | /** 14 | * A interface for extracting scoped related information. Scope extractors 15 | * return a multimap that for each scope contains all the identifiers that are 16 | * declared there. 
17 | * 18 | * @author Miltos Allamanis 19 | * 20 | */ 21 | public interface IScopeExtractor { 22 | Multimap getFromFile(final File file) throws IOException; 23 | 24 | Multimap getFromNode(final ASTNode node); 25 | 26 | Multimap getFromString(final String code, 27 | final ParseType parseType); 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/ITokenizer.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.Serializable; 6 | import java.util.Collection; 7 | import java.util.List; 8 | import java.util.SortedMap; 9 | 10 | import org.apache.commons.io.filefilter.AbstractFileFilter; 11 | 12 | import com.google.common.base.Function; 13 | import com.google.common.base.Objects; 14 | 15 | /** 16 | * Interface of a code tokenizer. 17 | * 18 | * @author Miltos Allamanis 19 | * 20 | */ 21 | public interface ITokenizer extends Serializable { 22 | /* An immutable pair of a token string and its token type. */ 23 | public static class FullToken implements Serializable { 24 | 25 | private static final long serialVersionUID = -49456240173307314L; 26 | /* Maps a FullToken to its token string, dropping the token type. */ 27 | public static final Function TOKEN_NAME_CONVERTER = new Function() { 28 | @Override 29 | public String apply(final FullToken input) { 30 | return input.token; 31 | } 32 | }; 33 | 34 | public final String token; 35 | 36 | public final String tokenType; 37 | 38 | public FullToken(final FullToken other) { 39 | token = other.token; 40 | tokenType = other.tokenType; 41 | } 42 | 43 | public FullToken(final String tokName, final String tokType) { 44 | token = tokName; 45 | tokenType = tokType; 46 | } 47 | /* NOTE(review): equals dereferences other.token and other.tokenType directly, so it throws a NullPointerException when the other token was built with a null field; hashCode goes through Objects.hashCode and tolerates nulls. */ 48 | @Override 49 | public boolean equals(final Object obj) { 50 | if (!(obj instanceof FullToken)) { 51 | return false; 52 | } 53 | final FullToken other = (FullToken) obj; 54 | return other.token.equals(token) 55 | && other.tokenType.equals(tokenType); 56 | } 57 | 58 
| @Override 59 | public int hashCode() { 60 | return Objects.hashCode(token, tokenType); 61 | } 62 | 63 | @Override 64 | public String toString() { 65 | return token + " (" + tokenType + ")"; 66 | } 67 | 68 | } 69 | 70 | /** 71 | * A sentence end (constant) token 72 | */ 73 | static final String SENTENCE_END = ""; 74 | 75 | /** 76 | * A sentence start (constant) token 77 | */ 78 | static final String SENTENCE_START = ""; 79 | 80 | /** 81 | * Return the full tokens of the code in a sorted map. 82 | * 83 | * @param code the code to tokenize 84 | * @return a sorted map of full tokens (presumably keyed by position, as the method name suggests; confirm against implementations) 85 | */ 86 | SortedMap fullTokenListWithPos(final char[] code); 87 | 88 | /** 89 | * Return a file filter, filtering the files that can be tokenized. 90 | * 91 | * @return the file filter 92 | * 93 | */ 94 | AbstractFileFilter getFileFilter(); 95 | 96 | /** 97 | * Return the token type that signifies that a token is an identifier. 98 | * 99 | * @return the identifier token type 100 | */ 101 | String getIdentifierType(); 102 | 103 | /** 104 | * Return the token types that are keywords. 105 | * 106 | * @return the keyword token types 107 | */ 108 | Collection getKeywordTypes(); 109 | 110 | /** 111 | * Return the token types that represent literals. 112 | * 113 | * @return the literal token types 114 | */ 115 | Collection getLiteralTypes(); 116 | 117 | /** 118 | * Return a full token given a string token. 119 | * 120 | * @param token the token string 121 | * @return the corresponding full token 122 | */ 123 | FullToken getTokenFromString(final String token); 124 | 125 | /** 126 | * Get the list of full tokens from the code. 127 | * 128 | * @param code the code to tokenize 129 | * @return the list of full tokens 130 | */ 131 | List getTokenListFromCode(final char[] code); 132 | 133 | /** 134 | * Get the list of full tokens from a code file. 135 | * 136 | * @param codeFile the file containing the code 137 | * @return the list of full tokens 138 | */ 139 | List getTokenListFromCode(final File codeFile) 140 | throws IOException; 141 | 142 | /** 143 | * Tokenize some code. 144 | * 145 | * @param code 146 | * the code 147 | * @return a list of tokens 148 | */ 149 | List tokenListFromCode(final char[] code); 150 | 151 | /** 152 | * Tokenize code given a file. 
153 | * 154 | * @param codeFile 155 | * @return 156 | */ 157 | List tokenListFromCode(final File codeFile) throws IOException; 158 | 159 | /** 160 | * Return the tokens of the code along with their positions. 161 | * 162 | * @param code the code to tokenize 163 | * @return a sorted map from position to token string 164 | */ 165 | SortedMap tokenListWithPos(final char[] code); 166 | 167 | /** 168 | * Return the tokens of a file along with their positions. 169 | * 170 | * @param file the file containing the code 171 | * @return a sorted map from position to token string 172 | * @throws IOException declared for implementations that read the file 173 | */ 174 | SortedMap tokenListWithPos(File file) 175 | throws IOException; 176 | 177 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/ParseType.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | /* Presumably the kind of code fragment a string should be parsed as (it is the second argument of IScopeExtractor.getFromString) - TODO confirm. */ 3 | public enum ParseType { 4 | COMPILATION_UNIT, CLASS_BODY, METHOD, STATEMENTS, EXPRESSION 5 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/Scope.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools; 2 | 3 | import com.google.common.base.Objects; 4 | import com.google.common.collect.ComparisonChain; 5 | 6 | /** 7 | * A utility class to represent scopes. 
8 | * 9 | */ 10 | public class Scope implements Comparable { 11 | 12 | public enum ScopeType { 13 | SCOPE_CLASS, SCOPE_LOCAL, SCOPE_METHOD 14 | } 15 | 16 | public final String code; 17 | 18 | public final ScopeType scopeType; 19 | 20 | public final String type; 21 | 22 | public final int astNodeType; 23 | public final int astParentNodeType; 24 | 25 | public Scope(final String code, final ScopeType scopeType, 26 | final String type, final int astNodeType, 27 | final int astParentNodeType) { 28 | this.code = code; 29 | this.scopeType = scopeType; 30 | this.type = type; 31 | this.astNodeType = astNodeType; 32 | this.astParentNodeType = astParentNodeType; 33 | } 34 | /* Orders by code, then scopeType, type, astNodeType and astParentNodeType. */ 35 | @Override 36 | public int compareTo(final Scope other) { 37 | return ComparisonChain.start().compare(code, other.code) 38 | .compare(scopeType, other.scopeType).compare(type, other.type) 39 | .compare(astNodeType, other.astNodeType) 40 | .compare(astParentNodeType, other.astParentNodeType).result(); 41 | } 42 | /* Compares all five fields. NOTE(review): other.code and other.type are dereferenced directly, so a Scope built with a null code or type throws here, while hashCode goes through Objects.hashCode and tolerates nulls. */ 43 | @Override 44 | public boolean equals(final Object obj) { 45 | if (!(obj instanceof Scope)) { 46 | return false; 47 | } 48 | final Scope other = (Scope) obj; 49 | return other.code.equals(code) && other.scopeType == scopeType 50 | && other.astNodeType == astNodeType 51 | && other.astParentNodeType == astParentNodeType 52 | && other.type.equals(type); 53 | } 54 | 55 | @Override 56 | public int hashCode() { 57 | return Objects.hashCode(code, scopeType, type, astNodeType, 58 | astParentNodeType); 59 | } 60 | /* Only scopeType and code are part of the string form. */ 61 | @Override 62 | public String toString() { 63 | return scopeType + " " + code; 64 | } 65 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/TokenizerUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools; 5 | 6 | import static com.google.common.base.Preconditions.checkArgument; 7 | import static 
com.google.common.base.Preconditions.checkPositionIndex; 8 | 9 | import java.lang.reflect.InvocationTargetException; 10 | import java.util.List; 11 | 12 | import org.apache.commons.lang.StringUtils; 13 | 14 | import codemining.languagetools.ITokenizer.FullToken; 15 | import codemining.util.SettingsLoader; 16 | 17 | /** 18 | * Utility functions relevant to tokenization. 19 | * 20 | * @author Miltos Allamanis 21 | * 22 | */ 23 | public class TokenizerUtils { 24 | 25 | public static final int TAB_INDENT_SIZE = (int) SettingsLoader 26 | .getNumericSetting("tabSize", 4); 27 | 28 | /** 29 | * Return the column of the given position, counting every tab on that line before the position as TAB_INDENT_SIZE columns. 30 | * 31 | * @param code the full source text 32 | * @param position an index into code, between 0 and code.length() inclusive 33 | * @return the column. NOTE(review): on the first line the offset is measured from index 0, on later lines from the index of the preceding newline character, so the two cases differ by one. 34 | */ 35 | public static int getColumnOfPosition(final String code, final int position) { 36 | checkPositionIndex(position, code.length()); 37 | int newLinePosition = code.substring(0, position).lastIndexOf("\n"); 38 | if (newLinePosition == -1) { 39 | newLinePosition = 0; // Start of file. 40 | } 41 | final int tabCount = StringUtils.countMatches( 42 | code.substring(newLinePosition, position), "\t"); 43 | return position - newLinePosition + (TAB_INDENT_SIZE - 1) * tabCount; 44 | } 45 | 46 | /** 47 | * Crudely join tokens together: the text of each token is appended to sb, followed by a single space (also after the last token). 48 | * 49 | * @param tokens the full tokens to join 50 | * @param sb the buffer to append to 51 | * @return sb 52 | */ 53 | public final static StringBuffer joinFullTokens( 54 | final List tokens, final StringBuffer sb) { 55 | for (final FullToken token : tokens) { 56 | sb.append(token.token); 57 | sb.append(" "); 58 | } 59 | 60 | return sb; 61 | } 62 | 63 | /** 64 | * Crudely join tokens together: each token is followed by a single space (also the last one). 65 | * 66 | * @param tokens the tokens to join 67 | * (no buffer argument: a new StringBuffer is created) 68 | * @return the newly created buffer 69 | */ 70 | public final static StringBuffer joinTokens(final List tokens) { 71 | final StringBuffer sb = new StringBuffer(); 72 | for (final String token : tokens) { 73 | sb.append(token); 74 | sb.append(" "); 75 | } 76 | 77 | return sb; 78 | } 79 | 80 | /** 81 | * Crudely join tokens together. 
82 | * 83 | * @param tokens 84 | * @param sb 85 | * @return 86 | */ 87 | public final static StringBuffer joinTokens(final List tokens, 88 | final StringBuffer sb) { 89 | for (final String token : tokens) { 90 | sb.append(token); 91 | sb.append(" "); 92 | } 93 | 94 | return sb; 95 | } 96 | 97 | /** 98 | * Remove the sentence start/end FullTokens, modifying the list in place. 99 | * NOTE(review): the first element is removed before the last one is checked, so a failing end check leaves the list without its start token. 100 | * @param tokenSequence a mutable list whose first and last tokens must be the sentence start and sentence end tokens 101 | */ 102 | public static final void removeSentenceStartEndFullTokens( 103 | final List tokenSequence) { 104 | checkArgument(tokenSequence.get(0).token 105 | .equals(ITokenizer.SENTENCE_START)); 106 | tokenSequence.remove(0); 107 | checkArgument(tokenSequence.get(tokenSequence.size() - 1).token 108 | .equals(ITokenizer.SENTENCE_END)); 109 | tokenSequence.remove(tokenSequence.size() - 1); 110 | } 111 | 112 | /** 113 | * Remove the sentence start/end tokens, modifying the list in place. 114 | * NOTE(review): the first element is removed before the last one is checked, so a failing end check leaves the list without its start token. 115 | * @param tokenSequence a mutable list whose first and last elements must be the sentence start and sentence end tokens 116 | */ 117 | public static final void removeSentenceStartEndTokens( 118 | final List tokenSequence) { 119 | checkArgument(tokenSequence.get(0).equals(ITokenizer.SENTENCE_START)); 120 | tokenSequence.remove(0); 121 | checkArgument(tokenSequence.get(tokenSequence.size() - 1).equals( 122 | ITokenizer.SENTENCE_END)); 123 | tokenSequence.remove(tokenSequence.size() - 1); 124 | } 125 | 126 | private TokenizerUtils() { 127 | // Utility class 128 | } 129 | 130 | /** 131 | * @param tokenizerClass the name of the tokenizer class, as accepted by Class.forName 132 | * @param tokenizerArguments the single String argument passed to the constructor of that class 133 | * @return the new tokenizer instance 134 | * @throws InstantiationException 135 | * @throws IllegalAccessException 136 | * @throws IllegalArgumentException 137 | * @throws InvocationTargetException 138 | * @throws NoSuchMethodException 139 | * @throws SecurityException 140 | * @throws ClassNotFoundException 141 | */ 142 | public static ITokenizer tokenizerForClass(final String tokenizerClass, 143 | final String tokenizerArguments) throws InstantiationException, 144 | IllegalAccessException, IllegalArgumentException, 145 | InvocationTargetException, NoSuchMethodException, 146 | SecurityException, 
ClassNotFoundException { 147 | return (ITokenizer) Class.forName(tokenizerClass) 148 | .getDeclaredConstructor(String.class) 149 | .newInstance(tokenizerArguments); 150 | } 151 | /* Instantiates the named tokenizer through its declared constructor that takes a single primitive boolean (Boolean.TYPE); the Boolean argument is passed to it. */ 152 | public static ITokenizer tokenizerForClass(final String tokenizerClass, 153 | final Boolean tokenizerArguments) throws InstantiationException, 154 | IllegalAccessException, IllegalArgumentException, 155 | InvocationTargetException, NoSuchMethodException, 156 | SecurityException, ClassNotFoundException { 157 | return (ITokenizer) Class.forName(tokenizerClass) 158 | .getDeclaredConstructor(Boolean.TYPE) 159 | .newInstance(tokenizerArguments); 160 | } 161 | 162 | /** 163 | * @param tokenizerClass the name of the tokenizer class, as accepted by Class.forName; it is instantiated through Class.newInstance, i.e. with no arguments 164 | * @return the new tokenizer instance 165 | * @throws InstantiationException 166 | * @throws IllegalAccessException 167 | * @throws ClassNotFoundException 168 | */ 169 | public static ITokenizer tokenizerForClass(final String tokenizerClass) 170 | throws InstantiationException, IllegalAccessException, 171 | ClassNotFoundException { 172 | return (ITokenizer) Class.forName(tokenizerClass).newInstance(); 173 | } 174 | 175 | } 176 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/bindings/AbstractNameBindingsExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools.bindings; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.List; 9 | import java.util.Set; 10 | 11 | import com.google.common.collect.HashMultimap; 12 | import com.google.common.collect.Multimap; 13 | 14 | /** 15 | * A NameBindings extractor from arbitrary code. 
16 | * 17 | * @author Miltos Allamanis 18 | * 19 | */ 20 | public abstract class AbstractNameBindingsExtractor { 21 | 22 | public abstract Set getAvailableFeatures(); 23 | 24 | /** 25 | * Return all the name bindings for file f, grouped by name. 26 | * 27 | * @param f the file to extract the bindings from 28 | * @return a multimap containing for each name all the relevant name 29 | * bindings in the file. 30 | * @throws IOException propagated from getNameBindings(File) 31 | */ 32 | public Multimap getBindingsForName(final File f) 33 | throws IOException { 34 | return getBindingsForName(getNameBindings(f)); 35 | } 36 | /* Groups the given bindings in a new HashMultimap, keyed by the value each binding returns from getName(). */ 37 | protected Multimap getBindingsForName( 38 | final List bindings) { 39 | final Multimap toks = HashMultimap.create(); 40 | for (final TokenNameBinding binding : bindings) { 41 | toks.put(binding.getName(), binding); 42 | } 43 | return toks; 44 | } 45 | 46 | /** 47 | * Return the name bindings given the code, grouped by name. 48 | * 49 | * @param code the code to extract the bindings from 50 | * @return a multimap containing for each name all the relevant name 51 | * bindings in the code snippet. 52 | */ 53 | public Multimap getBindingsForName( 54 | final String code) { 55 | return getBindingsForName(getNameBindings(code)); 56 | } 57 | 58 | /** 59 | * Get the name bindings for the given file. 60 | * 61 | * @param f the file to extract the bindings from 62 | * @return the list of name bindings 63 | * @throws IOException 64 | */ 65 | public abstract List getNameBindings(final File f) 66 | throws IOException; 67 | 68 | /** 69 | * Get the name bindings given the code. 70 | * 71 | * @param code the code to extract the bindings from 72 | * @return the list of name bindings 73 | */ 74 | public abstract List getNameBindings(final String code); 75 | 76 | /** 77 | * Return a ResolvedSourceCode instance for the given file. 78 | * 79 | * @param f the file containing the code 80 | * @return the resolved source code 81 | * @throws IOException 82 | */ 83 | public abstract ResolvedSourceCode getResolvedSourceCode(final File f) 84 | throws IOException; 85 | 86 | /** 87 | * Return a ResolvedSourceCode instance for the given code. 
88 | * 89 | * @param code 90 | * @return 91 | */ 92 | public abstract ResolvedSourceCode getResolvedSourceCode(final String code); 93 | 94 | public abstract void setActiveFeatures(Set activeFeatures); 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/bindings/ResolvedSourceCode.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools.bindings; 5 | 6 | import static com.google.common.base.Preconditions.checkArgument; 7 | 8 | import java.util.Collection; 9 | import java.util.List; 10 | 11 | import com.google.common.collect.ArrayListMultimap; 12 | 13 | /** 14 | * A full piece of source code that has the variable bindings resolved. The 15 | * variable bindings are "attached" to the source code, so any changes in the 16 | * token stream will be reflected in the bindings. 17 | * 18 | * @author Miltos Allamanis 19 | * 20 | */ 21 | public class ResolvedSourceCode { 22 | 23 | public final String name; 24 | 25 | public final List codeTokens; 26 | 27 | private final ArrayListMultimap variableBindings; 28 | 29 | /** 30 | * Assumes that the variable bindings use the same (as in ==) token list. 31 | * 32 | * The name field is set to the literal UnkownSourceCodeName (sic). 33 | * @param codeTokens the token list, stored as given (not copied) 34 | * @param variableBindings the bindings, stored as given (not copied) 35 | */ 36 | public ResolvedSourceCode(final List codeTokens, 37 | final ArrayListMultimap variableBindings) { 38 | this.name = "UnkownSourceCodeName"; 39 | this.codeTokens = codeTokens; 40 | this.variableBindings = variableBindings; 41 | } 42 | 43 | /** 44 | * Assumes that the variable bindings use the same (as in ==) token list. 
45 | * 46 | * @param name 47 | * @param codeTokens 48 | * @param variableBindings 49 | */ 50 | public ResolvedSourceCode(final String name, final List codeTokens, 51 | final ArrayListMultimap variableBindings) { 52 | this.name = name; 53 | this.codeTokens = codeTokens; 54 | this.variableBindings = variableBindings; 55 | } 56 | 57 | /** 58 | * Return all the bindings in source code. 59 | * 60 | * @return the values collection of the internal multimap (returned directly, not copied) 61 | */ 62 | public Collection getAllBindings() { 63 | return variableBindings.values(); 64 | } 65 | 66 | /** 67 | * Return the bindings for a single name. 68 | * 69 | * @param name the name to look up 70 | * @return the bindings stored under that name in the internal multimap 71 | */ 72 | public Collection getBindingsForName(final String name) { 73 | return variableBindings.get(name); 74 | } 75 | 76 | /** 77 | * Rename a single bound set of tokens by overwriting codeTokens at every index in binding.nameIndexes. 78 | * NOTE(review): the multimap itself is not re-keyed here, so the binding stays filed under its previous key. 79 | * @param binding a binding contained in this source code; otherwise an IllegalArgumentException is thrown 80 | * @param name the new name 81 | */ 82 | public void renameVariableTo(final TokenNameBinding binding, 83 | final String name) { 84 | checkArgument(variableBindings.values().contains(binding), 85 | "Binding is not pointing to this source code"); 86 | 87 | for (final int position : binding.nameIndexes) { 88 | codeTokens.set(position, name); 89 | } 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/bindings/TokenNameBinding.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools.bindings; 5 | 6 | import static com.google.common.base.Preconditions.checkArgument; 7 | 8 | import java.io.Serializable; 9 | import java.util.Collections; 10 | import java.util.List; 11 | import java.util.Set; 12 | 13 | import com.google.common.base.Objects; 14 | import com.google.common.collect.Lists; 15 | 16 | /** 17 | * A single name binding in source code. A struct-like class. 
18 | * 19 | * @author Miltos Allamanis 20 | * 21 | */ 22 | public class TokenNameBinding implements Serializable { 23 | private static final long serialVersionUID = 2020613810485746430L; 24 | 25 | /** 26 | * The tokens of source code, held through an unmodifiable view of the list passed to the constructor. 27 | */ 28 | public final List sourceCodeTokens; 29 | 30 | /** 31 | * The positions in sourceCodeTokens that contain the given name, held through an unmodifiable view. 32 | */ 33 | public final Set nameIndexes; 34 | 35 | /** 36 | * Features of the binding. Stored exactly as passed in (not wrapped, unlike the other two fields). 37 | */ 38 | public final Set features; 39 | /* Both nameIndexes and sourceCodeTokens must be non-empty, otherwise checkArgument throws IllegalArgumentException. */ 40 | public TokenNameBinding(final Set nameIndexes, 41 | final List sourceCodeTokens, final Set features) { 42 | checkArgument(nameIndexes.size() > 0); 43 | checkArgument(sourceCodeTokens.size() > 0); 44 | this.nameIndexes = Collections.unmodifiableSet(nameIndexes); 45 | this.sourceCodeTokens = Collections.unmodifiableList(sourceCodeTokens); 46 | this.features = features; 47 | } 48 | 49 | @Override 50 | public boolean equals(final Object obj) { 51 | if (this == obj) { 52 | return true; 53 | } 54 | if (obj == null) { 55 | return false; 56 | } 57 | if (getClass() != obj.getClass()) { 58 | return false; 59 | } 60 | final TokenNameBinding other = (TokenNameBinding) obj; 61 | return Objects.equal(nameIndexes, other.nameIndexes) 62 | && Objects.equal(features, other.features) 63 | && Objects.equal(sourceCodeTokens, other.sourceCodeTokens); 64 | } 65 | /* Returns the token found at the first index that the nameIndexes iterator yields. */ 66 | public String getName() { 67 | return sourceCodeTokens.get(nameIndexes.iterator().next()); 68 | } 69 | 70 | @Override 71 | public int hashCode() { 72 | return Objects.hashCode(sourceCodeTokens, nameIndexes, features); 73 | } 74 | 75 | /** 76 | * Rename this name to the given binding. The source code tokens included in 77 | * this struct, now represent the new structure. 
78 | * 79 | * @param name 80 | * @return 81 | */ 82 | public TokenNameBinding renameTo(final String name) { 83 | final List renamedCode = Lists.newArrayList(sourceCodeTokens); 84 | for (final int position : nameIndexes) { 85 | renamedCode.set(position, name); 86 | } 87 | return new TokenNameBinding(nameIndexes, renamedCode, features); 88 | } 89 | /* NOTE(review): renameTo works on a copy of sourceCodeTokens and returns a new binding; the token list of this instance is not modified. */ 90 | @Override 91 | public String toString() { 92 | return getName() + nameIndexes + " " + features; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Interfaces and utilities for language tools. 3 | */ 4 | package codemining.languagetools; 5 | 6 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/tokenizers/whitespace/WhitespaceToTokenConverter.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools.tokenizers.whitespace; 2 | 3 | 4 | /** 5 | * A stateful whitespace to whitespace token converter. The state is the number of 6 | * spaces and tabs seen in the last converted whitespace that contained a newline. 7 | * @author Miltos Allamanis 8 | * 9 | */ 10 | public class WhitespaceToTokenConverter { 11 | private int currentIdentationSpaces = 0; 12 | private int currentIdentationTabs = 0; 13 | 14 | /** 15 | * Convert the given symbol to whitespace token. 
16 | * 17 | * @param token 18 | * @return 19 | */ 20 | public String toWhiteSpaceSymbol(final String token) { 21 | final String symbol; 22 | int spaces = 0; 23 | int tabs = 0; 24 | int newLines = 0; 25 | for (final char c : token.replace("\r", "").toCharArray()) { 26 | if (c == '\n') { 27 | newLines++; 28 | } else if (c == '\t') { 29 | tabs++; 30 | } else if (c == ' ') { 31 | spaces++; 32 | } 33 | } 34 | /* The counts above cover the whole token, not only the part after the last newline. Without a newline the symbol carries the absolute counts and the state is left alone. With newlines it carries the change relative to the stored counts, which are then overwritten. */ 35 | if (newLines == 0) { 36 | symbol = "WS_s" + spaces + "t" + tabs; 37 | } else if (newLines > 0) { 38 | final int spaceDiff = spaces - currentIdentationSpaces; 39 | final int tabDiff = tabs - currentIdentationTabs; 40 | currentIdentationSpaces = spaces; 41 | currentIdentationTabs = tabs; 42 | /* INDENT is used only when neither count decreased. A mixed change (one count up, one down) is emitted as DEDENT with a negative number for the count that grew. */ 43 | if (spaceDiff >= 0 && tabDiff >= 0) { 44 | symbol = "WS_INDENTs" + spaceDiff + "t" + tabDiff + "n" 45 | + newLines; 46 | } else { 47 | symbol = "WS_DEDENTs" + -spaceDiff + "t" + -tabDiff + "n" 48 | + newLines; 49 | } 50 | } else { 51 | throw new IllegalStateException(); 52 | } 53 | return symbol; 54 | } 55 | 56 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/tokenizers/whitespace/WhitespaceTokenConverter.java: -------------------------------------------------------------------------------- 1 | package codemining.languagetools.tokenizers.whitespace; 2 | 3 | import static com.google.common.base.Preconditions.checkArgument; 4 | 5 | import java.util.regex.Matcher; 6 | import java.util.regex.Pattern; 7 | 8 | /** 9 | * A utility stateful class for converting whitespace tokens to whitespace. 10 | * 11 | */ 12 | public final class WhitespaceTokenConverter { 13 | 14 | /** 15 | * A struct class. 
16 | * 17 | */ 18 | private static final class Whitespace { 19 | int nTabs; 20 | int nSpace; 21 | int nNewLines; 22 | } 23 | 24 | private int currentSpaceIndentation = 0; 25 | private int currentTabIndentation = 0; 26 | /* Token formats: the number after s is a space count, after t a tab count and after n a newline count. Only the DEDENT pattern accepts negative space and tab counts, and only the INDENT and DEDENT patterns have the newline group. */ 27 | public static final Pattern INDENT_PATTERN = Pattern 28 | .compile("WS_INDENTs([0-9]+)t([0-9]+)n([0-9]+)"); 29 | 30 | public static final Pattern DEDENT_PATTERN = Pattern 31 | .compile("WS_DEDENTs(-?\\d+)t(-?\\d+)n(\\d+)"); 32 | 33 | public static final Pattern SPACE_PATTERN = Pattern 34 | .compile("WS_s(\\d+)t(\\d+)"); 35 | 36 | /** 37 | * Append whitespace to the StringBuffer: first the newlines (using the line.separator system property), then the spaces, then the tabs. 38 | * 39 | * @param space the numbers of newlines, spaces and tabs to append 40 | * @param sb the buffer to append to 41 | * 42 | * 43 | */ 44 | public static final void createWhitespace( 45 | final WhitespaceTokenConverter.Whitespace space, final StringBuffer sb) { 46 | for (int i = 0; i < space.nNewLines; i++) { 47 | sb.append(System.getProperty("line.separator")); 48 | } 49 | for (int i = 0; i < space.nSpace; i++) { 50 | sb.append(" "); 51 | } 52 | for (int i = 0; i < space.nTabs; i++) { 53 | sb.append("\t"); 54 | } 55 | } 56 | 57 | /** 58 | * Whitespace token converter. 
59 | * 60 | * @param wsToken 61 | * @param buffer 62 | */ 63 | public void appendWS(final String wsToken, final StringBuffer buffer) { 64 | checkArgument(wsToken.startsWith("WS_")); 65 | final WhitespaceTokenConverter.Whitespace space; 66 | if (wsToken.startsWith("WS_INDENT")) { 67 | space = convert(wsToken, INDENT_PATTERN); 68 | currentSpaceIndentation += space.nSpace; 69 | currentTabIndentation += space.nTabs; 70 | space.nSpace = currentSpaceIndentation; 71 | space.nTabs = currentTabIndentation; 72 | /* INDENT and DEDENT tokens carry deltas: the running indentation is updated (clamped at zero on dedent) and the resulting absolute indentation is what gets emitted. NOTE(review): every other WS_ token, including one spelled WS_NO_SPACE, reaches the final branch and must match SPACE_PATTERN or checkArgument fails. */ 73 | } else if (wsToken.startsWith("WS_DEDENT")) { 74 | space = convert(wsToken, DEDENT_PATTERN); 75 | currentSpaceIndentation -= space.nSpace; 76 | if (currentSpaceIndentation < 0) { 77 | currentSpaceIndentation = 0; 78 | } 79 | currentTabIndentation -= space.nTabs; 80 | if (currentTabIndentation < 0) { 81 | currentTabIndentation = 0; 82 | } 83 | space.nSpace = currentSpaceIndentation; 84 | space.nTabs = currentTabIndentation; 85 | } else { 86 | space = convert(wsToken, SPACE_PATTERN); 87 | } 88 | createWhitespace(space, buffer); 89 | } 90 | /* Parses the counts out of a whitespace token with the given pattern; the whole token must match. The newline count is read only when the pattern has three groups and stays 0 otherwise. */ 91 | private WhitespaceTokenConverter.Whitespace convert(final String wsToken, 92 | final Pattern patternToMatch) { 93 | final WhitespaceTokenConverter.Whitespace space = new Whitespace(); 94 | final Matcher m = patternToMatch.matcher(wsToken); 95 | checkArgument(m.matches(), "Pattern " + patternToMatch.toString() 96 | + " does not match " + wsToken); 97 | space.nSpace = Integer.parseInt(m.group(1)); 98 | space.nTabs = Integer.parseInt(m.group(2)); 99 | if (m.groupCount() == 3) { 100 | space.nNewLines = Integer.parseInt(m.group(3)); 101 | } 102 | return space; 103 | } 104 | } -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/tui/DistinctTokenCount.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools.tui; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | 
import java.util.List; 9 | import java.util.logging.Logger; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.apache.commons.io.filefilter.DirectoryFileFilter; 13 | import org.apache.commons.io.filefilter.RegexFileFilter; 14 | import org.apache.commons.lang.exception.ExceptionUtils; 15 | 16 | import codemining.languagetools.ITokenizer; 17 | import codemining.languagetools.TokenizerUtils; 18 | 19 | import com.google.common.collect.Multiset.Entry; 20 | import com.google.common.collect.TreeMultiset; 21 | 22 | /** 23 | * Print to stdout the total count of all unique tokens in the text. 24 | * 25 | * Used to answer the question: Do we have a zipf-ian distribution of tokens in 26 | * Java Code? 27 | * 28 | * @author Miltos Allamanis 29 | * 30 | */ 31 | public class DistinctTokenCount { 32 | 33 | private static final Logger LOGGER = Logger 34 | .getLogger(DistinctTokenCount.class.getName()); 35 | 36 | /** 37 | * @param args 38 | * @throws ClassNotFoundException 39 | * @throws IllegalAccessException 40 | * @throws InstantiationException 41 | */ 42 | public static void main(final String[] args) throws InstantiationException, 43 | IllegalAccessException, ClassNotFoundException { 44 | 45 | if (args.length != 2) { 46 | System.err.println("Usage: "); 47 | return; 48 | } 49 | 50 | final DistinctTokenCount tokCount = new DistinctTokenCount(args[1]); 51 | for (final File fi : FileUtils.listFiles(new File(args[0]), 52 | new RegexFileFilter(".*\\.java$"), 53 | DirectoryFileFilter.DIRECTORY)) { 54 | try { 55 | tokCount.addTokens(fi); 56 | } catch (final IOException e) { 57 | LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); 58 | } 59 | } 60 | 61 | tokCount.printCounts(); 62 | } 63 | 64 | private final TreeMultiset allTokens = TreeMultiset.create(); 65 | 66 | private final ITokenizer tokenizer; 67 | 68 | public DistinctTokenCount(final String tokenizerClass) 69 | throws InstantiationException, IllegalAccessException, 70 | ClassNotFoundException { 71 | tokenizer = 
TokenizerUtils.tokenizerForClass(tokenizerClass); 72 | } 73 | 74 | public void addTokens(final File file) throws IOException { 75 | LOGGER.finer("Reading file " + file.getAbsolutePath()); 76 | final char[] code = FileUtils.readFileToString(file).toCharArray(); 77 | final List tokens = tokenizer.tokenListFromCode(code); 78 | allTokens.addAll(tokens); 79 | 80 | } 81 | 82 | /** 83 | * Prints the counts. 84 | */ 85 | public void printCounts() { 86 | for (final Entry token : allTokens.entrySet()) { 87 | System.out.println(token.getCount()); 88 | } 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/tui/TokenCounter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools.tui; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.logging.Logger; 9 | 10 | import org.apache.commons.io.FileUtils; 11 | import org.apache.commons.io.filefilter.DirectoryFileFilter; 12 | import org.apache.commons.lang.exception.ExceptionUtils; 13 | 14 | import codemining.languagetools.ITokenizer; 15 | import codemining.languagetools.TokenizerUtils; 16 | 17 | /** 18 | * Utility for counting all the tokens in a folder. 
19 | * 20 | * @author Miltos Allamanis 21 | * 22 | */ 23 | public class TokenCounter { 24 | 25 | private static final Logger LOGGER = Logger.getLogger(TokenCounter.class 26 | .getName()); 27 | 28 | /** 29 | * @param args 30 | * @throws IOException 31 | * @throws ClassNotFoundException 32 | * @throws IllegalAccessException 33 | * @throws InstantiationException 34 | */ 35 | public static void main(final String[] args) throws IOException, 36 | InstantiationException, IllegalAccessException, 37 | ClassNotFoundException { 38 | if (args.length != 2) { 39 | System.err.println("Usage "); 40 | return; 41 | } 42 | 43 | long tokenCount = 0; 44 | 45 | final ITokenizer tokenizer = TokenizerUtils.tokenizerForClass(args[1]); 46 | 47 | for (final File fi : FileUtils.listFiles(new File(args[0]), 48 | tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) { 49 | try { 50 | final char[] code = FileUtils.readFileToString(fi) 51 | .toCharArray(); 52 | tokenCount += tokenizer.tokenListFromCode(code).size() - 2; // Remove 53 | // sentence 54 | // start/end 55 | } catch (final IOException e) { 56 | LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); 57 | } 58 | } 59 | 60 | System.out.println("Tokens: " + tokenCount); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/tui/TokenizerTUI.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools.tui; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.lang.reflect.InvocationTargetException; 9 | import java.util.Collection; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.apache.commons.io.filefilter.DirectoryFileFilter; 13 | import org.eclipse.jdt.core.compiler.InvalidInputException; 14 | 15 | import codemining.languagetools.ITokenizer; 16 | import codemining.languagetools.ITokenizer.FullToken; 17 | import 
codemining.languagetools.TokenizerUtils; 18 | 19 | import com.google.common.collect.Lists; 20 | 21 | /** 22 | * Print tokenized code of a file to stdout. Each token is placed at a separate 23 | * line. New files are separated by an empty line. 24 | * 25 | * @author Miltos Allamanis 26 | * 27 | */ 28 | public class TokenizerTUI { 29 | public static void main(final String[] args) throws InvalidInputException, 30 | IOException, InstantiationException, IllegalAccessException, 31 | ClassNotFoundException, IllegalArgumentException, 32 | SecurityException, InvocationTargetException, NoSuchMethodException { 33 | if (args.length < 2) { 34 | System.err 35 | .println("Usage [TokenizerArgs]"); 36 | return; 37 | } 38 | 39 | final ITokenizer tok; 40 | final String tokenizerClass = args[1]; 41 | if (args.length == 2) { 42 | tok = TokenizerUtils.tokenizerForClass(tokenizerClass); 43 | } else { 44 | final String tokenizerArguments = args[2]; 45 | tok = TokenizerUtils.tokenizerForClass(tokenizerClass, 46 | tokenizerArguments); 47 | } 48 | 49 | final File baseFile = new File(args[0]); 50 | final Collection allFiles; 51 | if (baseFile.isDirectory()) { 52 | allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(), 53 | DirectoryFileFilter.DIRECTORY); 54 | } else { 55 | allFiles = Lists.newArrayList(baseFile); 56 | } 57 | 58 | for (final File fi : allFiles) { 59 | 60 | final StringBuffer buf = new StringBuffer(); 61 | for (final FullToken token : tok.getTokenListFromCode(fi)) { 62 | buf.append(token); 63 | buf.append(System.getProperty("line.separator")); 64 | } 65 | 66 | System.out.println(buf.toString()); 67 | System.out.println(); 68 | 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/codemining/languagetools/tui/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Command line tools for language tools. 
3 | */ 4 | package codemining.languagetools.tui; 5 | 6 | -------------------------------------------------------------------------------- /src/main/java/codemining/python/codeutils/AbstractPythonTokenizer.java: -------------------------------------------------------------------------------- 1 | package codemining.python.codeutils; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.Collection; 6 | import java.util.List; 7 | import java.util.SortedMap; 8 | 9 | import org.apache.commons.io.FileUtils; 10 | import org.apache.commons.io.filefilter.AbstractFileFilter; 11 | import org.apache.commons.io.filefilter.RegexFileFilter; 12 | import org.apache.commons.lang.NotImplementedException; 13 | import org.python.pydev.parser.grammarcommon.ITokenManager; 14 | import org.python.pydev.parser.jython.FastCharStream; 15 | import org.python.pydev.parser.jython.Token; 16 | 17 | import codemining.languagetools.ITokenizer; 18 | 19 | import com.google.common.collect.Lists; 20 | import com.google.common.collect.Maps; 21 | 22 | /** 23 | * An abstract python tokenizer using the PyDev interface. 24 | * 25 | * @author Miltos Allamanis 26 | * 27 | */ 28 | public abstract class AbstractPythonTokenizer implements ITokenizer { 29 | 30 | private static final long serialVersionUID = 5009530263783901964L; 31 | 32 | /** 33 | * A filter for the files being tokenized. 34 | */ 35 | private static final RegexFileFilter pythonCodeFilter = new RegexFileFilter( 36 | ".*\\.py$"); 37 | 38 | public AbstractPythonTokenizer() { 39 | super(); 40 | } 41 | 42 | @Override 43 | public SortedMap fullTokenListWithPos(final char[] code) { 44 | final FastCharStream stream = new FastCharStream(code); 45 | final ITokenManager mng = getPythonTokenizer(stream); 46 | final SortedMap tokens = Maps.newTreeMap(); 47 | 48 | Token nextToken = mng.getNextToken(); 49 | while (nextToken.kind != 0) { 50 | if (shouldAdd(nextToken)) { 51 | // TODO: Bad Heurisitc... 
52 | tokens.put( 53 | nextToken.getBeginLine() * 500 54 | + nextToken.getBeginCol(), 55 | new FullToken(nextToken.image, Integer 56 | .toString(nextToken.kind))); 57 | } 58 | nextToken = mng.getNextToken(); 59 | } 60 | 61 | return tokens; 62 | } 63 | 64 | @Override 65 | public AbstractFileFilter getFileFilter() { 66 | return pythonCodeFilter; 67 | } 68 | 69 | @Override 70 | public String getIdentifierType() { 71 | return "92"; // TODO from not hard coded? 72 | } 73 | 74 | /* 75 | * (non-Javadoc) 76 | * 77 | * @see codemining.languagetools.ITokenizer#getKeywordTypes() 78 | */ 79 | @Override 80 | public Collection getKeywordTypes() { 81 | throw new NotImplementedException(); 82 | } 83 | 84 | /* 85 | * (non-Javadoc) 86 | * 87 | * @see codemining.languagetools.ITokenizer#getLiteralTypes() 88 | */ 89 | @Override 90 | public Collection getLiteralTypes() { 91 | throw new NotImplementedException(); 92 | } 93 | 94 | public abstract ITokenManager getPythonTokenizer(final FastCharStream stream); 95 | 96 | @Override 97 | public FullToken getTokenFromString(final String token) { 98 | final FastCharStream stream = new FastCharStream(token.toCharArray()); 99 | final ITokenManager mng = getPythonTokenizer(stream); 100 | final Token pyToken = mng.getNextToken(); 101 | return new FullToken(pyToken.image, Integer.toString(pyToken.kind)); 102 | } 103 | 104 | @Override 105 | public List getTokenListFromCode(final char[] code) { 106 | final FastCharStream stream = new FastCharStream(code); 107 | final ITokenManager mng = getPythonTokenizer(stream); 108 | final List tokens = Lists.newArrayList(); 109 | 110 | Token nextToken = mng.getNextToken(); 111 | while (nextToken.kind != 0) { 112 | if (shouldAdd(nextToken)) { 113 | tokens.add(new FullToken(nextToken.image, Integer 114 | .toString(nextToken.kind))); 115 | } 116 | nextToken = mng.getNextToken(); 117 | } 118 | 119 | return tokens; 120 | } 121 | 122 | @Override 123 | public List getTokenListFromCode(final File codeFile) 124 | throws 
IOException { 125 | return getTokenListFromCode(FileUtils.readFileToString(codeFile) 126 | .toCharArray()); 127 | } 128 | 129 | /** 130 | * @param nextToken 131 | * @return 132 | */ 133 | public boolean shouldAdd(final Token nextToken) { 134 | // disallow whitespace, indent and docstrings 135 | return nextToken.kind != 6 && nextToken.kind != 14 136 | && nextToken.kind != 13 && nextToken.kind != 115; 137 | } 138 | 139 | @Override 140 | public List tokenListFromCode(final char[] code) { 141 | final FastCharStream stream = new FastCharStream(code); 142 | final ITokenManager mng = getPythonTokenizer(stream); 143 | final List tokens = Lists.newArrayList(); 144 | 145 | Token nextToken = mng.getNextToken(); 146 | while (nextToken.kind != 0) { 147 | if (shouldAdd(nextToken)) { 148 | tokens.add(nextToken.image); 149 | } 150 | nextToken = mng.getNextToken(); 151 | } 152 | 153 | return tokens; 154 | } 155 | 156 | @Override 157 | public List tokenListFromCode(final File codeFile) 158 | throws IOException { 159 | return tokenListFromCode(FileUtils.readFileToString(codeFile) 160 | .toCharArray()); 161 | } 162 | 163 | @Override 164 | public SortedMap tokenListWithPos(final char[] code) { 165 | final FastCharStream stream = new FastCharStream(code); 166 | final ITokenManager mng = getPythonTokenizer(stream); 167 | final SortedMap tokens = Maps.newTreeMap(); 168 | 169 | Token nextToken = mng.getNextToken(); 170 | while (nextToken.kind != 0) { 171 | if (shouldAdd(nextToken)) { 172 | // TODO: Bad Heurisitc... 
173 | tokens.put( 174 | nextToken.getBeginLine() * 500 175 | + nextToken.getBeginCol(), nextToken.image); 176 | } 177 | nextToken = mng.getNextToken(); 178 | } 179 | 180 | return tokens; 181 | } 182 | 183 | @Override 184 | public SortedMap tokenListWithPos(final File file) 185 | throws IOException { 186 | return fullTokenListWithPos(FileUtils.readFileToString(file) 187 | .toCharArray()); 188 | } 189 | 190 | } 191 | -------------------------------------------------------------------------------- /src/main/java/codemining/python/codeutils/Python27Tokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.python.codeutils; 5 | 6 | import org.python.pydev.parser.grammar27.PythonGrammar27TokenManager; 7 | import org.python.pydev.parser.grammarcommon.ITokenManager; 8 | import org.python.pydev.parser.jython.FastCharStream; 9 | 10 | /** 11 | * A Python 2.7 tokenizer. 12 | * 13 | * @author Miltos Allamanis 14 | * 15 | */ 16 | public class Python27Tokenizer extends AbstractPythonTokenizer { 17 | 18 | /* 19 | * (non-Javadoc) 20 | * 21 | * @see 22 | * codemining.python.codeutils.AbstractPythonTokenizer#getPythonTokenizer 23 | * (org.python.pydev.parser.jython.FastCharStream) 24 | */ 25 | @Override 26 | public ITokenManager getPythonTokenizer(FastCharStream stream) { 27 | final ITokenManager mng = new PythonGrammar27TokenManager(stream); 28 | return mng; 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/codemining/python/codeutils/Python30Tokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.python.codeutils; 5 | 6 | import org.python.pydev.parser.grammar30.PythonGrammar30TokenManager; 7 | import org.python.pydev.parser.grammarcommon.ITokenManager; 8 | import org.python.pydev.parser.jython.FastCharStream; 9 | 10 | /** 11 | * @author Miltos 
Allamanis 12 | * 13 | */ 14 | public class Python30Tokenizer extends AbstractPythonTokenizer { 15 | 16 | private static final long serialVersionUID = 6944634686739086853L; 17 | 18 | /** 19 | * @param stream 20 | * @return 21 | */ 22 | @Override 23 | public ITokenManager getPythonTokenizer(final FastCharStream stream) { 24 | final ITokenManager mng = new PythonGrammar30TokenManager(stream); 25 | return mng; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/JavaApproximateTypeInferencerTest.java: -------------------------------------------------------------------------------- 1 | package codemining.java.codeutils; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | import java.util.Map; 8 | 9 | import org.apache.commons.io.FileUtils; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import codemining.languagetools.ParseType; 14 | 15 | public class JavaApproximateTypeInferencerTest { 16 | 17 | String classContent; 18 | 19 | @Before 20 | public void setUp() throws IOException { 21 | classContent = FileUtils.readFileToString(new File( 22 | JavaAstExtractorTest.class.getClassLoader() 23 | .getResource("SampleClass3.txt").getFile())); 24 | } 25 | 26 | @Test 27 | public void test() { 28 | JavaASTExtractor ex = new JavaASTExtractor(false); 29 | JavaApproximateTypeInferencer jati = new JavaApproximateTypeInferencer( 30 | ex.getAST(classContent, ParseType.COMPILATION_UNIT)); 31 | jati.infer(); 32 | final Map vars = jati.getVariableTypes(); 33 | assertEquals(vars.get("anInstance"), "my.pack.SomeName"); 34 | assertEquals(vars.get("arrayOfInt"), "int[]"); 35 | assertEquals(vars.get("aNumber"), "long"); 36 | assertEquals(vars.get("singleObject"), "your.pack.Blah"); 37 | assertEquals(vars.get("arrayOfObjects"), "your.pack.Blah[]"); 38 | assertEquals(vars.get("listOfInt"), "java.util.List"); 39 | assertEquals( 
40 | vars.get("complexParamType"), 41 | "java.util.Map>>"); 42 | assertEquals(vars.get("paraType"), 43 | "your.pack2.ParamType"); 44 | assertEquals(vars.get("lowerBoundPa"), 45 | "your.pack2.ParamType"); 46 | assertEquals(vars.get("upperBoundPa"), 47 | "your.pack2.ParamType"); 48 | assertEquals(vars.get("upperBoundPa2"), 49 | "your.pack2.ParamType>"); 50 | assertEquals(vars.get("e"), 51 | "java.io.IOException | java.lang.ArithmeticException"); 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/JavaAstExtractorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils; 5 | 6 | import static org.junit.Assert.assertTrue; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.List; 11 | 12 | import org.apache.commons.io.FileUtils; 13 | import org.eclipse.jdt.core.dom.ASTNode; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import codemining.java.tokenizers.JavaTokenizer; 18 | import codemining.languagetools.ParseType; 19 | 20 | /** 21 | * @author Miltos Allamanis 22 | * 23 | */ 24 | public class JavaAstExtractorTest { 25 | 26 | String classContent; 27 | String methodContent; 28 | 29 | @Before 30 | public void setUp() throws IOException { 31 | classContent = FileUtils.readFileToString(new File( 32 | JavaAstExtractorTest.class.getClassLoader() 33 | .getResource("SampleClass.txt").getFile())); 34 | 35 | methodContent = FileUtils.readFileToString(new File( 36 | JavaAstExtractorTest.class.getClassLoader() 37 | .getResource("SampleMethod.txt").getFile())); 38 | } 39 | 40 | /** 41 | * Test method for 42 | * {@link codemining.java.codeutils.JavaASTExtractor#getBestEffortAst(java.lang.String)} 43 | * . 
44 | * 45 | * @throws IOException 46 | */ 47 | @Test 48 | public void testGetASTString() { 49 | final JavaASTExtractor ex = new JavaASTExtractor(false); 50 | assertTrue(classContent.length() > 0); 51 | final ASTNode classCU = ex.getASTNode(classContent, 52 | ParseType.COMPILATION_UNIT); 53 | assertTrue(snippetMatchesAstTokens(classContent, classCU)); 54 | 55 | assertTrue(methodContent.length() > 0); 56 | final ASTNode methodCU = ex.getASTNode(methodContent, 57 | ParseType.METHOD); 58 | assertTrue(snippetMatchesAstTokens(methodContent, methodCU)); 59 | } 60 | 61 | private boolean snippetMatchesAstTokens(final String snippetCode, 62 | final ASTNode node) { 63 | final JavaTokenizer tokenizer = new JavaTokenizer(); 64 | final List snippetTokens = tokenizer 65 | .tokenListFromCode(snippetCode.toCharArray()); 66 | final List astTokens = tokenizer.tokenListFromCode(node 67 | .toString().toCharArray()); 68 | return astTokens.equals(snippetTokens); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/JavaWhitespaceTokenizerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils; 5 | 6 | import static org.junit.Assert.*; 7 | 8 | import java.io.File; 9 | import java.util.List; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | import codemining.java.tokenizers.JavaWhitespaceTokenizer; 16 | 17 | import com.google.common.collect.Lists; 18 | 19 | /** 20 | * @author Miltos Allamanis 21 | * 22 | */ 23 | public class JavaWhitespaceTokenizerTest { 24 | 25 | private String code; 26 | private List correctTokens; 27 | 28 | /** 29 | * @throws java.lang.Exception 30 | */ 31 | @Before 32 | public void setUp() throws Exception { 33 | code = FileUtils.readFileToString(new File( 34 | JavaAstExtractorTest.class.getClassLoader() 35 | 
.getResource("SampleClass2.txt").getFile())); 36 | 37 | correctTokens = Lists.newArrayList(FileUtils.readFileToString( 38 | new File(JavaAstExtractorTest.class.getClassLoader() 39 | .getResource("SampleClass2WhitespaceTokens.txt") 40 | .getFile())).split("\n")); 41 | } 42 | 43 | @Test 44 | public void test() { 45 | final JavaWhitespaceTokenizer tokenizer = new JavaWhitespaceTokenizer(); 46 | final List tokens = tokenizer.tokenListFromCode(code 47 | .toCharArray()); 48 | for (int i = 0; i < correctTokens.size(); i++) { 49 | assertEquals("Does not match at position " + i, tokens.get(i), 50 | correctTokens.get(i)); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/TokenizeJavaCodeTest.java: -------------------------------------------------------------------------------- 1 | package codemining.java.codeutils; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertTrue; 5 | 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import org.eclipse.jdt.core.compiler.ITerminalSymbols; 10 | import org.junit.Test; 11 | 12 | import codemining.java.tokenizers.JavaTokenizer; 13 | import codemining.languagetools.ITokenizer; 14 | 15 | public class TokenizeJavaCodeTest { 16 | 17 | private static final char[] CODE_SAMPLE1 = "int x=2;".toCharArray(); 18 | 19 | private static final String[] TOKENS_SAMPLE1 = { ITokenizer.SENTENCE_START, 20 | "int", "x", "=", "2", ";", ITokenizer.SENTENCE_END }; 21 | private static final int[] TOKEN_POS_SAMPLE1 = { -1, 0, 4, 5, 6, 7, 22 | Integer.MAX_VALUE }; 23 | 24 | private static final char[] CODE_SAMPLE2 = "if (y>0) {\n a += 2;\n}" 25 | .toCharArray(); 26 | 27 | private static final String[] TOKENS_SAMPLE2 = { ITokenizer.SENTENCE_START, 28 | "if", "(", "y", ">", "0", ")", "{", "a", "+=", "2", ";", "}", 29 | ITokenizer.SENTENCE_END }; 30 | 31 | public static final char[] CODE_SAMPLE3 = "int x=2; // this is a 
test\n" 32 | .toCharArray(); 33 | 34 | @Test 35 | public void testSample1() { 36 | ITokenizer tokenizer = new JavaTokenizer(); 37 | testSample1(tokenizer); 38 | } 39 | 40 | /** 41 | * @param tokenizer 42 | */ 43 | protected void testSample1(ITokenizer tokenizer) { 44 | final List tok = tokenizer.tokenListFromCode(CODE_SAMPLE1); 45 | for (int i = 0; i < TOKENS_SAMPLE1.length; i++) { 46 | assertEquals(tok.get(i), TOKENS_SAMPLE1[i]); 47 | } 48 | assertEquals(tok.size(), TOKENS_SAMPLE1.length); 49 | } 50 | 51 | /** 52 | * @param tokenizer 53 | */ 54 | protected void testSample1Position(ITokenizer tokenizer) { 55 | final Map toks = tokenizer 56 | .tokenListWithPos(CODE_SAMPLE1); 57 | for (int i = 0; i < TOKEN_POS_SAMPLE1.length; i++) { 58 | assertTrue(toks.containsKey(TOKEN_POS_SAMPLE1[i])); 59 | assertEquals(toks.get(TOKEN_POS_SAMPLE1[i]), TOKENS_SAMPLE1[i]); 60 | } 61 | assertEquals(toks.size(), TOKENS_SAMPLE1.length); 62 | } 63 | 64 | @Test 65 | public void testSample1postion() { 66 | ITokenizer tokenizer = new JavaTokenizer(); 67 | testSample1Position(tokenizer); 68 | } 69 | 70 | @Test 71 | public void testSample2() { 72 | ITokenizer tokenizer = new JavaTokenizer(); 73 | testSample2(tokenizer); 74 | } 75 | 76 | /** 77 | * @param tokenizer 78 | */ 79 | protected void testSample2(ITokenizer tokenizer) { 80 | final List tok = tokenizer.tokenListFromCode(CODE_SAMPLE2); 81 | 82 | for (int i = 0; i < TOKENS_SAMPLE2.length; i++) { 83 | assertEquals(tok.get(i), TOKENS_SAMPLE2[i]); 84 | } 85 | } 86 | 87 | @Test 88 | public void testSample3() { 89 | ITokenizer tokenizer = new JavaTokenizer(); 90 | testSample3(tokenizer); 91 | } 92 | 93 | /** 94 | * @param tokenizer 95 | */ 96 | protected void testSample3(ITokenizer tokenizer) { 97 | final List tok = tokenizer.tokenListFromCode(CODE_SAMPLE3); 98 | for (int i = 0; i < TOKENS_SAMPLE1.length; i++) { 99 | assertEquals(tok.get(i), TOKENS_SAMPLE1[i]); 100 | } 101 | assertEquals(tok.size(), TOKENS_SAMPLE1.length); 102 | } 103 | 104 | 
@Test 105 | public void testTokenTypes() { 106 | ITokenizer tokenizer = new JavaTokenizer(); 107 | assertEquals( 108 | tokenizer.getTokenFromString("hello"), 109 | new ITokenizer.FullToken("hello", tokenizer.getIdentifierType())); 110 | assertEquals( 111 | tokenizer.getTokenFromString("{"), 112 | new ITokenizer.FullToken("{", Integer 113 | .toString(ITerminalSymbols.TokenNameLBRACE))); 114 | 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/binding/BindingTester.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.binding; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | import static org.junit.Assert.assertFalse; 8 | 9 | import java.util.List; 10 | import java.util.Set; 11 | 12 | import codemining.languagetools.bindings.TokenNameBinding; 13 | 14 | import com.google.common.collect.Sets; 15 | 16 | /** 17 | * Utility class for testing bindings. 
18 | * 19 | * @author Miltos Allamanis 20 | * 21 | */ 22 | public class BindingTester { 23 | 24 | private BindingTester() { 25 | } 26 | 27 | public static void checkAllBindings(final List bindings) { 28 | final Set indexes = Sets.newHashSet(); 29 | for (final TokenNameBinding binding : bindings) { 30 | BindingTester.checkBinding(binding); 31 | assertFalse("Indexes appear only once", 32 | indexes.removeAll(binding.nameIndexes)); 33 | indexes.addAll(binding.nameIndexes); 34 | } 35 | } 36 | 37 | public static void checkBinding(final TokenNameBinding binding) { 38 | final String tokenName = binding.sourceCodeTokens 39 | .get(binding.nameIndexes.iterator().next()); 40 | for (final int idx : binding.nameIndexes) { 41 | assertEquals(tokenName, binding.sourceCodeTokens.get(idx)); 42 | } 43 | }; 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/binding/JavaApproximateVariableBindingExtractorTest.java: -------------------------------------------------------------------------------- 1 | package codemining.java.codeutils.binding; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertTrue; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Collection; 9 | import java.util.List; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | import codemining.java.codeutils.JavaAstExtractorTest; 16 | import codemining.languagetools.bindings.TokenNameBinding; 17 | 18 | public class JavaApproximateVariableBindingExtractorTest { 19 | 20 | private static void allAreContained(final Collection collection, 21 | final Collection in) { 22 | for (final T element : collection) { 23 | assertTrue(in.contains(element)); 24 | } 25 | } 26 | 27 | File classContent; 28 | 29 | File classContent2; 30 | 31 | String methodContent; 32 | 33 | @Before 34 | public void setUp() throws IOException { 
35 | classContent = new File(JavaAstExtractorTest.class.getClassLoader() 36 | .getResource("SampleClass.txt").getFile()); 37 | classContent2 = new File(JavaAstExtractorTest.class.getClassLoader() 38 | .getResource("SampleClass2.txt").getFile()); 39 | 40 | methodContent = FileUtils.readFileToString(new File( 41 | JavaAstExtractorTest.class.getClassLoader() 42 | .getResource("SampleMethod.txt").getFile())); 43 | } 44 | 45 | @Test 46 | public void testClassBindings() throws IOException { 47 | final JavaApproximateVariableBindingExtractor jabe = new JavaApproximateVariableBindingExtractor(); 48 | final JavaExactVariableBindingsExtractor jbe = new JavaExactVariableBindingsExtractor(); 49 | 50 | final List classVariableBindings = jabe 51 | .getNameBindings(classContent); 52 | final List classVariableBindingsExact = jbe 53 | .getNameBindings(classContent); 54 | 55 | BindingTester 56 | .checkAllBindings(classVariableBindings); 57 | assertEquals(classVariableBindings.size(), 5); 58 | 59 | final List classVariableBindings2 = jabe 60 | .getNameBindings(classContent2); 61 | final List classVariableBindings2Exact = jbe 62 | .getNameBindings(classContent2); 63 | 64 | assertEquals(classVariableBindings2.size(), 9); 65 | 66 | allAreContained(classVariableBindingsExact, classVariableBindings); 67 | allAreContained(classVariableBindings2Exact, classVariableBindings2); 68 | } 69 | 70 | @Test 71 | public void testMethodBinding() { 72 | final JavaApproximateVariableBindingExtractor jabe = new JavaApproximateVariableBindingExtractor(); 73 | final List methodVariableBindings = jabe 74 | .getNameBindings(methodContent); 75 | BindingTester 76 | .checkAllBindings(methodVariableBindings); 77 | assertEquals(methodVariableBindings.size(), 3); 78 | 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/binding/JavaExactVariableBindingsExtractorTest.java: 
-------------------------------------------------------------------------------- 1 | package codemining.java.codeutils.binding; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | import java.util.List; 8 | 9 | import org.junit.Before; 10 | import org.junit.Test; 11 | 12 | import codemining.java.codeutils.JavaAstExtractorTest; 13 | import codemining.languagetools.bindings.TokenNameBinding; 14 | 15 | public class JavaExactVariableBindingsExtractorTest { 16 | 17 | File classContent; 18 | 19 | File classContent2; 20 | 21 | @Before 22 | public void setUp() throws IOException { 23 | classContent = new File(JavaAstExtractorTest.class.getClassLoader() 24 | .getResource("SampleClass.txt").getFile()); 25 | classContent2 = new File(JavaAstExtractorTest.class.getClassLoader() 26 | .getResource("SampleClass2.txt").getFile()); 27 | } 28 | 29 | @Test 30 | public void testClassBindings() throws IOException { 31 | final JavaExactVariableBindingsExtractor jbe = new JavaExactVariableBindingsExtractor(); 32 | final List classVariableBindings = jbe 33 | .getNameBindings(classContent); 34 | BindingTester.checkAllBindings(classVariableBindings); 35 | assertEquals(classVariableBindings.size(), 5); 36 | 37 | final List classVariableBindings2 = jbe 38 | .getNameBindings(classContent2); 39 | 40 | assertEquals(classVariableBindings2.size(), 9); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/binding/JavaMethodBindingExtractorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.java.codeutils.binding; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.List; 11 | 12 | import org.apache.commons.io.FileUtils; 13 | import org.junit.Before; 14 | import org.junit.Test; 15 | 16 
| import codemining.java.codeutils.JavaAstExtractorTest; 17 | import codemining.languagetools.bindings.TokenNameBinding; 18 | 19 | public class JavaMethodBindingExtractorTest { 20 | 21 | File classContent; 22 | 23 | File classContent2; 24 | 25 | String methodContent; 26 | 27 | @Before 28 | public void setUp() throws IOException { 29 | classContent = new File(JavaAstExtractorTest.class.getClassLoader() 30 | .getResource("SampleClass.txt").getFile()); 31 | classContent2 = new File(JavaAstExtractorTest.class.getClassLoader() 32 | .getResource("SampleClass2.txt").getFile()); 33 | 34 | methodContent = FileUtils.readFileToString(new File( 35 | JavaAstExtractorTest.class.getClassLoader() 36 | .getResource("SampleMethod.txt").getFile())); 37 | } 38 | 39 | @Test 40 | public void testClassLevelBindings() throws IOException { 41 | final JavaMethodInvocationBindingExtractor jame = new JavaMethodInvocationBindingExtractor(); 42 | 43 | final List classMethodBindings = jame 44 | .getNameBindings(classContent); 45 | 46 | BindingTester.checkAllBindings(classMethodBindings); 47 | assertEquals(classMethodBindings.size(), 7); 48 | 49 | final List classMethodBindings2 = jame 50 | .getNameBindings(classContent2); 51 | BindingTester.checkAllBindings(classMethodBindings2); 52 | 53 | assertEquals(classMethodBindings2.size(), 6); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/test/java/codemining/java/codeutils/binding/JavaTypeBindingExtractorTest.java: -------------------------------------------------------------------------------- 1 | package codemining.java.codeutils.binding; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | import java.util.List; 8 | 9 | import org.apache.commons.io.FileUtils; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import codemining.java.codeutils.JavaAstExtractorTest; 14 | import 
codemining.languagetools.bindings.TokenNameBinding; 15 | /** Checks the number of bindings that JavaTypeDeclarationBindingExtractor reports for two sample classes. */ 16 | public class JavaTypeBindingExtractorTest { 17 | /** Handle on the SampleClass.txt fixture, set in setUp(). */ 18 | File classContent; 19 | /** Handle on the SampleClass2.txt fixture, set in setUp(). */ 20 | File classContent2; 21 | /** Text of SampleMethod.txt; loaded in setUp() but not read by any test in this class. */ 22 | String methodContent; 23 | /** Locates the class fixtures and reads the method fixture into memory. */ 24 | @Before 25 | public void setUp() throws IOException { 26 | classContent = new File(JavaAstExtractorTest.class.getClassLoader() 27 | .getResource("SampleClass.txt").getFile()); 28 | classContent2 = new File(JavaAstExtractorTest.class.getClassLoader() 29 | .getResource("SampleClass2.txt").getFile()); 30 | 31 | methodContent = FileUtils.readFileToString(new File( 32 | JavaAstExtractorTest.class.getClassLoader() 33 | .getResource("SampleMethod.txt").getFile())); 34 | } 35 | /** Extracts the bindings of each fixture, passes them to BindingTester.checkAllBindings and asserts that exactly one was found per file. */ 36 | @Test 37 | public void testClassLevelBindings() throws IOException { 38 | final JavaTypeDeclarationBindingExtractor jame = new JavaTypeDeclarationBindingExtractor(); 39 | 40 | final List<TokenNameBinding> classTypeBindings = jame 41 | .getNameBindings(classContent); 42 | 43 | BindingTester.checkAllBindings(classTypeBindings); 44 | assertEquals(1, classTypeBindings.size()); // JUnit order: expected, then actual 45 | 46 | final List<TokenNameBinding> classTypeBindings2 = jame 47 | .getNameBindings(classContent2); 48 | BindingTester.checkAllBindings(classTypeBindings2); 49 | 50 | assertEquals(1, classTypeBindings2.size()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/codemining/js/codeutils/JavascriptASTExtractorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.js.codeutils; 5 | 6 | import static org.junit.Assert.assertTrue; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.List; 11 | 12 | import org.apache.commons.io.FileUtils; 13 | import org.eclipse.wst.jsdt.core.dom.ASTNode; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import codemining.languagetools.ParseType; 18 | 19 | /** 20 | * @author Miltos Allamanis 21 | * 22 | */ 23 | // FIXME Javascript AST
parser is quite buggy: For SampleJavascript the === is 24 | // printed as + and for SampleJavascript2 it prints a random semicolon at line 7 25 | // Is this just a buggy toString method issue? Tests commented out until fixed. 26 | public class JavascriptASTExtractorTest { 27 | /* Fixture sources read in setUp(): classContent holds SampleJavascript2.txt, methodContent holds SampleJavascript.txt. */ 28 | String classContent; 29 | String methodContent; 30 | /** Reads both JavaScript fixtures into memory. */ 31 | @Before 32 | public void setUp() throws IOException { 33 | classContent = FileUtils.readFileToString(new File( 34 | JavascriptASTExtractorTest.class.getClassLoader() 35 | .getResource("SampleJavascript2.txt").getFile())); 36 | 37 | methodContent = FileUtils.readFileToString(new File( 38 | JavascriptASTExtractorTest.class.getClassLoader() 39 | .getResource("SampleJavascript.txt").getFile())); 40 | } 41 | 42 | /** 43 | * Test method for 44 | * {@link JavascriptASTExtractor#getASTNode(String, ParseType)} 45 | * . 46 | * Only checks that both fixtures are non-empty and that parsing them completes; 47 | * the token-level comparisons stay commented out (see the FIXME on this class). 48 | */ 49 | @Test 50 | public void testGetASTString() { 51 | final JavascriptASTExtractor ex = new JavascriptASTExtractor(false); 52 | assertTrue(classContent.length() > 0); 53 | final ASTNode classCU = ex.getASTNode(classContent, 54 | ParseType.COMPILATION_UNIT); 55 | // assertTrue(snippetMatchesAstTokens(classContent, classCU)); 56 | 57 | assertTrue(methodContent.length() > 0); 58 | final ASTNode methodCU = ex.getASTNode(methodContent, ParseType.METHOD); 59 | // assertTrue(snippetMatchesAstTokens(methodContent, methodCU)); 60 | } 61 | /** Returns true when tokenizing the snippet and tokenizing node.toString() yield equal token lists. */ 62 | private boolean snippetMatchesAstTokens(final String snippetCode, 63 | final ASTNode node) { 64 | final JavascriptTokenizer tokenizer = new JavascriptTokenizer(); 65 | final List<String> snippetTokens = tokenizer 66 | .tokenListFromCode(snippetCode.toCharArray()); 67 | final List<String> astTokens = tokenizer.tokenListFromCode(node 68 | .toString().toCharArray()); 69 | return astTokens.equals(snippetTokens); 70 | } 71 | } 72 | --------------------------------------------------------------------------------
/src/test/java/codemining/js/codeutils/TokenizeJavascriptCodeTest.java: -------------------------------------------------------------------------------- 1 | package codemining.js.codeutils; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertTrue; 5 | 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import org.eclipse.wst.jsdt.core.compiler.ITerminalSymbols; 10 | import org.junit.Test; 11 | 12 | import codemining.languagetools.ITokenizer; 13 | /** Tokenization tests for JavascriptTokenizer; the protected overloads take the tokenizer under test as a parameter. */ 14 | public class TokenizeJavascriptCodeTest { 15 | /** First sample: its source, the expected tokens and the expected position of each token. */ 16 | private static final char[] CODE_SAMPLE1 = "var x=2;".toCharArray(); 17 | 18 | private static final String[] TOKENS_SAMPLE1 = { ITokenizer.SENTENCE_START, 19 | "var", "x", "=", "2", ";", ITokenizer.SENTENCE_END }; 20 | private static final int[] TOKEN_POS_SAMPLE1 = { -1, 0, 4, 5, 6, 7, 21 | Integer.MAX_VALUE }; 22 | /** Second sample: an if block written over several lines. */ 23 | private static final char[] CODE_SAMPLE2 = "if (y>0) {\n a += 2;\n}" 24 | .toCharArray(); 25 | 26 | private static final String[] TOKENS_SAMPLE2 = { ITokenizer.SENTENCE_START, 27 | "if", "(", "y", ">", "0", ")", "{", "a", "+=", "2", ";", "}", 28 | ITokenizer.SENTENCE_END }; 29 | /** Third sample: the first sample followed by a trailing line comment. */ 30 | public static final char[] CODE_SAMPLE3 = "var x=2; // this is a test\n" 31 | .toCharArray(); 32 | 33 | @Test 34 | public void testSample1() { 35 | final ITokenizer tokenizer = new JavascriptTokenizer(); 36 | testSample1(tokenizer); 37 | } 38 | 39 | /** 40 | * @param tokenizer tokenizer under test; must turn CODE_SAMPLE1 into exactly TOKENS_SAMPLE1 41 | */ 42 | protected void testSample1(final ITokenizer tokenizer) { 43 | final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE1); 44 | for (int i = 0; i < TOKENS_SAMPLE1.length; i++) { 45 | assertEquals(TOKENS_SAMPLE1[i], tok.get(i)); // JUnit order: expected, then actual 46 | } 47 | assertEquals(TOKENS_SAMPLE1.length, tok.size()); 48 | } 49 | 50 | /** 51 | * @param tokenizer tokenizer under test; tokenListWithPos must map each TOKEN_POS_SAMPLE1 position to the matching TOKENS_SAMPLE1 token 52 | */ 53 | protected void testSample1Position(final ITokenizer tokenizer) { 54 | final Map<Integer, String> toks = tokenizer 55 | .tokenListWithPos(CODE_SAMPLE1); 56 | for (int i = 0; i < TOKEN_POS_SAMPLE1.length; i++) { 57
| assertTrue(toks.containsKey(TOKEN_POS_SAMPLE1[i])); 58 | assertEquals(TOKENS_SAMPLE1[i], toks.get(TOKEN_POS_SAMPLE1[i])); 59 | } 60 | assertEquals(TOKENS_SAMPLE1.length, toks.size()); 61 | } 62 | 63 | @Test 64 | public void testSample1postion() { 65 | final ITokenizer tokenizer = new JavascriptTokenizer(); 66 | testSample1Position(tokenizer); 67 | } 68 | 69 | @Test 70 | public void testSample2() { 71 | final ITokenizer tokenizer = new JavascriptTokenizer(); 72 | testSample2(tokenizer); 73 | } 74 | 75 | /** 76 | * @param tokenizer tokenizer under test; its output for CODE_SAMPLE2 must start with TOKENS_SAMPLE2 (the total size is not checked) 77 | */ 78 | protected void testSample2(final ITokenizer tokenizer) { 79 | final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE2); 80 | 81 | for (int i = 0; i < TOKENS_SAMPLE2.length; i++) { 82 | assertEquals(TOKENS_SAMPLE2[i], tok.get(i)); 83 | } 84 | } 85 | 86 | @Test 87 | public void testSample3() { 88 | final ITokenizer tokenizer = new JavascriptTokenizer(); 89 | testSample3(tokenizer); 90 | } 91 | 92 | /** 93 | * @param tokenizer tokenizer under test; CODE_SAMPLE3 must yield exactly TOKENS_SAMPLE1, i.e. the trailing line comment adds no token 94 | */ 95 | protected void testSample3(final ITokenizer tokenizer) { 96 | final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE3); 97 | for (int i = 0; i < TOKENS_SAMPLE1.length; i++) { 98 | assertEquals(TOKENS_SAMPLE1[i], tok.get(i)); 99 | } 100 | assertEquals(TOKENS_SAMPLE1.length, tok.size()); 101 | } 102 | /** Checks that getTokenFromString pairs an identifier with the identifier type and a left brace with the TokenNameLBRACE terminal id. */ 103 | @Test 104 | public void testTokenTypes() { 105 | final ITokenizer tokenizer = new JavascriptTokenizer(); 106 | assertEquals( 107 | new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()), 108 | tokenizer.getTokenFromString("hello")); 109 | assertEquals( 110 | new ITokenizer.FullToken("{", Integer 111 | .toString(ITerminalSymbols.TokenNameLBRACE)), 112 | tokenizer.getTokenFromString("{")); 113 | 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/test/java/codemining/js/codeutils/binding/JavascriptApproximateVariableBindingExtractorTest.java: -------------------------------------------------------------------------------- 1
| package codemining.js.codeutils.binding; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertTrue; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Collection; 9 | import java.util.List; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | import codemining.java.codeutils.binding.BindingTester; 16 | import codemining.js.codeutils.JavascriptASTExtractorTest; 17 | import codemining.languagetools.bindings.TokenNameBinding; 18 | /** Checks the bindings found by JavascriptApproximateVariableBindingExtractor and compares them with the exact extractor's. */ 19 | public class JavascriptApproximateVariableBindingExtractorTest { 20 | /** Asserts that every element of {@code collection} is also contained in {@code in}. */ 21 | private static <T> void allAreContained(final Collection<T> collection, 22 | final Collection<T> in) { 23 | for (final T element : collection) { 24 | assertTrue(in.contains(element)); 25 | } 26 | } 27 | /** Handle on the SampleJavascript2.txt fixture, set in setUp(). */ 28 | File classContent; 29 | /** Handle on the SampleJavascript3.txt fixture, set in setUp(). */ 30 | File classContent2; 31 | /** Text of the SampleJavascript.txt fixture, read in setUp(). */ 32 | String methodContent; 33 | /** Locates the two file fixtures and reads the single-function fixture into memory. */ 34 | @Before 35 | public void setUp() throws IOException { 36 | classContent = new File(JavascriptASTExtractorTest.class 37 | .getClassLoader().getResource("SampleJavascript2.txt") 38 | .getFile()); 39 | classContent2 = new File(JavascriptASTExtractorTest.class 40 | .getClassLoader().getResource("SampleJavascript3.txt") 41 | .getFile()); 42 | 43 | methodContent = FileUtils.readFileToString(new File( 44 | JavascriptASTExtractorTest.class.getClassLoader() 45 | .getResource("SampleJavascript.txt").getFile())); 46 | } 47 | /** Asserts the approximate extractor's binding counts and that every binding of the exact extractor is also found by the approximate one. */ 48 | @Test 49 | public void testClassBindings() throws IOException { 50 | final JavascriptApproximateVariableBindingExtractor jabe = new JavascriptApproximateVariableBindingExtractor(); 51 | final JavascriptExactVariableBindingsExtractor jbe = new JavascriptExactVariableBindingsExtractor(); 52 | 53 | final List<TokenNameBinding> classVariableBindings = jabe 54 | .getNameBindings(classContent); 55 | final List<TokenNameBinding> classVariableBindingsExact = jbe 56 | .getNameBindings(classContent); 57 | 58 | BindingTester.checkAllBindings(classVariableBindings); 59 |
assertEquals(3, classVariableBindings.size()); // JUnit order: expected, then actual 60 | 61 | final List<TokenNameBinding> classVariableBindings2 = jabe 62 | .getNameBindings(classContent2); 63 | final List<TokenNameBinding> classVariableBindings2Exact = jbe 64 | .getNameBindings(classContent2); 65 | 66 | assertEquals(13, classVariableBindings2.size()); 67 | 68 | allAreContained(classVariableBindingsExact, classVariableBindings); 69 | allAreContained(classVariableBindings2Exact, classVariableBindings2); 70 | } 71 | /** Asserts that the single-function fixture yields exactly one binding. */ 72 | @Test 73 | public void testMethodBinding() { 74 | final JavascriptApproximateVariableBindingExtractor jabe = new JavascriptApproximateVariableBindingExtractor(); 75 | final List<TokenNameBinding> methodVariableBindings = jabe 76 | .getNameBindings(methodContent); 77 | BindingTester.checkAllBindings(methodVariableBindings); 78 | assertEquals(1, methodVariableBindings.size()); 79 | 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /src/test/java/codemining/js/codeutils/binding/JavascriptExactVariableBindingsExtractorTest.java: -------------------------------------------------------------------------------- 1 | package codemining.js.codeutils.binding; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.List; 6 | 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import codemining.java.codeutils.binding.BindingTester; 11 | import codemining.js.codeutils.JavascriptASTExtractorTest; 12 | import codemining.languagetools.bindings.TokenNameBinding; 13 | 14 | // FIXME Tests commented out until binding resolution is fixed 15 | public class JavascriptExactVariableBindingsExtractorTest { 16 | 17 | File classContent; 18 | 19 | File classContent2; 20 | 21 | @Before 22 | public void setUp() throws IOException { 23 | classContent = new File(JavascriptASTExtractorTest.class 24 | .getClassLoader().getResource("SampleJavascript.txt").getFile()); 25 | classContent2 = new File(JavascriptASTExtractorTest.class 26 |
.getClassLoader().getResource("SampleJavascript2.txt") 27 | .getFile()); 28 | } 29 | /** Runs the exact extractor on both fixtures; the size assertions stay disabled (see the FIXME on this class). */ 30 | @Test 31 | public void testClassBindings() throws IOException { 32 | final JavascriptExactVariableBindingsExtractor jbe = new JavascriptExactVariableBindingsExtractor(); 33 | final List<TokenNameBinding> classVariableBindings = jbe 34 | .getNameBindings(classContent); 35 | BindingTester.checkAllBindings(classVariableBindings); 36 | // assertEquals(1, classVariableBindings.size()); 37 | 38 | final List<TokenNameBinding> classVariableBindings2 = jbe 39 | .getNameBindings(classContent2); 40 | 41 | // assertEquals(3, classVariableBindings2.size()); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/codemining/languagetools/TokenizerUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package codemining.languagetools; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | 11 | import org.apache.commons.io.FileUtils; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | import codemining.java.codeutils.JavaAstExtractorTest; 16 | 17 | /** 18 | * Checks TokenizerUtils#getColumnOfPosition on the SampleClass.txt fixture. 19 | * @author Miltos Allamanis 20 | */ 21 | public class TokenizerUtilsTest { 22 | /** Text of the SampleClass.txt fixture, read in setUp(). */ 23 | private String classContent; 24 | /** Reads the fixture located through the class loader of JavaAstExtractorTest. */ 25 | @Before 26 | public void setUp() throws IOException { 27 | classContent = FileUtils.readFileToString(new File( 28 | JavaAstExtractorTest.class.getClassLoader() 29 | .getResource("SampleClass.txt").getFile())); 30 | } 31 | /** Asserts the column reported for three fixed character positions of the fixture. */ 32 | @Test 33 | public void testColumn() { 34 | assertEquals(29, TokenizerUtils.getColumnOfPosition(classContent, 970)); // JUnit order: expected, then actual 35 | assertEquals(13, TokenizerUtils.getColumnOfPosition(classContent, 980)); 36 | assertEquals(17, TokenizerUtils.getColumnOfPosition(classContent, 1565)); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/test/resources/SampleClass.txt:
-------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2000, 2007 IBM Corporation and others. 3 | * All rights reserved. This program and the accompanying materials 4 | * are made available under the terms of the Eclipse Public License v1.0 5 | * which accompanies this distribution, and is available at 6 | * http://www.eclipse.org/legal/epl-v10.html 7 | * 8 | * Contributors: 9 | * IBM Corporation - initial API and implementation 10 | *******************************************************************************/ 11 | package org.eclipse.jdt.jeview; 12 | 13 | 14 | import org.eclipse.core.runtime.Assert; 15 | 16 | import org.eclipse.ui.IEditorInput; 17 | import org.eclipse.ui.IEditorPart; 18 | import org.eclipse.ui.IWorkbenchPage; 19 | import org.eclipse.ui.IWorkbenchWindow; 20 | import org.eclipse.ui.texteditor.ITextEditor; 21 | 22 | import org.eclipse.jdt.core.IJavaElement; 23 | import org.eclipse.jdt.core.IOpenable; 24 | 25 | import org.eclipse.jdt.ui.JavaUI; 26 | 27 | /** 28 | * 29 | */ 30 | public class EditorUtility { 31 | private EditorUtility() { 32 | super(); 33 | } 34 | 35 | public static IEditorPart getActiveEditor() { 36 | IWorkbenchWindow window= JEViewPlugin.getDefault().getWorkbench().getActiveWorkbenchWindow(); 37 | if (window != null) { 38 | IWorkbenchPage page= window.getActivePage(); 39 | if (page != null) { 40 | return page.getActiveEditor(); 41 | } 42 | } 43 | return null; 44 | } 45 | 46 | 47 | public static IOpenable getJavaInput(IEditorPart part) { 48 | IEditorInput editorInput= part.getEditorInput(); 49 | if (editorInput != null) { 50 | IJavaElement input= javaUIgetEditorInputJavaElement(editorInput); 51 | if (input instanceof IOpenable) { 52 | return (IOpenable) input; 53 | } 54 | } 55 | return null; 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- 
/src/test/resources/SampleClass2.txt: -------------------------------------------------------------------------------- 1 | package junit.framework; 2 | 3 | import java.util.List; 4 | import java.util.ArrayList; 5 | 6 | /** 7 | * This is just a piece of code from JUnit. Not necessarily working. 8 | * A TestResult collects the results of executing 9 | * a test case. It is an instance of the Collecting Parameter pattern. 10 | * The test framework distinguishes between failures and errors. 11 | * A failure is anticipated and checked for with assertions. Errors are 12 | * unanticipated problems like an {@link ArrayIndexOutOfBoundsException}. 13 | * 14 | * @see Test 15 | */ 16 | public class TestResult extends Object { 17 | protected List fFailures; 18 | 19 | public TestResult() { 20 | fFailures = new ArrayList(); 21 | fErrors = new ArrayList(); 22 | fListeners = new ArrayList(); 23 | fRunTests = 0; 24 | fStop = false; 25 | } 26 | 27 | /** 28 | * Adds an error to the list of errors. The passed in exception 29 | * caused the error. 30 | */ 31 | public synchronized void addError(Test test, Throwable t) { 32 | fErrors.add(new TestFailure(test, t)); 33 | for (TestListener each : cloneListeners()) { 34 | each.addError(test, t); 35 | } 36 | } 37 | 38 | 39 | /** 40 | * Runs a TestCase. 
41 | */ 42 | public void runProtected(final Test test, Protectable p) { 43 | try { 44 | p.protect(); 45 | } catch (AssertionFailedError e) { 46 | addFailure(test, e); 47 | } catch (ThreadDeath e) { // don't catch ThreadDeath by accident 48 | throw e; 49 | } catch (Throwable e) { 50 | addError(test, e); 51 | } 52 | } 53 | 54 | /** 55 | * Checks whether the test run should stop 56 | */ 57 | public synchronized boolean shouldStop() { 58 | return fStop; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/test/resources/SampleClass2WhitespaceTokens.txt: -------------------------------------------------------------------------------- 1 | 2 | package 3 | WS_s1t0 4 | IDENTIFIER 5 | . 6 | IDENTIFIER 7 | ; 8 | WS_INDENTs0t0n2 9 | import 10 | WS_s1t0 11 | IDENTIFIER 12 | . 13 | IDENTIFIER 14 | . 15 | IDENTIFIER 16 | ; 17 | WS_INDENTs0t0n1 18 | import 19 | WS_s1t0 20 | IDENTIFIER 21 | . 22 | IDENTIFIER 23 | . 24 | IDENTIFIER 25 | ; 26 | WS_INDENTs0t0n2 27 | COMMENT_JAVADOC 28 | WS_INDENTs0t0n1 29 | public 30 | WS_s1t0 31 | class 32 | WS_s1t0 33 | IDENTIFIER 34 | WS_s1t0 35 | extends 36 | WS_s1t0 37 | IDENTIFIER 38 | WS_s1t0 39 | { 40 | WS_INDENTs4t0n1 41 | protected 42 | WS_s1t0 43 | IDENTIFIER 44 | < 45 | IDENTIFIER 46 | > 47 | WS_s1t0 48 | IDENTIFIER 49 | ; 50 | WS_INDENTs0t0n2 51 | public 52 | WS_s1t0 53 | IDENTIFIER 54 | ( 55 | ) 56 | WS_s1t0 57 | { 58 | WS_INDENTs4t0n1 59 | IDENTIFIER 60 | WS_s1t0 61 | = 62 | WS_s1t0 63 | new 64 | WS_s1t0 65 | IDENTIFIER 66 | < 67 | IDENTIFIER 68 | > 69 | ( 70 | ) 71 | ; 72 | WS_INDENTs0t0n1 73 | IDENTIFIER 74 | WS_s1t0 75 | = 76 | WS_s1t0 77 | new 78 | WS_s1t0 79 | IDENTIFIER 80 | < 81 | IDENTIFIER 82 | > 83 | ( 84 | ) 85 | ; 86 | WS_INDENTs0t0n1 87 | IDENTIFIER 88 | WS_s1t0 89 | = 90 | WS_s1t0 91 | new 92 | WS_s1t0 93 | IDENTIFIER 94 | < 95 | IDENTIFIER 96 | > 97 | ( 98 | ) 99 | ; 100 | WS_INDENTs0t0n1 101 | IDENTIFIER 102 | WS_s1t0 103 | = 104 | WS_s1t0 105 | LITERAL 106 | ; 107 | 
WS_INDENTs0t0n1 108 | IDENTIFIER 109 | WS_s1t0 110 | = 111 | WS_s1t0 112 | false 113 | ; 114 | WS_DEDENTs4t0n1 115 | } 116 | WS_INDENTs0t0n2 117 | COMMENT_JAVADOC 118 | WS_INDENTs0t0n1 119 | public 120 | WS_s1t0 121 | synchronized 122 | WS_s1t0 123 | void 124 | WS_s1t0 125 | IDENTIFIER 126 | ( 127 | IDENTIFIER 128 | WS_s1t0 129 | IDENTIFIER 130 | , 131 | WS_s1t0 132 | IDENTIFIER 133 | WS_s1t0 134 | IDENTIFIER 135 | ) 136 | WS_s1t0 137 | { 138 | WS_INDENTs4t0n1 139 | IDENTIFIER 140 | . 141 | IDENTIFIER 142 | ( 143 | new 144 | WS_s1t0 145 | IDENTIFIER 146 | ( 147 | IDENTIFIER 148 | , 149 | WS_s1t0 150 | IDENTIFIER 151 | ) 152 | ) 153 | ; 154 | WS_INDENTs0t0n1 155 | for 156 | WS_s1t0 157 | ( 158 | IDENTIFIER 159 | WS_s1t0 160 | IDENTIFIER 161 | WS_s1t0 162 | : 163 | WS_s1t0 164 | IDENTIFIER 165 | ( 166 | ) 167 | ) 168 | WS_s1t0 169 | { 170 | WS_INDENTs4t0n1 171 | IDENTIFIER 172 | . 173 | IDENTIFIER 174 | ( 175 | IDENTIFIER 176 | , 177 | WS_s1t0 178 | IDENTIFIER 179 | ) 180 | ; 181 | WS_DEDENTs4t0n1 182 | } 183 | WS_DEDENTs4t0n1 184 | } 185 | WS_INDENTs0t0n3 186 | COMMENT_JAVADOC 187 | WS_INDENTs0t0n1 188 | public 189 | WS_s1t0 190 | void 191 | WS_s1t0 192 | IDENTIFIER 193 | ( 194 | final 195 | WS_s1t0 196 | IDENTIFIER 197 | WS_s1t0 198 | IDENTIFIER 199 | , 200 | WS_s1t0 201 | IDENTIFIER 202 | WS_s1t0 203 | IDENTIFIER 204 | ) 205 | WS_s1t0 206 | { 207 | WS_INDENTs4t0n1 208 | try 209 | WS_s1t0 210 | { 211 | WS_INDENTs4t0n1 212 | IDENTIFIER 213 | . 
214 | IDENTIFIER 215 | ( 216 | ) 217 | ; 218 | WS_DEDENTs4t0n1 219 | } 220 | WS_s1t0 221 | catch 222 | WS_s1t0 223 | ( 224 | IDENTIFIER 225 | WS_s1t0 226 | IDENTIFIER 227 | ) 228 | WS_s1t0 229 | { 230 | WS_INDENTs4t0n1 231 | IDENTIFIER 232 | ( 233 | IDENTIFIER 234 | , 235 | WS_s1t0 236 | IDENTIFIER 237 | ) 238 | ; 239 | WS_DEDENTs4t0n1 240 | } 241 | WS_s1t0 242 | catch 243 | WS_s1t0 244 | ( 245 | IDENTIFIER 246 | WS_s1t0 247 | IDENTIFIER 248 | ) 249 | WS_s1t0 250 | { 251 | WS_s1t0 252 | COMMENT_LINE 253 | WS_INDENTs4t0n1 254 | throw 255 | WS_s1t0 256 | IDENTIFIER 257 | ; 258 | WS_DEDENTs4t0n1 259 | } 260 | WS_s1t0 261 | catch 262 | WS_s1t0 263 | ( 264 | IDENTIFIER 265 | WS_s1t0 266 | IDENTIFIER 267 | ) 268 | WS_s1t0 269 | { 270 | WS_INDENTs4t0n1 271 | IDENTIFIER 272 | ( 273 | IDENTIFIER 274 | , 275 | WS_s1t0 276 | IDENTIFIER 277 | ) 278 | ; 279 | WS_DEDENTs4t0n1 280 | } 281 | WS_DEDENTs4t0n1 282 | } 283 | WS_INDENTs0t0n2 284 | COMMENT_JAVADOC 285 | WS_INDENTs0t0n1 286 | public 287 | WS_s1t0 288 | synchronized 289 | WS_s1t0 290 | boolean 291 | WS_s1t0 292 | IDENTIFIER 293 | ( 294 | ) 295 | WS_s1t0 296 | { 297 | WS_INDENTs4t0n1 298 | return 299 | WS_s1t0 300 | IDENTIFIER 301 | ; 302 | WS_DEDENTs4t0n1 303 | } 304 | WS_DEDENTs4t0n1 305 | } 306 | WS_INDENTs0t0n1 307 | 308 | -------------------------------------------------------------------------------- /src/test/resources/SampleClass3.txt: -------------------------------------------------------------------------------- 1 | package my.pack; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | import java.util.HashMap; 6 | import java.io.IOException; 7 | import your.pack.Blah; 8 | import your.pack2.ParamType; 9 | import com.google.common.collect.Lists; 10 | 11 | private class SomeName { 12 | SomeName anInstance; 13 | 14 | int[] arrayOfInt = new int[10]; 15 | 16 | long aNumber = 7; 17 | 18 | Blah singleObject = new Blah(); 19 | 20 | Blah[] arrayOfObjects = null; 21 | 22 | List listOfInt = Lists.newArrayList(); 23 
| 24 | Map>> complexParamType; 25 | 26 | ParamType paraType = new ParamType(listOfInt); 27 | 28 | ParamType lowerBoundPa = null; 29 | 30 | ParamType upperBoundPa = null; 31 | 32 | ParamType> upperBoundPa2 = null; 33 | 34 | void doSomething() { 35 | try { 36 | justDoIt(); 37 | } catch (IOException | ArithmeticException e) { 38 | e.printStackTrace(); 39 | } 40 | } 41 | 42 | } -------------------------------------------------------------------------------- /src/test/resources/SampleJavascript.txt: -------------------------------------------------------------------------------- 1 | function factorial(n) { 2 | if (n === 0) { 3 | return 1; 4 | } 5 | return n * factorial(n - 1); 6 | } -------------------------------------------------------------------------------- /src/test/resources/SampleJavascript2.txt: -------------------------------------------------------------------------------- 1 | var sum = function() { 2 | var i, x = 0; 3 | for (i = 0; i < arguments.length; ++i) { 4 | x += arguments[i]; 5 | } 6 | return x; 7 | } 8 | sum(1, 2, 3); // returns 6 -------------------------------------------------------------------------------- /src/test/resources/SampleJavascript3.txt: -------------------------------------------------------------------------------- 1 | /* Finds the lowest common multiple (LCM) of two numbers */ 2 | function LCMCalculator(x, y) { // constructor function 3 | var checkInt = function (x) { // inner function 4 | if (x % 1 !== 0) { 5 | throw new TypeError(x + " is not an integer"); // throw an exception 6 | } 7 | return x; 8 | }; 9 | this.a = checkInt(x) 10 | // semicolons ^^^^ are optional, a newline is enough 11 | this.b = checkInt(y); 12 | } 13 | // The prototype of object instances created by a constructor is 14 | // that constructor's "prototype" property. 
15 | LCMCalculator.prototype = { // object literal 16 | constructor: LCMCalculator, // when reassigning a prototype, set the constructor property appropriately 17 | gcd: function () { // method that calculates the greatest common divisor 18 | // Euclidean algorithm: 19 | var a = Math.abs(this.a), b = Math.abs(this.b), t; 20 | if (a < b) { 21 | // swap variables 22 | t = b; 23 | b = a; 24 | a = t; 25 | } 26 | while (b !== 0) { 27 | t = b; 28 | b = a % b; 29 | a = t; 30 | } 31 | // Only need to calculate GCD once, so "redefine" this method. 32 | // (Actually not redefinition—it's defined on the instance itself, 33 | // so that this.gcd refers to this "redefinition" instead of LCMCalculator.prototype.gcd.) 34 | // Also, 'gcd' === "gcd", this['gcd'] === this.gcd 35 | this['gcd'] = function () { 36 | return a; 37 | }; 38 | return a; 39 | }, 40 | // Object property names can be specified by strings delimited by double (") or single (') quotes. 41 | lcm : function () { 42 | // Variable names don't collide with object properties, e.g. |lcm| is not |this.lcm|. 43 | // not using |this.a * this.b| to avoid FP precision issues 44 | var lcm = this.a / this.gcd() * this.b; 45 | // Only need to calculate lcm once, so "redefine" this method. 46 | this.lcm = function () { 47 | return lcm; 48 | }; 49 | return lcm; 50 | }, 51 | toString: function () { 52 | return "LCMCalculator: a = " + this.a + ", b = " + this.b; 53 | } 54 | }; 55 | 56 | // Define generic output function; this implementation only works for web browsers 57 | function output(x) { 58 | document.body.appendChild(document.createTextNode(x)); 59 | document.body.appendChild(document.createElement('br')); 60 | } 61 | 62 | // Note: Array's map() and forEach() are defined in JavaScript 1.6. 63 | // They are used here to demonstrate JavaScript's inherent functional nature. 
64 | [[25, 55], [21, 56], [22, 58], [28, 56]].map(function (pair) { // array literal + mapping function 65 | return new LCMCalculator(pair[0], pair[1]); 66 | }).sort(function (a, b) { // sort with this comparative function 67 | return a.lcm() - b.lcm(); 68 | }).forEach(function (obj) { 69 | output(obj + ", gcd = " + obj.gcd() + ", lcm = " + obj.lcm()); 70 | }); 71 | -------------------------------------------------------------------------------- /src/test/resources/SampleMethod.txt: -------------------------------------------------------------------------------- 1 | public static IOpenable getJavaInput(IEditorPart part) { 2 | IEditorInput editorInput= part.getEditorInput(); 3 | if (editorInput != null) { 4 | IJavaElement input= javaUIgetEditorInputJavaElement(editorInput); 5 | if (input instanceof IOpenable) { 6 | return (IOpenable) input; 7 | } 8 | } 9 | return null; 10 | } 11 | --------------------------------------------------------------------------------