20 | *
21 | */
22 | public class CodePrinter {
23 |
24 | /**
25 | * The tokenizer used to tokenize code.
26 | */
27 | final ITokenizer tokenizer;
28 |
29 | /**
30 | * The background Color of the output HTML document.
31 | */
32 | final Color documentBackgroundColor;
33 |
/**
 * Running counter that appendLineDiv advances by one on every call and that
 * getHTMLwithColors resets to 1 before it starts rendering.
 */
34 | int lineNumber = 1;
35 |
/**
 * Value of the "ignoreTokenBackground" setting (default true).
 * NOTE(review): no statement visible in this listing reads this field; it
 * was presumably used when building the per-token markup - confirm.
 */
36 | private final boolean ignoreTokBG = SettingsLoader.getBooleanSetting(
37 | "ignoreTokenBackground", true);
38 |
/**
 * Text that getHTMLwithColors appends right after the document preamble.
 * NOTE(review): the literal is empty in this listing and the embedded line
 * numbering jumps from 39 to 44, so its content looks lost - confirm.
 */
39 | public static final String CSS_STYLE = "";
44 |
/**
 * Creates a printer.
 *
 * @param tokenizer
 *            the tokenizer used to split the code into positioned tokens
 * @param documentBackgroundColor
 *            the background color of the output document
 */
45 | public CodePrinter(final ITokenizer tokenizer,
46 | final Color documentBackgroundColor) {
47 | this.tokenizer = tokenizer;
48 | this.documentBackgroundColor = documentBackgroundColor;
49 | }
50 |
/**
 * Appends the text found between two tokens. The text is HTML-escaped and
 * copied character by character; a newline character is not copied but
 * starts a new line block through appendLineDiv(buf, true).
 *
 * @param substring
 *            the raw text between two tokens
 * @param buf
 *            the buffer receiving the output
 */
51 | private void addSlack(final String substring, final StringBuffer buf) {
52 | for (final char c : StringEscapeUtils.escapeHtml(substring)
53 | .toCharArray()) {
54 | if (c == '\n') {
55 | appendLineDiv(buf, true);
56 | } else {
57 | buf.append(c);
58 | }
59 | }
60 |
61 | }
62 |
/**
 * Starts a new line block in the output and advances lineNumber.
 * NOTE(review): the appended literals look truncated in this listing
 * (presumably closing and opening markup of a line element) - confirm.
 *
 * @param buf
 *            the buffer receiving the output
 * @param closePrevious
 *            when true, the closing text for the previous line block is
 *            appended first
 */
63 | private void appendLineDiv(final StringBuffer buf,
64 | final boolean closePrevious) {
65 | if (closePrevious) {
66 | buf.append("
\n");
67 | }
68 | buf.append("");
69 | lineNumber++;
70 | }
71 |
72 | /**
73 | * Return a StringBuffer with colored tokens as specified from the
74 | * coloredTokens. There should be one-to-one correspondence with the actual
75 | * tokens.
 *
 * The file is read and tokenized with fullTokenListWithPos; the text that
 * lies between consecutive tokens is emitted through addSlack and every
 * token is emitted HTML-escaped. The first map entry and any entry keyed
 * Integer.MAX_VALUE are skipped.
 *
 * @param coloredTokens
 *            the colored tokens, indexed in step with the map entries
 *            returned by the tokenizer (skipped entries included)
 * @param codeFile
 *            the file whose code is rendered
 * @return the buffer holding the generated document
 * @throws IOException
 *             if the file cannot be read
76 | */
77 | public StringBuffer getHTMLwithColors(
78 | final List coloredTokens, final File codeFile)
79 | throws IOException, InstantiationException, IllegalAccessException {
80 | final String code = FileUtils.readFileToString(codeFile);
// Restart the line counter for this document.
81 | lineNumber = 1;
82 |
83 | final StringBuffer buf = new StringBuffer();
84 |
85 | final SortedMap toks = tokenizer
86 | .fullTokenListWithPos(code.toCharArray());
87 |
// i indexes coloredTokens; prevPos is the offset just past the last emitted token.
88 | int i = 0;
89 | int prevPos = 0;
90 | buf.append("\n\n\n");
91 | buf.append(CSS_STYLE);
92 | buf.append("\n");
96 | appendLineDiv(buf, false);
97 | for (final Entry entry : toks.entrySet()) {
// Skip the first entry and the one keyed Integer.MAX_VALUE (presumably the
// sentence start/end markers - confirm) while keeping i aligned.
98 | if (i == 0 || entry.getKey() == Integer.MAX_VALUE) {
99 | i++;
100 | continue;
101 | }
// Emit whatever lies between the previous token and this one.
102 | addSlack(code.substring(prevPos, entry.getKey()), buf);
// NOTE(review): tok is not referenced again in this listing; it was presumably
// used in the markup that is missing from the append below - confirm.
103 | final ColoredToken tok = coloredTokens.get(i);
104 |
105 | buf.append(""
111 | + StringEscapeUtils.escapeHtml(entry.getValue().token)
112 | + "");
113 | i++;
// The next slack region starts right after this token.
114 | prevPos = entry.getKey() + entry.getValue().token.length();
115 | }
116 | buf.append("
");
117 | return buf;
118 |
119 | }
120 | }
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/ColoredToken.java:
--------------------------------------------------------------------------------
1 | package codemining.languagetools;
2 |
3 | import java.awt.Color;
4 |
5 | /**
6 | * Struct class representing a colored token.
7 | *
8 | */
9 | public final class ColoredToken {
10 | public Color fontColor;
11 | public final Color bgColor;
12 | public final String token;
13 | public String extraStyle;
14 |
15 | /**
16 | * Construct with default bgColor white.
17 | *
18 | * @param token
19 | * @param fontColor
20 | */
21 | public ColoredToken(final String token, final Color fontColor) {
22 | this.token = token;
23 | this.fontColor = fontColor;
24 | bgColor = Color.WHITE;
25 | extraStyle = "";
26 | }
27 |
28 | public ColoredToken(final String token, final Color fontColor,
29 | final Color bgColor, final String extraStyle) {
30 | this.token = token;
31 | this.fontColor = fontColor;
32 | this.bgColor = bgColor;
33 | this.extraStyle = extraStyle;
34 | }
35 |
36 | public void setColor(final Color fontColor) {
37 | this.fontColor = fontColor;
38 | }
39 |
40 | public void setStyle(final String extraStyle) {
41 | this.extraStyle = extraStyle;
42 | }
43 |
44 | }
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/FormattingTokenizer.java:
--------------------------------------------------------------------------------
1 | package codemining.languagetools;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.util.Collection;
6 | import java.util.List;
7 | import java.util.Map.Entry;
8 | import java.util.SortedMap;
9 |
10 | import org.apache.commons.io.FileUtils;
11 | import org.apache.commons.io.filefilter.AbstractFileFilter;
12 |
13 | import com.google.common.collect.Lists;
14 | import com.google.common.collect.Maps;
15 |
16 | /**
17 | * Adds a NO_SPACE between tokens that contain, no space.
18 | *
19 | */
20 | public class FormattingTokenizer implements IFormattingTokenizer {
21 |
22 | public static final String WS_NO_SPACE = "WS_NO_SPACE";
23 | private static final long serialVersionUID = -1736507313790110846L;
24 | final ITokenizer baseTokenizer;
25 |
26 | public FormattingTokenizer(final ITokenizer baseTokenizer) {
27 | this.baseTokenizer = baseTokenizer;
28 | }
29 |
30 | @Override
31 | public SortedMap fullTokenListWithPos(final char[] code) {
32 | throw new IllegalArgumentException("Cannot be implemented");
33 | }
34 |
35 | public ITokenizer getBaseTokenizer() {
36 | return baseTokenizer;
37 | }
38 |
39 | @Override
40 | public AbstractFileFilter getFileFilter() {
41 | return baseTokenizer.getFileFilter();
42 | }
43 |
44 | @Override
45 | public String getIdentifierType() {
46 | return baseTokenizer.getIdentifierType();
47 | }
48 |
49 | @Override
50 | public Collection getKeywordTypes() {
51 | return baseTokenizer.getKeywordTypes();
52 | }
53 |
54 | @Override
55 | public Collection getLiteralTypes() {
56 | return baseTokenizer.getLiteralTypes();
57 | }
58 |
59 | @Override
60 | public FullToken getTokenFromString(final String token) {
61 | return baseTokenizer.getTokenFromString(token);
62 | }
63 |
64 | @Override
65 | public List getTokenListFromCode(final char[] code) {
66 | final List list = Lists.newArrayList();
67 | final List original = baseTokenizer
68 | .getTokenListFromCode(code);
69 | for (int i = 0; i < original.size() - 1; i++) {
70 | final FullToken currentToken = original.get(i);
71 | list.add(currentToken);
72 | final FullToken nextToken = original.get(i + 1);
73 | if (!currentToken.token.startsWith("WS_")
74 | && !nextToken.token.startsWith("WS_")) {
75 | list.add(new FullToken(WS_NO_SPACE, ""));
76 | }
77 | }
78 | list.add(original.get(original.size() - 1));
79 | return list;
80 | }
81 |
82 | @Override
83 | public List getTokenListFromCode(final File codeFile)
84 | throws IOException {
85 | return getTokenListFromCode(FileUtils.readFileToString(codeFile)
86 | .toCharArray());
87 | }
88 |
89 | @Override
90 | public List tokenListFromCode(final char[] code) {
91 | // TODO Duplicate
92 | final List list = Lists.newArrayList();
93 | final List original = baseTokenizer.tokenListFromCode(code);
94 | for (int i = 0; i < original.size() - 1; i++) {
95 | final String currentToken = original.get(i);
96 | list.add(currentToken);
97 | final String nextToken = original.get(i + 1);
98 | if (!currentToken.startsWith("WS_") && !nextToken.startsWith("WS_")) {
99 | list.add(WS_NO_SPACE);
100 | }
101 | }
102 | list.add(original.get(original.size() - 1));
103 | return list;
104 | }
105 |
106 | @Override
107 | public List tokenListFromCode(final File codeFile)
108 | throws IOException {
109 | return tokenListFromCode(FileUtils.readFileToString(codeFile)
110 | .toCharArray());
111 | }
112 |
113 | @Override
114 | public SortedMap tokenListWithPos(final char[] code) {
115 | throw new IllegalArgumentException("Cannot be implemented");
116 | }
117 |
118 | @Override
119 | public SortedMap tokenListWithPos(final File file)
120 | throws IOException {
121 | return fullTokenListWithPos(FileUtils.readFileToString(file)
122 | .toCharArray());
123 | }
124 |
125 | /**
126 | * Return the position of just the whitespaces in the code.
127 | *
128 | * @param code
129 | * @return
130 | */
131 | public SortedMap whitespaceTokenPositions(final char[] code) {
132 | final SortedMap wsPositions = Maps.newTreeMap();
133 | final SortedMap originalPositions = baseTokenizer
134 | .tokenListWithPos(code);
135 |
136 | boolean previousWasWhitespace = true;
137 | for (final Entry tokenEntry : originalPositions
138 | .entrySet()) {
139 | if (tokenEntry.getValue().startsWith(ITokenizer.SENTENCE_START)
140 | || tokenEntry.getValue()
141 | .startsWith(ITokenizer.SENTENCE_END)) {
142 | continue;
143 | }
144 | if (tokenEntry.getValue().startsWith("WS_")) {
145 | wsPositions.put(tokenEntry.getKey(), tokenEntry.getValue());
146 | previousWasWhitespace = true;
147 | } else if (!previousWasWhitespace) {
148 | wsPositions.put(tokenEntry.getKey(), WS_NO_SPACE);
149 | previousWasWhitespace = false;
150 | } else {
151 | previousWasWhitespace = false;
152 | }
153 | }
154 |
155 | return wsPositions;
156 | }
157 | }
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/IAstAnnotatedTokenizer.java:
--------------------------------------------------------------------------------
1 | package codemining.languagetools;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.io.Serializable;
6 | import java.util.List;
7 | import java.util.SortedMap;
8 |
9 | import com.google.common.base.Function;
10 | import com.google.common.base.Objects;
11 |
12 | /**
13 | * A tokenizer interface that returns AST annotated tokens.
14 | *
15 | * @author Miltos Allamanis
16 | *
17 | */
18 | public interface IAstAnnotatedTokenizer extends ITokenizer {
19 |
20 | /**
21 | * A struct class for representing AST annotated tokens.
22 | */
23 | public static class AstAnnotatedToken implements Serializable {
24 |
25 | private static final long serialVersionUID = -8505721476537620929L;
26 |
/**
 * Flattens an annotated token into a FullToken. When both AST node strings
 * are non-null, the token text becomes the original text followed by
 * "->in{" + tokenAstNode + "->" + parentTokenAstNode + "}" and the token
 * type is kept; otherwise a plain copy of the wrapped token is returned.
 */
27 | public static final Function TOKEN_FLATTEN_FUNCTION = new Function() {
28 | @Override
29 | public FullToken apply(final AstAnnotatedToken input) {
30 | if (input.tokenAstNode != null
31 | && input.parentTokenAstNode != null) {
32 | return new FullToken(input.token.token + "->in{"
33 | + input.tokenAstNode + "->"
34 | + input.parentTokenAstNode + "}",
35 | input.token.tokenType);
36 | } else {
37 | return new FullToken(input.token);
38 | }
39 | }
40 | };
41 |
// The wrapped token, plus the annotation strings for the token and its parent;
// the flatten function treats a null annotation as absent.
42 | public final FullToken token;
43 | public final String tokenAstNode;
44 | public final String parentTokenAstNode;
45 |
/**
 * @param token
 *            the token being annotated
 * @param tokenAstNode
 *            the annotation for the token itself
 * @param parentTokenAstNode
 *            the annotation for the parent of the token
 */
46 | public AstAnnotatedToken(final FullToken token,
47 | final String tokenAstNode, final String parentTokenAstNode) {
48 | this.token = token;
49 | this.tokenAstNode = tokenAstNode;
50 | this.parentTokenAstNode = parentTokenAstNode;
51 | }
52 |
/**
 * Two annotated tokens are equal when they have exactly the same class and
 * their token and both annotation strings are equal.
 */
53 | @Override
54 | public boolean equals(final Object obj) {
55 | if (this == obj) {
56 | return true;
57 | }
58 | if (obj == null) {
59 | return false;
60 | }
61 | if (getClass() != obj.getClass()) {
62 | return false;
63 | }
64 | final AstAnnotatedToken other = (AstAnnotatedToken) obj;
65 | return Objects.equal(other.token, token)
66 | && Objects.equal(other.tokenAstNode, tokenAstNode)
67 | && Objects.equal(other.parentTokenAstNode,
68 | parentTokenAstNode);
69 | }
70 |
/** Hashes the same three fields that equals compares. */
71 | @Override
72 | public int hashCode() {
73 | return Objects.hashCode(token, tokenAstNode, parentTokenAstNode);
74 | }
75 |
/** Returns the string form of the flattened token. */
76 | @Override
77 | public String toString() {
78 | return TOKEN_FLATTEN_FUNCTION.apply(this).toString();
79 | }
80 |
81 | }
82 |
/**
 * Return the annotated tokens of the given code as a list.
 * NOTE(review): the element type of the list is not visible in this
 * listing; presumably AstAnnotatedToken - confirm.
 */
83 | public abstract List getAnnotatedTokenListFromCode(
84 | char[] code);
85 |
/**
 * Return the annotated tokens of the given file as a list.
 *
 * @throws IOException
 *             presumably when the file cannot be read - confirm
 */
86 | public abstract List getAnnotatedTokenListFromCode(
87 | File codeFile) throws IOException;
88 |
89 | /**
 * Return the annotated tokens of the given code as a sorted map.
 * NOTE(review): the key and value types are not visible in this listing;
 * presumably position to AstAnnotatedToken - confirm.
 *
90 | * @param code the code to tokenize
91 | * @return the sorted map of annotated tokens
92 | */
93 | public abstract SortedMap getAnnotatedTokens(
94 | char[] code);
95 |
96 | /**
97 | * Return the base tokenizer whose tokens are annotated.
98 | *
99 | * @return the base tokenizer
100 | */
101 | public ITokenizer getBaseTokenizer();
102 |
103 | }
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/IFormattingTokenizer.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package codemining.languagetools;
5 |
6 | /**
7 | * A marker tokenizer interface: it declares no members of its own and only
8 | * signals that all implementors produce whitespace tokens.
9 | *
10 | * @author Miltos Allamanis
11 | *
12 | */
13 | public interface IFormattingTokenizer extends ITokenizer {
14 |
15 | }
16 |
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/IScopeExtractor.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package codemining.languagetools;
5 |
6 | import java.io.File;
7 | import java.io.IOException;
8 |
9 | import org.eclipse.jdt.core.dom.ASTNode;
10 |
11 | import com.google.common.collect.Multimap;
12 |
13 | /**
14 | * An interface for extracting scope-related information. Scope extractors
15 | * return a multimap that for each scope contains all the identifiers that are
16 | * declared there.
17 | *
18 | * @author Miltos Allamanis
19 | *
20 | */
21 | public interface IScopeExtractor {
/**
 * Extract the scope information of a file.
 * NOTE(review): the key and value types of the multimap are not visible in
 * this listing - confirm.
 *
 * @throws IOException
 *             presumably when the file cannot be read - confirm
 */
22 | Multimap getFromFile(final File file) throws IOException;
23 |
/** Extract the scope information of an AST node. */
24 | Multimap getFromNode(final ASTNode node);
25 |
/**
 * Extract the scope information of a piece of code given as text.
 *
 * @param code
 *            the code
 * @param parseType
 *            the kind of fragment the code represents
 */
26 | Multimap getFromString(final String code,
27 | final ParseType parseType);
28 | }
29 |
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/ITokenizer.java:
--------------------------------------------------------------------------------
1 | package codemining.languagetools;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.io.Serializable;
6 | import java.util.Collection;
7 | import java.util.List;
8 | import java.util.SortedMap;
9 |
10 | import org.apache.commons.io.filefilter.AbstractFileFilter;
11 |
12 | import com.google.common.base.Function;
13 | import com.google.common.base.Objects;
14 |
15 | /**
16 | * Interface of a code tokenizer.
17 | *
18 | * @author Miltos Allamanis
19 | *
20 | */
21 | public interface ITokenizer extends Serializable {
22 |
/**
 * An immutable pair of a token text and a token type.
 */
23 | public static class FullToken implements Serializable {
24 |
25 | private static final long serialVersionUID = -49456240173307314L;
26 |
/** Maps a FullToken to its token text. */
27 | public static final Function TOKEN_NAME_CONVERTER = new Function() {
28 | @Override
29 | public String apply(final FullToken input) {
30 | return input.token;
31 | }
32 | };
33 |
/** The text of the token. */
34 | public final String token;
35 |
/** The type of the token. */
36 | public final String tokenType;
37 |
/** Copy constructor: takes both fields from the other token. */
38 | public FullToken(final FullToken other) {
39 | token = other.token;
40 | tokenType = other.tokenType;
41 | }
42 |
/**
 * @param tokName
 *            the text of the token
 * @param tokType
 *            the type of the token
 */
43 | public FullToken(final String tokName, final String tokType) {
44 | token = tokName;
45 | tokenType = tokType;
46 | }
47 |
/**
 * Two full tokens are equal when both the text and the type are equal.
 * The comparison dereferences the fields of the other token, so it throws a
 * NullPointerException when that token holds a null text or type.
 */
48 | @Override
49 | public boolean equals(final Object obj) {
50 | if (!(obj instanceof FullToken)) {
51 | return false;
52 | }
53 | final FullToken other = (FullToken) obj;
54 | return other.token.equals(token)
55 | && other.tokenType.equals(tokenType);
56 | }
57 |
/** Hashes the text and the type, the two fields compared by equals. */
58 | @Override
59 | public int hashCode() {
60 | return Objects.hashCode(token, tokenType);
61 | }
62 |
/** Returns the text followed by the type in parentheses. */
63 | @Override
64 | public String toString() {
65 | return token + " (" + tokenType + ")";
66 | }
67 |
68 | }
69 |
70 | /**
71 | * A sentence end (constant) token.
 * NOTE(review): the literal is empty in this listing and identical to the
 * sentence start literal; its content looks lost - confirm.
72 | */
73 | static final String SENTENCE_END = "";
74 |
75 | /**
76 | * A sentence start (constant) token.
 * NOTE(review): the literal is empty in this listing; its content looks
 * lost - confirm.
77 | */
78 | static final String SENTENCE_START = "";
79 |
80 | /**
81 | * Return a sorted map with the full tokens of the code.
82 | *
83 | * @param code the code to tokenize
84 | * @return the sorted map of full tokens; presumably keyed by position - confirm
85 | */
86 | SortedMap fullTokenListWithPos(final char[] code);
87 |
88 | /**
89 | * Return a file filter, filtering the files that can be tokenized.
90 | *
91 | * @return the file filter
92 | *
93 | */
94 | AbstractFileFilter getFileFilter();
95 |
96 | /**
97 | * Return the token type that signifies that a token is an identifier.
98 | *
99 | * @return the identifier token type
100 | */
101 | String getIdentifierType();
102 |
103 | /**
104 | * Return the token types that are keywords.
105 | *
106 | * @return the keyword token types
107 | */
108 | Collection getKeywordTypes();
109 |
110 | /**
111 | * Return the types that represent literals.
112 | *
113 | * @return the literal token types
114 | */
115 | Collection getLiteralTypes();
116 |
117 | /**
118 | * Return a full token given a string token.
119 | *
120 | * @param token the token text
121 | * @return the corresponding full token
122 | */
123 | FullToken getTokenFromString(final String token);
124 |
125 | /**
126 | * Get the list of tokens from the code.
127 | *
128 | * @param code the code to tokenize
129 | * @return the list of tokens
130 | */
131 | List getTokenListFromCode(final char[] code);
132 |
133 | /**
134 | * Get the list of tokens from the code in a file.
135 | *
136 | * @param codeFile the file to tokenize
137 | * @return the list of tokens
 * @throws IOException
 *             presumably when the file cannot be read - confirm
138 | */
139 | List getTokenListFromCode(final File codeFile)
140 | throws IOException;
141 |
142 | /**
143 | * Tokenize some code.
144 | *
145 | * @param code
146 | * the code
147 | * @return a list of tokens
148 | */
149 | List tokenListFromCode(final char[] code);
150 |
151 | /**
152 | * Tokenize code given a file.
153 | *
154 | * @param codeFile the file to tokenize
155 | * @return a list of tokens
 * @throws IOException
 *             presumably when the file cannot be read - confirm
156 | */
157 | List tokenListFromCode(final File codeFile) throws IOException;
158 |
159 | /**
160 | * Return the tokens along with their positions.
161 | *
162 | * @param code the code to tokenize
163 | * @return a sorted map of the tokens; presumably keyed by position - confirm
164 | */
165 | SortedMap tokenListWithPos(final char[] code);
166 |
167 | /**
168 | * Return the tokens of a file along with their positions.
169 | *
170 | * @param file the file to tokenize
171 | * @return a sorted map of the tokens; presumably keyed by position - confirm
172 | * @throws IOException presumably when the file cannot be read - confirm
173 | */
174 | SortedMap tokenListWithPos(File file)
175 | throws IOException;
176 |
177 | }
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/ParseType.java:
--------------------------------------------------------------------------------
1 | package codemining.languagetools;
2 |
/**
 * The kinds of code fragment that a piece of source text can be declared to
 * be when it is handed over for parsing.
 */
public enum ParseType {
    COMPILATION_UNIT,
    CLASS_BODY,
    METHOD,
    STATEMENTS,
    EXPRESSION
}
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/Scope.java:
--------------------------------------------------------------------------------
1 | package codemining.languagetools;
2 |
3 | import com.google.common.base.Objects;
4 | import com.google.common.collect.ComparisonChain;
5 |
6 | /**
7 | * A utility class to represent scopes.
8 | *
9 | */
10 | public class Scope implements Comparable {
11 |
12 | public enum ScopeType {
13 | SCOPE_CLASS, SCOPE_LOCAL, SCOPE_METHOD
14 | }
15 |
16 | public final String code;
17 |
18 | public final ScopeType scopeType;
19 |
20 | public final String type;
21 |
22 | public final int astNodeType;
23 | public final int astParentNodeType;
24 |
25 | public Scope(final String code, final ScopeType scopeType,
26 | final String type, final int astNodeType,
27 | final int astParentNodeType) {
28 | this.code = code;
29 | this.scopeType = scopeType;
30 | this.type = type;
31 | this.astNodeType = astNodeType;
32 | this.astParentNodeType = astParentNodeType;
33 | }
34 |
35 | @Override
36 | public int compareTo(final Scope other) {
37 | return ComparisonChain.start().compare(code, other.code)
38 | .compare(scopeType, other.scopeType).compare(type, other.type)
39 | .compare(astNodeType, other.astNodeType)
40 | .compare(astParentNodeType, other.astParentNodeType).result();
41 | }
42 |
43 | @Override
44 | public boolean equals(final Object obj) {
45 | if (!(obj instanceof Scope)) {
46 | return false;
47 | }
48 | final Scope other = (Scope) obj;
49 | return other.code.equals(code) && other.scopeType == scopeType
50 | && other.astNodeType == astNodeType
51 | && other.astParentNodeType == astParentNodeType
52 | && other.type.equals(type);
53 | }
54 |
55 | @Override
56 | public int hashCode() {
57 | return Objects.hashCode(code, scopeType, type, astNodeType,
58 | astParentNodeType);
59 | }
60 |
61 | @Override
62 | public String toString() {
63 | return scopeType + " " + code;
64 | }
65 | }
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/TokenizerUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package codemining.languagetools;
5 |
6 | import static com.google.common.base.Preconditions.checkArgument;
7 | import static com.google.common.base.Preconditions.checkPositionIndex;
8 |
9 | import java.lang.reflect.InvocationTargetException;
10 | import java.util.List;
11 |
12 | import org.apache.commons.lang.StringUtils;
13 |
14 | import codemining.languagetools.ITokenizer.FullToken;
15 | import codemining.util.SettingsLoader;
16 |
17 | /**
18 | * Utility function relevant to tokenization.
19 | *
20 | * @author Miltos Allamanis
21 | *
22 | */
23 | public class TokenizerUtils {
24 |
25 | public static final int TAB_INDENT_SIZE = (int) SettingsLoader
26 | .getNumericSetting("tabSize", 4);
27 |
28 | /**
29 | * Return the column of the given position.
30 | *
31 | * @param code
32 | * @param position
33 | * @return
34 | */
35 | public static int getColumnOfPosition(final String code, final int position) {
36 | checkPositionIndex(position, code.length());
37 | int newLinePosition = code.substring(0, position).lastIndexOf("\n");
38 | if (newLinePosition == -1) {
39 | newLinePosition = 0; // Start of file.
40 | }
41 | final int tabCount = StringUtils.countMatches(
42 | code.substring(newLinePosition, position), "\t");
43 | return position - newLinePosition + (TAB_INDENT_SIZE - 1) * tabCount;
44 | }
45 |
46 | /**
47 | * Crudely join tokens together.
48 | *
49 | * @param tokens
50 | * @param sb
51 | * @return
52 | */
53 | public final static StringBuffer joinFullTokens(
54 | final List tokens, final StringBuffer sb) {
55 | for (final FullToken token : tokens) {
56 | sb.append(token.token);
57 | sb.append(" ");
58 | }
59 |
60 | return sb;
61 | }
62 |
63 | /**
64 | * Crudely join tokens together.
65 | *
66 | * @param tokens
67 | * @param sb
68 | * @return
69 | */
70 | public final static StringBuffer joinTokens(final List tokens) {
71 | final StringBuffer sb = new StringBuffer();
72 | for (final String token : tokens) {
73 | sb.append(token);
74 | sb.append(" ");
75 | }
76 |
77 | return sb;
78 | }
79 |
80 | /**
81 | * Crudely join tokens together.
82 | *
83 | * @param tokens
84 | * @param sb
85 | * @return
86 | */
87 | public final static StringBuffer joinTokens(final List tokens,
88 | final StringBuffer sb) {
89 | for (final String token : tokens) {
90 | sb.append(token);
91 | sb.append(" ");
92 | }
93 |
94 | return sb;
95 | }
96 |
97 | /**
98 | * Remove the sentence start/end FullTokens.
99 | *
100 | * @param tokenSequence
101 | */
102 | public static final void removeSentenceStartEndFullTokens(
103 | final List tokenSequence) {
104 | checkArgument(tokenSequence.get(0).token
105 | .equals(ITokenizer.SENTENCE_START));
106 | tokenSequence.remove(0);
107 | checkArgument(tokenSequence.get(tokenSequence.size() - 1).token
108 | .equals(ITokenizer.SENTENCE_END));
109 | tokenSequence.remove(tokenSequence.size() - 1);
110 | }
111 |
112 | /**
113 | * Remove the sentence start/end tokens.
114 | *
115 | * @param tokenSequence
116 | */
117 | public static final void removeSentenceStartEndTokens(
118 | final List tokenSequence) {
119 | checkArgument(tokenSequence.get(0).equals(ITokenizer.SENTENCE_START));
120 | tokenSequence.remove(0);
121 | checkArgument(tokenSequence.get(tokenSequence.size() - 1).equals(
122 | ITokenizer.SENTENCE_END));
123 | tokenSequence.remove(tokenSequence.size() - 1);
124 | }
125 |
126 | private TokenizerUtils() {
127 | // Utilty class
128 | }
129 |
130 | /**
131 | * @param tokenizerClass
132 | * @param tokenizerArguments
133 | * @return
134 | * @throws InstantiationException
135 | * @throws IllegalAccessException
136 | * @throws IllegalArgumentException
137 | * @throws InvocationTargetException
138 | * @throws NoSuchMethodException
139 | * @throws SecurityException
140 | * @throws ClassNotFoundException
141 | */
142 | public static ITokenizer tokenizerForClass(final String tokenizerClass,
143 | final String tokenizerArguments) throws InstantiationException,
144 | IllegalAccessException, IllegalArgumentException,
145 | InvocationTargetException, NoSuchMethodException,
146 | SecurityException, ClassNotFoundException {
147 | return (ITokenizer) Class.forName(tokenizerClass)
148 | .getDeclaredConstructor(String.class)
149 | .newInstance(tokenizerArguments);
150 | }
151 |
152 | public static ITokenizer tokenizerForClass(final String tokenizerClass,
153 | final Boolean tokenizerArguments) throws InstantiationException,
154 | IllegalAccessException, IllegalArgumentException,
155 | InvocationTargetException, NoSuchMethodException,
156 | SecurityException, ClassNotFoundException {
157 | return (ITokenizer) Class.forName(tokenizerClass)
158 | .getDeclaredConstructor(Boolean.TYPE)
159 | .newInstance(tokenizerArguments);
160 | }
161 |
162 | /**
163 | * @param tokenizerClass
164 | * @return
165 | * @throws InstantiationException
166 | * @throws IllegalAccessException
167 | * @throws ClassNotFoundException
168 | */
169 | public static ITokenizer tokenizerForClass(final String tokenizerClass)
170 | throws InstantiationException, IllegalAccessException,
171 | ClassNotFoundException {
172 | return (ITokenizer) Class.forName(tokenizerClass).newInstance();
173 | }
174 |
175 | }
176 |
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/bindings/AbstractNameBindingsExtractor.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package codemining.languagetools.bindings;
5 |
6 | import java.io.File;
7 | import java.io.IOException;
8 | import java.util.List;
9 | import java.util.Set;
10 |
11 | import com.google.common.collect.HashMultimap;
12 | import com.google.common.collect.Multimap;
13 |
14 | /**
15 | * A NameBindings extractor from arbitrary code.
16 | *
17 | * @author Miltos Allamanis
18 | *
19 | */
20 | public abstract class AbstractNameBindingsExtractor {
21 |
/**
 * Return the features this extractor can provide.
 * NOTE(review): the element type of the set is garbled in this listing and
 * cannot be recovered from it - confirm against upstream.
 */
22 | public abstract Set> getAvailableFeatures();
23 |
24 | /**
25 | * Return all the name bindings for file f
26 | *
27 | * @param f the file to extract the bindings from
28 | * @return a multimap containing for each name all the relevant name
29 | * bindings in the file.
30 | * @throws IOException
31 | */
32 | public Multimap getBindingsForName(final File f)
33 | throws IOException {
34 | return getBindingsForName(getNameBindings(f));
35 | }
36 |
/**
 * Group the given bindings by name: every binding is stored under the value
 * returned by its getName().
 *
 * @param bindings
 *            the bindings to group
 * @return a new hash multimap from name to the bindings with that name
 */
37 | protected Multimap getBindingsForName(
38 | final List bindings) {
39 | final Multimap toks = HashMultimap.create();
40 | for (final TokenNameBinding binding : bindings) {
41 | toks.put(binding.getName(), binding);
42 | }
43 | return toks;
44 | }
45 |
46 | /**
47 | * Return the name bindings given the code.
48 | *
49 | * @param code the code to extract the bindings from
50 | * @return a multimap containing for each name all the relevant name
51 | * bindings in the code snippet.
52 | */
53 | public Multimap getBindingsForName(
54 | final String code) {
55 | return getBindingsForName(getNameBindings(code));
56 | }
57 |
58 | /**
59 | * Get the name bindings for the given file.
60 | *
61 | * @param f the file to extract the bindings from
62 | * @return the list of name bindings
63 | * @throws IOException
64 | */
65 | public abstract List getNameBindings(final File f)
66 | throws IOException;
67 |
68 | /**
69 | * Get the name bindings given the code.
70 | *
71 | * @param code the code to extract the bindings from
72 | * @return the list of name bindings
73 | */
74 | public abstract List getNameBindings(final String code);
75 |
76 | /**
77 | * Return a ResolvedSourceCode instance for the given file.
78 | *
79 | * @param f the file to resolve
80 | * @return the resolved source code
81 | * @throws IOException
82 | */
83 | public abstract ResolvedSourceCode getResolvedSourceCode(final File f)
84 | throws IOException;
85 |
86 | /**
87 | * Return a ResolvedSourceCode instance for the given code.
88 | *
89 | * @param code the code to resolve
90 | * @return the resolved source code
91 | */
92 | public abstract ResolvedSourceCode getResolvedSourceCode(final String code);
93 |
/**
 * Set the features that are active.
 * NOTE(review): the element type of the set is garbled in this listing -
 * confirm against upstream.
 */
94 | public abstract void setActiveFeatures(Set> activeFeatures);
95 | }
96 |
--------------------------------------------------------------------------------
/src/main/java/codemining/languagetools/bindings/ResolvedSourceCode.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package codemining.languagetools.bindings;
5 |
6 | import static com.google.common.base.Preconditions.checkArgument;
7 |
8 | import java.util.Collection;
9 | import java.util.List;
10 |
11 | import com.google.common.collect.ArrayListMultimap;
12 |
13 | /**
14 | * A full piece of source code that has the variable bindings resolved. The
15 | * variable bindings are "attached" to the source code, so any changes in the
16 | * token stream, will be reflected to the bindings.
17 | *
18 | * @author Miltos Allamanis
19 | *
20 | */
21 | public class ResolvedSourceCode {
22 |
23 | public final String name;
24 |
25 | public final List codeTokens;
26 |
27 | private final ArrayListMultimap variableBindings;
28 |
29 | /**
30 | * Assumes that the variable bindings use the same (as in ==) token list.
31 | *
32 | * @param name
33 | * @param codeTokens
34 | * @param variableBindings
35 | */
36 | public ResolvedSourceCode(final List codeTokens,
37 | final ArrayListMultimap variableBindings) {
38 | this.name = "UnkownSourceCodeName";
39 | this.codeTokens = codeTokens;
40 | this.variableBindings = variableBindings;
41 | }
42 |
43 | /**
44 | * Assumes that the variable bindings use the same (as in ==) token list.
45 | *
46 | * @param name
47 | * @param codeTokens
48 | * @param variableBindings
49 | */
50 | public ResolvedSourceCode(final String name, final List codeTokens,
51 | final ArrayListMultimap