├── images ├── overview.png ├── text-based.jpg └── refactoredgraph.png ├── .gitmodules ├── .idea ├── .gitignore ├── vcs.xml ├── encodings.xml ├── misc.xml └── uiDesigner.xml ├── src └── main │ └── java │ └── org │ └── pdgdiff │ ├── matching │ ├── models │ │ ├── vf2 │ │ │ ├── CandidatePair.java │ │ │ ├── VF2Matcher.java │ │ │ └── VF2State.java │ │ ├── ged │ │ │ ├── GEDResult.java │ │ │ ├── HungarianAlgorithm.java │ │ │ └── GEDMatcher.java │ │ ├── GEDGraphMatcher.java │ │ ├── UllmannGraphMatcher.java │ │ ├── heuristic │ │ │ └── JaroWinklerSimilarity.java │ │ ├── VF2GraphMatcher.java │ │ └── ullmann │ │ │ └── UllmannMatcher.java │ ├── GraphMatcher.java │ ├── GraphMatcherFactory.java │ ├── StrategySettings.java │ ├── NodeFeasibility.java │ ├── NodeMapping.java │ ├── GraphMapping.java │ └── DiffEngine.java │ ├── io │ ├── JsonOperationSerializer.java │ ├── OperationFormatter.java │ ├── OperationSerializer.java │ └── JsonOperationFormatter.java │ ├── edit │ ├── model │ │ ├── EditOperation.java │ │ ├── Delete.java │ │ ├── Insert.java │ │ ├── Move.java │ │ ├── Update.java │ │ └── SyntaxDifference.java │ ├── EditDistanceCalculator.java │ ├── ClassMetadataDiffGenerator.java │ ├── SignatureDiffGenerator.java │ └── EditScriptGenerator.java │ ├── export │ ├── ExportUtils.java │ ├── EditScriptExporter.java │ └── DiffGraphExporter.java │ ├── util │ ├── SourceCodeMapper.java │ ├── SootInitializer.java │ └── CodeAnalysisUtils.java │ ├── graph │ ├── PDG.java │ ├── GraphTraversal.java │ ├── GraphExporter.java │ ├── GraphGenerator.java │ └── CycleDetection.java │ └── testclasses │ ├── TestFileBefore.java │ └── TestFileAfter.java ├── .gitignore ├── py-visualise ├── templates │ └── index.html ├── static │ └── css │ │ └── style.css └── app.py ├── pom.xml ├── README.md └── benchmark └── evaluation-scripts └── analysis_line_num_granularity.py /images/overview.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/syntax/PDGdiff/main/images/overview.png -------------------------------------------------------------------------------- /images/text-based.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syntax/PDGdiff/main/images/text-based.jpg -------------------------------------------------------------------------------- /images/refactoredgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syntax/PDGdiff/main/images/refactoredgraph.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "benchmark/datasets"] 2 | path = benchmark/datasets 3 | url = https://github.com/syntax/datasets.git 4 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/vf2/CandidatePair.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models.vf2; 2 | 3 | 
import soot.toolkits.graph.pdg.PDGNode; 4 | 5 | class CandidatePair { 6 | PDGNode n1; 7 | PDGNode n2; 8 | 9 | public CandidatePair(PDGNode n1, PDGNode n2) { 10 | this.n1 = n1; 11 | this.n2 = n2; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/ged/GEDResult.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models.ged; 2 | 3 | import org.pdgdiff.matching.NodeMapping; 4 | 5 | public class GEDResult { 6 | public final double distance; 7 | public final NodeMapping nodeMapping; 8 | 9 | public GEDResult(double distance, NodeMapping nodeMapping) { 10 | this.distance = distance; 11 | this.nodeMapping = nodeMapping; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/io/JsonOperationSerializer.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.io; 2 | 3 | import org.pdgdiff.edit.model.EditOperation; 4 | import org.pdgdiff.matching.StrategySettings; 5 | 6 | import java.io.Writer; 7 | import java.util.List; 8 | 9 | public class JsonOperationSerializer extends OperationSerializer { 10 | 11 | public JsonOperationSerializer(List editScript, StrategySettings settings) { 12 | super(editScript, settings); 13 | } 14 | 15 | @Override 16 | protected OperationFormatter newFormatter(Writer writer) { 17 | return new JsonOperationFormatter(writer); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/edit/model/EditOperation.java: 
-------------------------------------------------------------------------------- 1 | package org.pdgdiff.edit.model; 2 | 3 | import soot.toolkits.graph.pdg.PDGNode; 4 | 5 | public abstract class EditOperation { 6 | protected PDGNode node; 7 | 8 | public EditOperation(PDGNode node) { 9 | this.node = node; 10 | } 11 | 12 | public PDGNode getNode() { 13 | return node; 14 | } 15 | 16 | public abstract String getName(); 17 | 18 | @Override 19 | public abstract String toString(); 20 | 21 | // following are to prevent duplicate entries in edit scripts 22 | @Override 23 | public abstract boolean equals(Object obj); 24 | 25 | @Override 26 | public abstract int hashCode(); 27 | } 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | !**/src/main/**/target/ 4 | !**/src/test/**/target/ 5 | 6 | ### IntelliJ IDEA ### 7 | .idea/modules.xml 8 | .idea/jarRepositories.xml 9 | .idea/compiler.xml 10 | .idea/libraries/ 11 | *.iws 12 | *.iml 13 | *.ipr 14 | 15 | ### Eclipse ### 16 | .apt_generated 17 | .classpath 18 | .factorypath 19 | .project 20 | .settings 21 | .springBeans 22 | .sts4-cache 23 | 24 | ### NetBeans ### 25 | /nbproject/private/ 26 | /nbbuild/ 27 | /dist/ 28 | /nbdist/ 29 | /.nb-gradle/ 30 | build/ 31 | !**/src/main/**/build/ 32 | !**/src/test/**/build/ 33 | 34 | ### VS Code ### 35 | .vscode/ 36 | 37 | ### Mac OS ### 38 | .DS_Store 39 | 40 | out/* 41 | py-visualise/out/* 42 | py-visualise/testclasses/* -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/io/OperationFormatter.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.io; 2 | 3 | import org.pdgdiff.edit.model.*; 4 | import org.pdgdiff.matching.StrategySettings; 5 | 6 | public interface OperationFormatter { 7 | 8 | void 
/**
 * Utility class providing a SHA-256 hex digest helper.
 */
public class ExportUtils {

    /** Lookup table for lowercase hexadecimal digits. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Returns the SHA-256 digest of {@code methodName}, encoded as UTF-8, rendered as 64 lowercase
     * hexadecimal characters.
     *
     * @param methodName text to hash; must not be {@code null}
     * @return lowercase hexadecimal SHA-256 digest of the input
     * @throws IllegalStateException if the runtime offers no SHA-256 implementation
     */
    public static String generateHash(String methodName) {
        final MessageDigest digest;
        try {
            digest = MessageDigest.getInstance("SHA-256");
        } catch (NoSuchAlgorithmException e) {
            // Fail loudly with the cause attached rather than handing back an error sentence
            // that a caller would mistake for a digest.
            throw new IllegalStateException(
                    methodName + ": failed to generate hash for method name!", e);
        }

        byte[] hashBytes = digest.digest(methodName.getBytes(StandardCharsets.UTF_8));

        // Two hex characters per byte; a table lookup avoids running a Formatter for every byte.
        char[] hex = new char[hashBytes.length * 2];
        for (int i = 0; i < hashBytes.length; i++) {
            int unsigned = hashBytes[i] & 0xFF;
            hex[2 * i] = HEX_DIGITS[unsigned >>> 4];
            hex[2 * i + 1] = HEX_DIGITS[unsigned & 0x0F];
        }
        return new String(hex);
    }

}
public static GraphMatcher createMatcher(MatchingStrategy strategy, List srcPDGs, List destPDGs) { 19 | switch (strategy) { 20 | case VF2: 21 | return new VF2GraphMatcher(srcPDGs, destPDGs); 22 | case ULLMANN: 23 | return new UllmannGraphMatcher(srcPDGs, destPDGs); 24 | case GED: 25 | return new GEDGraphMatcher(srcPDGs, destPDGs); 26 | default: 27 | throw new IllegalArgumentException("Unknown matching strategy: " + strategy); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/StrategySettings.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching; 2 | 3 | import org.pdgdiff.edit.RecoveryProcessor; 4 | 5 | public class StrategySettings { 6 | protected RecoveryProcessor.RecoveryStrategy recoveryStrategy; 7 | protected GraphMatcherFactory.MatchingStrategy matchingStrategy; 8 | protected boolean aggregateRecovery; 9 | 10 | public StrategySettings(RecoveryProcessor.RecoveryStrategy recoveryStrategy, GraphMatcherFactory.MatchingStrategy matchingStrategy, boolean aggregateRecovery) { 11 | this.recoveryStrategy = recoveryStrategy; 12 | this.matchingStrategy = matchingStrategy; 13 | this.aggregateRecovery = aggregateRecovery; 14 | } 15 | 16 | public RecoveryProcessor.RecoveryStrategy getRecoveryStrategy() { 17 | return recoveryStrategy; 18 | } 19 | 20 | public GraphMatcherFactory.MatchingStrategy getMatchingStrategy() { 21 | return matchingStrategy; 22 | } 23 | 24 | 25 | public boolean isAggregateRecovery() { 26 | return aggregateRecovery; 27 | } 28 | 29 | public void setRecoveryStrategy(RecoveryProcessor.RecoveryStrategy recoveryStrategy) { 30 | this.recoveryStrategy = recoveryStrategy; 31 | } 32 | 33 | public void setMatchingStrategy(GraphMatcherFactory.MatchingStrategy matchingStrategy) { 34 | this.matchingStrategy = matchingStrategy; 35 | } 36 | } 37 | 
/**
 * Maps line numbers to source code snippets for naive parsing when Soot struggles to define a line number for a unit.
 * Used for later hoisting of elements when constructing the Edit Script at the source level.
 *
 * <p>Line numbers are 1-based: the first line of the file is stored under key {@code 1}.
 */
public class SourceCodeMapper {
    private final HashMap<Integer, String> lineNumberToCodeMap;

    /**
     * Reads the whole file into memory, one map entry per line.
     *
     * @param sourceFilePath path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public SourceCodeMapper(String sourceFilePath) throws IOException {
        lineNumberToCodeMap = new HashMap<>();
        loadSourceCode(sourceFilePath);
    }

    /**
     * Fills the map from the file at {@code sourceFilePath}.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private void loadSourceCode(String sourceFilePath) throws IOException {
        // try-with-resources closes the reader even when readLine() throws part-way through.
        // NOTE(review): FileReader decodes with the platform default charset on older JDKs;
        // confirm that is acceptable for the source files being mapped.
        try (BufferedReader reader = new BufferedReader(new FileReader(sourceFilePath))) {
            int lineNumber = 1;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                lineNumberToCodeMap.put(lineNumber, line);
                lineNumber++;
            }
        }
    }

    /**
     * @param lineNumber 1-based line number
     * @return the text of that line, or the empty string when no such line was read
     */
    public String getCodeLine(int lineNumber) {
        return lineNumberToCodeMap.getOrDefault(lineNumber, "");
    }

    /** @return the number of lines that were read from the file */
    public int getTotalLines() {
        return lineNumberToCodeMap.size();
    }
}
this.lineNumber = lineNumber; 14 | this.codeSnippet = codeSnippet; 15 | } 16 | 17 | public PDGNode getNode() { return node; } 18 | 19 | public int getLineNumber() { 20 | return lineNumber; 21 | } 22 | 23 | public String getCodeSnippet() { 24 | return codeSnippet; 25 | } 26 | 27 | @Override 28 | public String getName() { 29 | return "Delete"; 30 | } 31 | 32 | @Override 33 | public String toString() { 34 | return String.format("Delete at line %d: %s", lineNumber, codeSnippet); 35 | } 36 | 37 | @Override 38 | public boolean equals(Object obj) { 39 | if (this == obj) return true; 40 | if (!(obj instanceof Delete)) return false; 41 | Delete other = (Delete) obj; 42 | return lineNumber == other.lineNumber && 43 | Objects.equals(codeSnippet, other.codeSnippet); 44 | } 45 | 46 | @Override 47 | public int hashCode() { 48 | return Objects.hash(lineNumber, codeSnippet); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/edit/model/Insert.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.edit.model; 2 | 3 | import soot.toolkits.graph.pdg.PDGNode; 4 | 5 | import java.util.Objects; 6 | 7 | public class Insert extends EditOperation { 8 | private int lineNumber; 9 | private String codeSnippet; 10 | 11 | public Insert(PDGNode node, int lineNumber, String codeSnippet) { 12 | super(node); 13 | this.lineNumber = lineNumber; 14 | this.codeSnippet = codeSnippet; 15 | } 16 | 17 | public PDGNode getNode() { return node; } 18 | 19 | public int getLineNumber() { 20 | return lineNumber; 21 | } 22 | 23 | public String getCodeSnippet() { 24 | return codeSnippet; 25 | } 26 | 27 | @Override 28 | public String getName() { 29 | return "Insert"; 30 | } 31 | 32 | @Override 33 | public String toString() { 34 | return String.format("Insert at line %d: %s", lineNumber, codeSnippet); 35 | } 36 | 37 | @Override 38 | public boolean equals(Object obj) { 39 | if (this == 
obj) return true; 40 | if (!(obj instanceof Insert)) return false; 41 | Insert other = (Insert) obj; 42 | return lineNumber == other.lineNumber && 43 | Objects.equals(codeSnippet, other.codeSnippet); 44 | } 45 | 46 | @Override 47 | public int hashCode() { 48 | return Objects.hash(lineNumber, codeSnippet); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /py-visualise/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Code Diff Viewer 7 | 8 | 14 | 15 | 16 |

Code Difference Viewer

17 | 18 |
19 |
20 |

TestFileBefore.java

21 |
{{ class1 | safe }}
22 |
23 |
24 |

TestFileAfter.java

25 |
{{ class2 | safe }}
26 |
27 |
28 | 29 |

Highlighted Differences Summary

30 |
31 | {% for diff in diffs %} 32 |
33 |

Old Line {{ diff.oldLine }}:

34 |
{{ diff.oldCode | safe }}
35 |
36 |
37 |

New Line {{ diff.newLine }}:

38 |
{{ diff.newCode | safe }}
39 |
40 |
41 | {% endfor %} 42 |
43 | 44 | 45 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/graph/PDG.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.graph; 2 | 3 | import soot.toolkits.graph.HashMutableEdgeLabelledDirectedGraph; 4 | import soot.toolkits.graph.UnitGraph; 5 | import soot.toolkits.graph.pdg.PDGNode; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * Program Dependency Graph (PDG) class that extends the Soot HashMutableEdgeLabelledDirectedGraph. this is 11 | * similar to the soot HashMutablePDG, but removes some abstractions that remove the granularity of the PDG and allows 12 | * for specific edge types to be added to the graph. 13 | */ 14 | public class PDG extends HashMutableEdgeLabelledDirectedGraph { 15 | private UnitGraph cfg = null; 16 | protected PDGNode startNode = null; 17 | 18 | public PDG() { 19 | super(); 20 | } 21 | 22 | public void setCFG(UnitGraph cfg) { 23 | this.cfg = cfg; 24 | } 25 | 26 | public UnitGraph getCFG() { 27 | return cfg; 28 | } 29 | 30 | public PDGNode getStartNode() { 31 | return startNode; 32 | } 33 | 34 | public boolean hasDataEdge(PDGNode src, PDGNode tgt) { 35 | return this.containsEdge(src, tgt, GraphGenerator.DependencyTypes.DATA_DEPENDENCY); 36 | } 37 | 38 | public boolean hasControlEdge(PDGNode src, PDGNode tgt) { 39 | return this.containsEdge(src, tgt, GraphGenerator.DependencyTypes.CONTROL_DEPENDENCY); 40 | } 41 | 42 | public List getEdgeLabels(PDGNode src, PDGNode tgt) { 43 | return this.getLabelsForEdges(src, tgt); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/io/OperationSerializer.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.io; 2 | 3 | import org.pdgdiff.edit.model.*; 4 | import org.pdgdiff.matching.StrategySettings; 5 | 6 | import java.io.Writer; 7 | import 
java.util.List; 8 | 9 | public abstract class OperationSerializer { 10 | protected List editScript; 11 | protected StrategySettings settings; 12 | 13 | 14 | public OperationSerializer(List editScript, StrategySettings settings) { 15 | this.editScript = editScript; 16 | this.settings = settings; 17 | } 18 | 19 | protected abstract OperationFormatter newFormatter(Writer writer) throws Exception; 20 | 21 | public void writeTo(Writer writer) throws Exception { 22 | OperationFormatter formatter = newFormatter(writer); 23 | 24 | formatter.startOutput(); 25 | 26 | if (settings != null) { 27 | formatter.writeInfo(settings); 28 | } 29 | 30 | formatter.startOperations(); 31 | for (EditOperation op : editScript) { 32 | if (op instanceof Insert) { 33 | formatter.insertOperation((Insert) op); 34 | } else if (op instanceof Delete) { 35 | formatter.deleteOperation((Delete) op); 36 | } else if (op instanceof Update) { 37 | formatter.updateOperation((Update) op); 38 | } else if (op instanceof Move) { 39 | formatter.moveOperation((Move) op); 40 | } 41 | } 42 | formatter.endOperations(); 43 | formatter.endOutput(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/NodeFeasibility.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching; 2 | 3 | import soot.toolkits.graph.pdg.PDGNode; 4 | 5 | public class NodeFeasibility { 6 | public static boolean isSameNodeCategory(PDGNode n1, PDGNode n2) { 7 | // get unit for each node 8 | Object node1 = n1.getNode(); 9 | Object node2 = n2.getNode(); 10 | 11 | // check for abstract syntax categories 12 | return (isStatement(node1) && isStatement(node2)) || 13 | (isDeclaration(node1) && isDeclaration(node2)) || 14 | (isControlFlowNode(node1) && isControlFlowNode(node2)) || 15 | (isDataNode(node1) && isDataNode(node2)); 16 | } 17 | 18 | private static boolean isStatement(Object node) { 19 | return node 
instanceof soot.jimple.Stmt; 20 | } 21 | 22 | private static boolean isDeclaration(Object node) { 23 | if (node instanceof soot.Value) { 24 | soot.Value value = (soot.Value) node; 25 | 26 | // check for local variables 27 | if (value instanceof soot.jimple.internal.JimpleLocal) { 28 | return true; 29 | } 30 | 31 | // check for field references 32 | return value instanceof soot.jimple.InstanceFieldRef || value instanceof soot.jimple.StaticFieldRef; 33 | } 34 | return false; 35 | } 36 | 37 | private static boolean isControlFlowNode(Object node) { 38 | return node instanceof soot.jimple.IfStmt || node instanceof soot.jimple.SwitchStmt; 39 | } 40 | 41 | private static boolean isDataNode(Object node) { 42 | return node instanceof soot.jimple.AssignStmt || node instanceof soot.jimple.ArrayRef; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/edit/model/Move.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.edit.model; 2 | 3 | import soot.toolkits.graph.pdg.PDGNode; 4 | 5 | import java.util.Objects; 6 | 7 | /** 8 | * Represents a move operation in the edit script. 
9 | */ 10 | public class Move extends EditOperation { 11 | private int oldLineNumber; 12 | private int newLineNumber; 13 | private String codeSnippet; 14 | 15 | public Move(PDGNode node, int oldLineNumber, int newLineNumber, String codeSnippet) { 16 | super(node); 17 | this.oldLineNumber = oldLineNumber; 18 | this.newLineNumber = newLineNumber; 19 | this.codeSnippet = codeSnippet; 20 | } 21 | 22 | public int getOldLineNumber() { 23 | return oldLineNumber; 24 | } 25 | 26 | public int getNewLineNumber() { 27 | return newLineNumber; 28 | } 29 | 30 | public String getCodeSnippet() { 31 | return codeSnippet; 32 | } 33 | 34 | @Override 35 | public String getName() { 36 | return "Move"; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | return String.format("Move from line %d to line %d: %s", oldLineNumber, newLineNumber, codeSnippet); 42 | } 43 | 44 | @Override 45 | public boolean equals(Object obj) { 46 | if (this == obj) return true; 47 | if (!(obj instanceof Move)) return false; 48 | Move other = (Move) obj; 49 | return oldLineNumber == other.oldLineNumber && 50 | newLineNumber == other.newLineNumber && 51 | Objects.equals(codeSnippet, other.codeSnippet); 52 | } 53 | 54 | @Override 55 | public int hashCode() { 56 | return Objects.hash(oldLineNumber, newLineNumber, codeSnippet); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /py-visualise/static/css/style.css: -------------------------------------------------------------------------------- 1 | .highlight-change1-old { 2 | background-color: #ffcccc; 3 | color: #990000; 4 | } 5 | 6 | .highlight-change1-new { 7 | background-color: #ccffcc; 8 | color: #006600; 9 | } 10 | 11 | .highlight-change2-old { 12 | background-color: #ccccff; 13 | color: #000099; 14 | } 15 | 16 | .highlight-change2-new { 17 | background-color: #ffccf2; 18 | color: #cc0099; 19 | } 20 | 21 | .highlight-change3-old { 22 | background-color: #ffccff; 23 | color: #990099; 24 | } 25 | 26 
| .highlight-change3-new { 27 | background-color: #ebccff; 28 | color: #6600cc; 29 | } 30 | 31 | .highlight-change4-old { 32 | background-color: #ffeecc; 33 | color: #996633; 34 | } 35 | 36 | .highlight-change4-new { 37 | background-color: #ccffff; 38 | color: #003366; 39 | } 40 | 41 | .highlight-change5-old { 42 | background-color: #e6e6ff; 43 | color: #3333cc; 44 | } 45 | 46 | .highlight-change5-new { 47 | background-color: #e6ffe6; 48 | color: #339933; 49 | } 50 | 51 | .highlight-change6-old { 52 | background-color: #ffe6e6; 53 | color: #cc3333; 54 | } 55 | 56 | .highlight-change6-new { 57 | background-color: #e6ffff; 58 | color: #339999; 59 | } 60 | 61 | .diff-container { 62 | display: flex; 63 | gap: 20px; 64 | } 65 | 66 | .deleted-line { 67 | text-decoration: line-through; 68 | background-color: #ffccf2; 69 | color: red; 70 | } 71 | 72 | .inserted-line { 73 | background-color: #d4fcbc; 74 | color: green; 75 | } 76 | 77 | .file-content { 78 | width: 45%; 79 | padding: 10px; 80 | border: 1px solid #ddd; 81 | background-color: #f9f9f9; 82 | white-space: pre-wrap; 83 | font-family: monospace; 84 | overflow-x: auto; 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/NodeMapping.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching; 2 | 3 | import soot.toolkits.graph.pdg.PDGNode; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | /** 9 | * NodeMapping class to store mappings between nodes in two PDGs. This class is used to store the mapping between 10 | * nodes in two PDGs that have been matched by the GraphMatcher. 
11 | */ 12 | public class NodeMapping { 13 | private Map nodeMapping; 14 | private Map reverseNodeMapping; 15 | 16 | public NodeMapping() { 17 | nodeMapping = new HashMap<>(); 18 | reverseNodeMapping = new HashMap<>(); 19 | } 20 | 21 | // adds a mapping between a source node and a destination node 22 | public void addMapping(PDGNode srcNode, PDGNode dstNode) { 23 | nodeMapping.put(srcNode, dstNode); 24 | reverseNodeMapping.put(dstNode, srcNode); 25 | } 26 | 27 | // exposes the entire node mapping 28 | public Map getNodeMapping() { 29 | return nodeMapping; 30 | } 31 | 32 | // exposes the reverse node mapping, useful for backwarsd traverse 33 | public Map getReverseNodeMapping() { 34 | return reverseNodeMapping; 35 | } 36 | 37 | // print all node mappings for debugging 38 | public void printMappings() { 39 | for (Map.Entry entry : nodeMapping.entrySet()) { 40 | System.out.println("Source Node: " + entry.getKey() 41 | + " --> Mapped to: " + entry.getValue()); 42 | } 43 | } 44 | 45 | public boolean isEmpty() { 46 | return nodeMapping.isEmpty(); 47 | } 48 | 49 | public int size() { 50 | return nodeMapping.size(); 51 | } 52 | } -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/GraphMapping.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching; 2 | 3 | import org.pdgdiff.graph.PDG; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | /** 9 | * GraphMapping class to store mappings between PDGs. This class is used to store the mapping between 10 | * PDGs in two lists that have been matched by the GraphMatcher. For each PDG mapping, a node mapping 11 | * is also stored. 
12 | */ 13 | public class GraphMapping { 14 | private Map graphMapping; 15 | private Map nodeMappings; 16 | 17 | public GraphMapping() { 18 | this.graphMapping = new HashMap<>(); 19 | this.nodeMappings = new HashMap<>(); 20 | } 21 | 22 | public void addGraphMapping(PDG srcPDG, PDG dstPDG, NodeMapping nodeMapping) { 23 | graphMapping.put(srcPDG, dstPDG); 24 | nodeMappings.put(srcPDG, nodeMapping); 25 | } 26 | 27 | // retrieves the node mapping for a given PDG pair 28 | public NodeMapping getNodeMapping(PDG srcPDG) { 29 | return nodeMappings.get(srcPDG); 30 | } 31 | 32 | // exposes the entire graph mapping 33 | public Map getGraphMapping() { 34 | return graphMapping; 35 | } 36 | 37 | // pretty print all graph mappings for debugging (redundant otherwise) 38 | public void printGraphMappings() { 39 | for (Map.Entry entry : graphMapping.entrySet()) { 40 | System.out.println("Source PDG: " + entry.getKey() + " --> Mapped to: " + entry.getValue()); 41 | NodeMapping nodeMapping = nodeMappings.get(entry.getKey()); 42 | if (nodeMapping != null) { 43 | System.out.println("Node Mappings for this PDG:"); 44 | nodeMapping.printMappings(); 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/graph/GraphTraversal.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.graph; 2 | 3 | import soot.toolkits.graph.pdg.PDGNode; 4 | 5 | import java.util.*; 6 | 7 | /** 8 | * this class is used to traverse the graph using bfs and collect all nodes. This acts as a helper function for other 9 | * methods, especially when order of nodes in the graph is an important consideration. 
10 | 11 | */ 12 | public class GraphTraversal { 13 | 14 | private static boolean debug = false; 15 | 16 | public static void setLogging(boolean enable) { 17 | debug = enable; 18 | } 19 | 20 | public static List collectNodesBFS(PDG pdg) { 21 | if (debug) System.out.println("[BFS] Traversing graph"); 22 | 23 | PDGNode start_node = pdg.getStartNode(); 24 | List nodeList = new ArrayList<>(); 25 | 26 | if (start_node == null) { 27 | if (debug) System.out.println("[BFS] No start node found in the PDG."); 28 | return nodeList; 29 | } 30 | 31 | Queue queue = new LinkedList<>(); 32 | Set visited = new HashSet<>(); 33 | 34 | queue.add(start_node); 35 | visited.add(start_node); 36 | nodeList.add(start_node); 37 | 38 | // begin BFS 39 | while (!queue.isEmpty()) { 40 | PDGNode current_node = queue.poll(); 41 | if (debug) System.out.println("[BFS] Visiting node: " + current_node.toShortString()); 42 | 43 | // add dependents to the queue 44 | List dependents = current_node.getDependents(); 45 | for (PDGNode dependent : dependents) { 46 | if (!visited.contains(dependent)) { 47 | queue.add(dependent); 48 | visited.add(dependent); 49 | nodeList.add(dependent); 50 | } 51 | } 52 | } 53 | 54 | if (debug) System.out.println("[BFS] BFS Graph traversal complete."); 55 | return nodeList; 56 | } 57 | 58 | public static int getNodeCount(PDG pdg) { 59 | List nodeList = collectNodesBFS(pdg); 60 | return nodeList.size(); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/vf2/VF2Matcher.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models.vf2; 2 | 3 | import org.pdgdiff.matching.NodeMapping; 4 | import org.pdgdiff.graph.PDG; 5 | import soot.toolkits.graph.pdg.PDGNode; 6 | 7 | import java.util.Map; 8 | 9 | /** 10 | * VF2Matcher class to perform graph matching using the VF2 algorithm. 
This class contains methods to match two PDGs 11 | * using the VF2 algorithm and return the node mappings between the two PDGs. 12 | */ 13 | public class VF2Matcher { 14 | private final PDG srcPdg; 15 | private final PDG dstPdg; 16 | private final NodeMapping nodeMapping; 17 | 18 | public VF2Matcher(PDG srcPdg, PDG dstPdg) { 19 | this.srcPdg = srcPdg; 20 | this.dstPdg = dstPdg; 21 | this.nodeMapping = new NodeMapping(); 22 | } 23 | 24 | public NodeMapping match() { 25 | // Initialize state 26 | VF2State state = new VF2State(srcPdg, dstPdg); 27 | // Start recursive matching 28 | if (matchRecursive(state)) { 29 | return nodeMapping; 30 | } else { 31 | // No isomorphism found 32 | return null; 33 | } 34 | } 35 | 36 | 37 | // TODO: investigate vf2, i believe it to be too strict. need to figure out way of mapping methods -> methods if they 38 | // TODO: are similar 'ish', dont' need exact matches when I am effectively looking at version differences. 39 | private boolean matchRecursive(VF2State state) { 40 | if (state.isComplete()) { 41 | // Mapping is complete, transfer mappings to nodeMapping 42 | for (Map.Entry entry : state.getMapping().entrySet()) { 43 | nodeMapping.addMapping(entry.getKey(), entry.getValue()); 44 | } 45 | return true; 46 | } 47 | 48 | // Generate candidate pairs 49 | for (CandidatePair pair : state.generateCandidates()) { 50 | if (state.isFeasible(pair)) { 51 | state.addPair(pair); 52 | if (matchRecursive(state)) { 53 | return true; 54 | } 55 | state.removePair(pair); 56 | } 57 | } 58 | return false; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/testclasses/TestFileBefore.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.testclasses; 2 | 3 | public class TestFileBefore { 4 | 5 | private int onefield; 6 | 7 | public String anotherfield; 8 | 9 | public static void main(String[] args) { 10 | TestFileBefore test 
= new TestFileBefore(); 11 | int result = test.addNumbers(5, 10); 12 | System.out.println("Result: " + result); 13 | int res = test.minus(10, 5); 14 | System.out.println("Result: " + res); 15 | 16 | int complexRes = test.detailedComputation(5, 10); 17 | System.out.println("Detailed Computation Result: " + complexRes); 18 | int t = test.identical(5, 10); 19 | System.out.println("identical Result: " + t); 20 | } 21 | 22 | public int identical(int num1, int num2) { 23 | int result = 0; 24 | 25 | // Conditional statements 26 | if (num1 > num2) { 27 | result = num1 + num2; 28 | } else if (num1 < num2) { 29 | result = num1 - num2; 30 | } else { 31 | result = num1 * num2; 32 | } 33 | return result; 34 | } 35 | 36 | public int addNumbers(int a, int b) { 37 | int toadd1 = a; 38 | int toadd2 = b; 39 | int sum = toadd1 + toadd2; 40 | return sum; 41 | } 42 | 43 | public int minus(int a, int b) { 44 | int sum = a - b; 45 | return sum; 46 | } 47 | 48 | // added these more complex classes with more intense control flow and non-matching names to try and catch edge cases 49 | public int detailedComputation(int num1, int num2) { 50 | int result = 0; 51 | 52 | // Conditional statements 53 | if (num1 > num2) { 54 | result = num1 + num2; 55 | } else if (num1 < num2) { 56 | result = num1 - num2; 57 | } else { 58 | result = num1 * num2; 59 | } 60 | 61 | // Loop that performs additional operations 62 | for (int i = 0; i < 4; i++) { 63 | result -= i; 64 | if (result % 3 == 0) { 65 | result /= 3; 66 | } else { 67 | result += i * 2; 68 | } 69 | } 70 | 71 | // Nested conditional inside a loop 72 | for (int i = 0; i < 6; i++) { 73 | if (i % 2 == 1) { 74 | result *= i; 75 | } 76 | } 77 | 78 | return result; 79 | } 80 | 81 | } -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/edit/model/Update.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.edit.model; 2 | 3 | import 
soot.toolkits.graph.pdg.PDGNode; 4 | 5 | import java.util.Objects; 6 | 7 | /** 8 | * Represents an update operation in the edit script. 9 | */ 10 | public class Update extends EditOperation { 11 | private int oldLineNumber; 12 | private int newLineNumber; 13 | private String oldCodeSnippet; 14 | private String newCodeSnippet; 15 | private SyntaxDifference syntaxDifference; 16 | 17 | public Update(PDGNode node, int oldLineNumber, int newLineNumber, 18 | String oldCodeSnippet, String newCodeSnippet, 19 | SyntaxDifference syntaxDifference) { 20 | super(node); 21 | this.oldLineNumber = oldLineNumber; 22 | this.newLineNumber = newLineNumber; 23 | this.oldCodeSnippet = oldCodeSnippet; 24 | this.newCodeSnippet = newCodeSnippet; 25 | this.syntaxDifference = syntaxDifference; 26 | } 27 | 28 | public int getOldLineNumber() { 29 | return oldLineNumber; 30 | } 31 | 32 | public int getNewLineNumber() { 33 | return newLineNumber; 34 | } 35 | 36 | public String getOldCodeSnippet() { 37 | return oldCodeSnippet; 38 | } 39 | 40 | public String getNewCodeSnippet() { 41 | return newCodeSnippet; 42 | } 43 | 44 | public SyntaxDifference getSyntaxDifference() { 45 | return syntaxDifference; 46 | } 47 | 48 | @Override 49 | public String getName() { 50 | return "Update"; 51 | } 52 | 53 | @Override 54 | public String toString() { 55 | return String.format("Update at lines %d -> %d:\nOld Code: %s\nNew Code: %s\nDifference: %s", 56 | oldLineNumber, newLineNumber, oldCodeSnippet, newCodeSnippet, syntaxDifference); 57 | } 58 | 59 | @Override 60 | public boolean equals(Object obj) { 61 | if (this == obj) return true; 62 | if (!(obj instanceof Update)) return false; 63 | Update other = (Update) obj; 64 | return oldLineNumber == other.oldLineNumber && 65 | newLineNumber == other.newLineNumber && 66 | Objects.equals(oldCodeSnippet, other.oldCodeSnippet) && 67 | Objects.equals(newCodeSnippet, other.newCodeSnippet) && 68 | Objects.equals(syntaxDifference, other.syntaxDifference); 69 | } 70 | 71 | 
@Override 72 | public int hashCode() { 73 | return Objects.hash(oldLineNumber, newLineNumber, oldCodeSnippet, newCodeSnippet, syntaxDifference); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/testclasses/TestFileAfter.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.testclasses; 2 | 3 | public class TestFileAfter { 4 | 5 | private int thefield; 6 | 7 | public static void main(String[] args) { 8 | TestFileAfter test = new TestFileAfter(); 9 | int number1 = 5; 10 | int number2 = 4; 11 | int result = test.addNumbers(number1, number2); 12 | System.out.println("Result: " + result); 13 | int product = test.multiplyNumbers(number1, number2); 14 | System.out.println("Product: " + product); 15 | 16 | 17 | 18 | int complexResult = test.complexCalculation(number1, number2); 19 | int identical_out = test.identical(3,10); 20 | System.out.println("Complex Calculation Result: " + complexResult); 21 | System.out.println("identical Result: " + identical_out); 22 | } 23 | 24 | public int addNumbers(int number, int number2) { 25 | int sum = number + number2; 26 | return sum; 27 | } 28 | 29 | public int multiplyNumbers(int number, int number2) { 30 | int product = number * number2; 31 | return product; 32 | } 33 | 34 | // added these more complex classes with more intense control flow and non-matching names to try and catch edge cases 35 | public int complexCalculation(int num1, int num2) { 36 | int result = 0; 37 | 38 | // Conditional statements 39 | if (num1 > num2) { 40 | result = num1 - num2; 41 | } else if (num1 < num2) { 42 | result = num1 + num2; 43 | } else { 44 | result = num1 * num2; 45 | } 46 | 47 | // Loop that performs additional operations 48 | for (int i = 0; i < 3; i++) { 49 | result += i; 50 | if (result % 2 == 0) { 51 | result /= 2; 52 | } else { 53 | result *= 3; 54 | } 55 | } 56 | 57 | // Nested conditional inside a loop 58 | 
for (int i = 0; i < 5; i++) { 59 | if (i % 2 == 0) { 60 | result += i; 61 | } else { 62 | result -= i; 63 | } 64 | } 65 | 66 | return result; 67 | } 68 | 69 | public int identical(int num1, int num2) { 70 | int result = 0; 71 | 72 | // Conditional statements 73 | if (num1 > num2) { 74 | result = num1 + num2; 75 | } else if (num1 < num2) { 76 | result = num1 - num2; 77 | } else { 78 | result = num1 * num2; 79 | } 80 | return result; 81 | } 82 | } -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/GEDGraphMatcher.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models; 2 | 3 | import org.pdgdiff.graph.PDG; 4 | import org.pdgdiff.matching.GraphMapping; 5 | import org.pdgdiff.matching.GraphMatcher; 6 | import org.pdgdiff.matching.NodeMapping; 7 | import org.pdgdiff.matching.models.ged.GEDMatcher; 8 | import org.pdgdiff.matching.models.ged.GEDResult; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | /** 14 | * A GraphMatcher that uses a Graph Edit Distance approach to 15 | * match PDGs from the source and dest file. Similar "outer loop" to VF2GraphMatcher, 16 | * but calls GEDMatcher internally for each PDG pair. 
17 | */ 18 | public class GEDGraphMatcher extends GraphMatcher { 19 | 20 | public GEDGraphMatcher(List srcPdgs, List dstPdgs) { 21 | super(srcPdgs, dstPdgs); 22 | } 23 | 24 | @Override 25 | public GraphMapping matchPDGLists() { 26 | List unmappedSrcPdgs = new ArrayList<>(srcPdgs); 27 | List unmappedDstPdgs = new ArrayList<>(dstPdgs); 28 | 29 | while (!unmappedSrcPdgs.isEmpty() && !unmappedDstPdgs.isEmpty()) { 30 | double minDistance = Double.POSITIVE_INFINITY; 31 | PDG bestSrcPdg = null; 32 | PDG bestDstPdg = null; 33 | NodeMapping bestNodeMapping = null; 34 | 35 | // for each unmatched PDG in src and dest, compute the minimal graph-edit distance 36 | for (PDG srcPdg : unmappedSrcPdgs) { 37 | for (PDG dstPdg : unmappedDstPdgs) { 38 | GEDMatcher ged = new GEDMatcher(srcPdg, dstPdg); 39 | GEDResult result = ged.match(); // get (distance, nodeMapping) 40 | 41 | if (result != null && result.distance < minDistance) { 42 | minDistance = result.distance; 43 | bestSrcPdg = srcPdg; 44 | bestDstPdg = dstPdg; 45 | bestNodeMapping = result.nodeMapping; 46 | } 47 | } 48 | } 49 | 50 | if (bestSrcPdg != null && bestDstPdg != null) { 51 | // found the "best" pair, remove them from the unmatched sets 52 | unmappedSrcPdgs.remove(bestSrcPdg); 53 | unmappedDstPdgs.remove(bestDstPdg); 54 | 55 | // the chosen mapping added in the global GraphMapping 56 | graphMapping.addGraphMapping(bestSrcPdg, bestDstPdg, bestNodeMapping); 57 | } else { 58 | // no good matches remain 59 | break; 60 | } 61 | } 62 | 63 | return graphMapping; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/UllmannGraphMatcher.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models; 2 | 3 | import org.pdgdiff.graph.GraphTraversal; 4 | import org.pdgdiff.graph.PDG; 5 | import org.pdgdiff.matching.GraphMapping; 6 | import org.pdgdiff.matching.GraphMatcher; 7 | 
import org.pdgdiff.matching.NodeMapping; 8 | import org.pdgdiff.matching.models.ullmann.UllmannMatcher; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | public class UllmannGraphMatcher extends GraphMatcher { 14 | public UllmannGraphMatcher(List list1, List list2) { 15 | super(list1, list2); 16 | } 17 | 18 | @Override 19 | public GraphMapping matchPDGLists() { 20 | List unmappedPDGs1 = new ArrayList<>(srcPdgs); 21 | List unmappedPDGs2 = new ArrayList<>(dstPdgs); 22 | 23 | while (!unmappedPDGs1.isEmpty() && !unmappedPDGs2.isEmpty()) { 24 | double maxScore = Double.NEGATIVE_INFINITY; 25 | PDG bestPdg1 = null; 26 | PDG bestPdg2 = null; 27 | NodeMapping bestNodeMapping = null; 28 | 29 | // for each pair of unmapped PDGs, compute similarity score 30 | for (PDG pdg1 : unmappedPDGs1) { 31 | for (PDG pdg2 : unmappedPDGs2) { 32 | UllmannMatcher ullmannMatcher = new UllmannMatcher(pdg1, pdg2); 33 | NodeMapping nodeMapping = ullmannMatcher.match(); 34 | 35 | if (nodeMapping != null && !nodeMapping.isEmpty()) { 36 | int mappedNodes = nodeMapping.size(); 37 | int unmappedNodes1 = GraphTraversal.getNodeCount(pdg1) - mappedNodes; 38 | int unmappedNodes2 = GraphTraversal.getNodeCount(pdg2) - mappedNodes; 39 | 40 | // TODO: this is using same score as vf2 matcher, again not sure if this is ideal! 
/**
 * String similarity scores based on the Jaro and Jaro-Winkler measures.
 * Every score is a double between 0 (nothing in common) and 1 (identical).
 */
public class JaroWinklerSimilarity {

    /**
     * Jaro-Winkler similarity: the Jaro score raised in proportion to the length of the
     * common prefix (at most 4 characters), using a scaling factor of 0.1.
     *
     * @param s1 first string
     * @param s2 second string
     * @return similarity in [0, 1]
     */
    public static double JaroWinklerSimilarity(String s1, String s2) {
        final double scalingFactor = 0.1;
        double base = jaroSimilarity(s1, s2);
        int sharedPrefix = commonPrefixLength(s1, s2);

        return base + (sharedPrefix * scalingFactor * (1 - base));
    }

    /**
     * Plain Jaro similarity.
     *
     * @param s1 first string
     * @param s2 second string
     * @return 1.0 for equal strings, 0.0 if either string is empty or no characters match,
     *         otherwise the Jaro score
     */
    public static double jaroSimilarity(String s1, String s2) {
        if (s1.equals(s2)) {
            return 1.0;
        }

        final int len1 = s1.length();
        final int len2 = s2.length();
        if (len1 == 0 || len2 == 0) {
            return 0.0;
        }

        // characters only count as matching within floor(max(|s1|, |s2|) / 2) - 1 positions
        final int window = Math.max(len1, len2) / 2 - 1;

        final boolean[] taken1 = new boolean[len1];
        final boolean[] taken2 = new boolean[len2];

        // pass 1: pair each character of s1 with the first free equal character in its window of s2
        int common = 0;
        for (int i = 0; i < len1; i++) {
            final char wanted = s1.charAt(i);
            final int upper = Math.min(i + window + 1, len2);
            int j = Math.max(0, i - window);
            while (j < upper && (taken2[j] || s2.charAt(j) != wanted)) {
                j++;
            }
            if (j < upper) {
                taken1[i] = true;
                taken2[j] = true;
                common++;
            }
        }

        if (common == 0) {
            return 0.0;
        }

        // pass 2: walk the matched characters of both strings in order and count disagreements
        int outOfOrder = 0;
        int cursor = 0;
        for (int i = 0; i < len1; i++) {
            if (!taken1[i]) {
                continue;
            }
            while (!taken2[cursor]) {
                cursor++;
            }
            if (s1.charAt(i) != s2.charAt(cursor)) {
                outOfOrder++;
            }
            cursor++;
        }

        // each transposition is seen twice, once from each side
        final int transpositions = outOfOrder / 2;

        final double shareOfS1 = common / (double) len1;
        final double shareOfS2 = common / (double) len2;
        final double inOrder = (common - transpositions) / (double) common;
        return (shareOfS1 + shareOfS2 + inOrder) / 3.0;
    }

    /**
     * Length of the common prefix of the two strings, capped at 4.
     *
     * @param s1 first string
     * @param s2 second string
     * @return number of leading characters that agree, between 0 and 4
     */
    public static int commonPrefixLength(String s1, String s2) {
        final int limit = Math.min(4, Math.min(s1.length(), s2.length()));

        int count = 0;
        while (count < limit && s1.charAt(count) == s2.charAt(count)) {
            count++;
        }
        return count;
    }
}
/**
 * Hungarian (Kuhn-Munkres) algorithm for the assignment problem on a square cost matrix.
 *
 * NB: the matrix must be square; callers are expected to pad it when the two sides differ in size.
 *
 * Sources:
 *   https://www.hungarianalgorithm.com/examplehungarianalgorithm.php
 *   https://en.wikipedia.org/wiki/Hungarian_algorithm
 */
public class HungarianAlgorithm {

    /**
     * Finds a minimum-total-cost assignment of every row to a distinct column.
     *
     * The search uses the usual potentials formulation, which is 1-based internally: index 0
     * of the working arrays is a virtual column holding the row currently being inserted,
     * so the working arrays have n + 1 slots and indices are shifted by one when reading the
     * matrix and when writing the result.
     *
     * @param costMatrix n x n matrix where costMatrix[row][col] is the cost of assigning row to
     *                   col; columns beyond index n - 1 are ignored
     * @return array of length n with assignment[row] = col, both 0-based; every row receives a
     *         column (an empty matrix yields an empty array)
     * @throws IllegalArgumentException if a row is null or has fewer than n entries, or if the
     *                                  costs (NaN / infinite) leave no column reachable
     */
    public static int[] minimizeAssignment(double[][] costMatrix) {
        int n = costMatrix.length;
        for (int row = 0; row < n; row++) {
            if (costMatrix[row] == null || costMatrix[row].length < n) {
                throw new IllegalArgumentException(
                        "cost matrix must be square: row " + row + " has fewer than " + n + " columns");
            }
        }

        int[] assignment = new int[n];
        Arrays.fill(assignment, -1);

        // partialMatch[col] = row matched to that column (1-based), 0 meaning "column is free"
        int[] partialMatch = new int[n + 1];
        // trace[col] = previous column on the alternating path, used to unwind the augmentation
        int[] trace = new int[n + 1];
        double[] potentialRows = new double[n + 1];
        double[] potentialCols = new double[n + 1];

        for (int i = 1; i <= n; i++) {
            // insert row i: park it in the virtual column 0 and grow an alternating tree from there
            partialMatch[0] = i;
            int currCol = 0;
            double[] minCols = new double[n + 1];
            boolean[] used = new boolean[n + 1];
            Arrays.fill(minCols, Double.POSITIVE_INFINITY);

            do {
                used[currCol] = true;
                int currRow = partialMatch[currCol];
                double delta = Double.POSITIVE_INFINITY;
                int nextCol = 0;

                for (int j = 1; j <= n; j++) {
                    if (!used[j]) {
                        double cur = costMatrix[currRow - 1][j - 1] - potentialRows[currRow] - potentialCols[j];
                        if (cur < minCols[j]) {
                            minCols[j] = cur;
                            trace[j] = currCol;
                        }
                        if (minCols[j] < delta) {
                            delta = minCols[j];
                            nextCol = j;
                        }
                    }
                }

                if (nextCol == 0) {
                    // no unused column has a comparable reduced cost; without this check the
                    // loop would revisit the virtual column forever
                    throw new IllegalArgumentException(
                            "cost matrix contains NaN or has no finite-cost assignment");
                }

                // shift the potentials so the cheapest reachable column becomes tight
                for (int j = 0; j <= n; j++) {
                    if (used[j]) {
                        potentialRows[partialMatch[j]] += delta;
                        potentialCols[j] -= delta;
                    } else {
                        minCols[j] -= delta;
                    }
                }
                currCol = nextCol;
            } while (partialMatch[currCol] != 0); // stop once a free column has been reached

            // walk the path back to the virtual column, flipping the matching along the way
            do {
                int prevCol = trace[currCol];
                partialMatch[currCol] = partialMatch[prevCol];
                currCol = prevCol;
            } while (currCol != 0);
        }

        // partialMatch[j] = i means column j is matched to row i (both 1-based)
        for (int j = 1; j <= n; j++) {
            assignment[partialMatch[j] - 1] = j - 1;
        }
        return assignment;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/util/SootInitializer.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.util; 2 | 3 | import soot.G; 4 | import soot.Scene; 5 | import soot.options.Options; 6 | 7 | import java.util.Collections; 8 | 9 | /** 10 | * SootInitializer class to initialize Soot, the static analysis framework of this specific implementation of the 11 | * approach with the necessary configurations for PDG generation. 12 | */ 13 | public class SootInitializer { 14 | 15 | public static void initializeSoot(String dir) { 16 | resetSoot(); 17 | 18 | // setting soot options 19 | Options.v().set_prepend_classpath(true); 20 | Options.v().set_allow_phantom_refs(true); 21 | Options.v().set_output_format(Options.output_format_jimple); 22 | Options.v().set_verbose(true); // Debug output 23 | 24 | // The following phase options are configured to preserve the original code structure, as well as poss. 
25 | // read https://www.sable.mcgill.ca/soot/tutorial/phase/phase.html 26 | // in some cases however this is not possible because of how soot constructs Jimple, this is a limitation of 27 | // the implementation of this approach 28 | Options.v().set_keep_line_number(true); 29 | 30 | Options.v().setPhaseOption("jb", "use-original-names:true"); 31 | Options.v().setPhaseOption("jb", "use-original-bytecode:true"); 32 | Options.v().setPhaseOption("jj", "simplify-off:true"); 33 | 34 | Options.v().setPhaseOption("jb.dce", "enabled:false"); // Disable dead code elimination 35 | Options.v().setPhaseOption("jb.dae", "enabled:false"); // Disable dead assignment elimination 36 | Options.v().setPhaseOption("jb.uce", "enabled:false"); // Disable unreachable code elimination 37 | Options.v().setPhaseOption("jb.cp", "enabled:false"); // Disable const propagation 38 | Options.v().setPhaseOption("jb.ule", "enabled:false"); // Disable unused local elimination 39 | Options.v().setPhaseOption("jop", "enabled:false"); // Disable optimizations like const folding 40 | Options.v().setPhaseOption("wjop", "enabled:false"); // Disable whole-program optimizations 41 | 42 | Options.v().setPhaseOption("jb.tr", "enabled:false"); // Disable transformation on control flow 43 | Options.v().setPhaseOption("bb", "enabled:false"); // Disable basic block merging or splitting 44 | Options.v().setPhaseOption("jap", "enabled:false"); // Disable aggregation 45 | Options.v().setPhaseOption("jtp.ls", "enabled:false"); // Disable loop simplification 46 | Options.v().setPhaseOption("jop.uce", "enabled:false"); // Disable unreachable code elimination 47 | Options.v().setPhaseOption("jop.cpf", "enabled:false"); 48 | 49 | 50 | Options.v().set_soot_classpath(dir); 51 | Options.v().set_process_dir(Collections.singletonList(dir)); 52 | 53 | Options.v().set_whole_program(true); 54 | Options.v().set_no_bodies_for_excluded(true); 55 | 56 | 57 | // finally loading necessary classes into the soot scene 58 | 
Scene.v().loadNecessaryClasses(); 59 | } 60 | 61 | 62 | // reset Soot (clean up) 63 | public static void resetSoot() { 64 | G.reset(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/VF2GraphMatcher.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models; 2 | 3 | import org.pdgdiff.graph.GraphTraversal; 4 | import org.pdgdiff.graph.PDG; 5 | import org.pdgdiff.matching.GraphMapping; 6 | import org.pdgdiff.matching.GraphMatcher; 7 | import org.pdgdiff.matching.NodeMapping; 8 | import org.pdgdiff.matching.models.vf2.VF2Matcher; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | public class VF2GraphMatcher extends GraphMatcher { 14 | public VF2GraphMatcher(List srcPdgs, List dstPdgs) { 15 | super(srcPdgs, dstPdgs); 16 | } 17 | 18 | @Override 19 | public GraphMapping matchPDGLists() { 20 | List unmappedSrcPdgs = new ArrayList<>(srcPdgs); 21 | List unmappedDstPdgs = new ArrayList<>(dstPdgs); 22 | 23 | while (!unmappedSrcPdgs.isEmpty() && !unmappedDstPdgs.isEmpty()) { 24 | double maxScore = Double.NEGATIVE_INFINITY; 25 | PDG bestSrcPdg = null; 26 | PDG bestDstPdg = null; 27 | NodeMapping bestNodeMapping = null; 28 | 29 | // for each pair of unmapped PDGs, compute similarity score 30 | for (PDG srcPdg : unmappedSrcPdgs) { 31 | for (PDG dstPdg : unmappedDstPdgs) { 32 | VF2Matcher vf2Matcher = new VF2Matcher(srcPdg, dstPdg); 33 | NodeMapping nodeMapping = vf2Matcher.match(); 34 | 35 | if (nodeMapping != null && !nodeMapping.isEmpty()) { 36 | int mappedNodes = nodeMapping.size(); 37 | int unmappedSrcNodes = GraphTraversal.getNodeCount(srcPdg) - mappedNodes; 38 | int unmappedDstNodes = GraphTraversal.getNodeCount(dstPdg) - mappedNodes; 39 | 40 | // calculate the score that minimizes unmapped nodes, this is my 'similarity' metric as of rn lol 41 | // this might be to be improved. 
TODO look into other metrics/ measures. 42 | // TODO might want to add a threshold. possibly not all graphs should be mapped to all graphs! 43 | double score = (double) mappedNodes / (mappedNodes + unmappedSrcNodes + unmappedDstNodes); 44 | 45 | if (score > maxScore) { 46 | maxScore = score; 47 | bestSrcPdg = srcPdg; 48 | bestDstPdg = dstPdg; 49 | bestNodeMapping = nodeMapping; 50 | } 51 | } 52 | } 53 | } 54 | 55 | if (bestSrcPdg != null && bestDstPdg != null) { 56 | unmappedSrcPdgs.remove(bestSrcPdg); 57 | unmappedDstPdgs.remove(bestDstPdg); 58 | graphMapping.addGraphMapping(bestSrcPdg, bestDstPdg, bestNodeMapping); 59 | } else { 60 | break; 61 | } 62 | } 63 | 64 | // handling PDGs in src that were not matched 65 | for (PDG pdg1 : unmappedSrcPdgs) { 66 | System.out.println("No matching PDG found for: " + pdg1.getCFG().getBody().getMethod().getSignature()); 67 | } 68 | 69 | return graphMapping; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/io/JsonOperationFormatter.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.io; 2 | 3 | import org.pdgdiff.edit.model.*; 4 | import com.google.gson.stream.JsonWriter; 5 | import org.pdgdiff.matching.StrategySettings; 6 | 7 | import java.io.IOException; 8 | import java.io.Writer; 9 | 10 | public class JsonOperationFormatter implements OperationFormatter { 11 | private final JsonWriter writer; 12 | 13 | public JsonOperationFormatter(Writer writer) { 14 | this.writer = new JsonWriter(writer); 15 | this.writer.setIndent(" "); 16 | } 17 | 18 | @Override 19 | public void writeInfo(StrategySettings settings) throws Exception { 20 | writer.name("strategySettings").beginObject(); 21 | writer.name("recoveryStrategy").value(settings.getRecoveryStrategy().toString()); 22 | writer.name("matchingStrategy").value(settings.getMatchingStrategy().toString()); 23 | writer.endObject(); 24 | } 25 | 26 | 
/** Opens the root JSON object. */ @Override 27 | public void startOutput() throws IOException { 28 | writer.beginObject(); 29 | } 30 | /** Closes the root JSON object and then closes the underlying JsonWriter. */ 31 | @Override 32 | public void endOutput() throws IOException { 33 | writer.endObject(); 34 | writer.close(); 35 | } 36 | /** Writes the "actions" name and opens its array. */ 37 | @Override 38 | public void startOperations() throws IOException { 39 | writer.name("actions").beginArray(); 40 | } 41 | /** Closes the array opened by startOperations(). */ 42 | @Override 43 | public void endOperations() throws IOException { 44 | writer.endArray(); 45 | } 46 | /** Writes one object with "action" = "Insert", "line" = operation.getLineNumber() and "code" = operation.getCodeSnippet(). */ 47 | @Override 48 | public void insertOperation(Insert operation) throws IOException { 49 | writer.beginObject(); 50 | writer.name("action").value("Insert"); 51 | writer.name("line").value(operation.getLineNumber()); 52 | writer.name("code").value(operation.getCodeSnippet()); 53 | writer.endObject(); 54 | } 55 | /** Writes one object with "action" = "Delete", "line" = operation.getLineNumber() and "code" = operation.getCodeSnippet(). */ 56 | @Override 57 | public void deleteOperation(Delete operation) throws IOException { 58 | writer.beginObject(); 59 | writer.name("action").value("Delete"); 60 | writer.name("line").value(operation.getLineNumber()); 61 | writer.name("code").value(operation.getCodeSnippet()); 62 | writer.endObject(); 63 | } 64 | /** Writes one "Update" object with oldLine, newLine, oldCode and newCode. The "difference" key holds a nested object (message, oldJimple, newJimple) when the operation carries a SyntaxDifference, and the fixed string "signature or class metadata change" otherwise, so readers of the JSON must accept either an object or a string for that key. */ 65 | @Override 66 | public void updateOperation(Update operation) throws IOException { 67 | writer.beginObject(); 68 | writer.name("action").value("Update"); 69 | writer.name("oldLine").value(operation.getOldLineNumber()); 70 | writer.name("newLine").value(operation.getNewLineNumber()); 71 | writer.name("oldCode").value(operation.getOldCodeSnippet()); 72 | writer.name("newCode").value(operation.getNewCodeSnippet()); 73 | 74 | SyntaxDifference diff = operation.getSyntaxDifference(); 75 | if (diff != null) { 76 | writer.name("difference").beginObject(); 77 | writer.name("message").value(diff.getMessage()); 78 | writer.name("oldJimple").value(diff.getOldJimpleCode()); 79 | writer.name("newJimple").value(diff.getNewJimpleCode()); 80 | writer.endObject(); 81 | } else { /* no SyntaxDifference attached to this update */ 82 | writer.name("difference").value("signature or class metadata change"); 83 | } 84 | 85 | writer.endObject(); 86 | } 87 | 88 | @Override 89 
| public void moveOperation(Move operation) throws IOException { 90 | writer.beginObject(); 91 | writer.name("action").value("Move"); 92 | writer.name("oldLine").value(operation.getOldLineNumber()); 93 | writer.name("newLine").value(operation.getNewLineNumber()); 94 | writer.name("code").value(operation.getCodeSnippet()); 95 | writer.endObject(); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 4 | 4.0.0 5 | 6 | com.pdgdiff 7 | soot-pdg 8 | 1.0-SNAPSHOT 9 | 10 | 11 | 12 | 13 | org.soot-oss 14 | soot 15 | 4.3.0 16 | 17 | 18 | 19 | 20 | org.slf4j 21 | slf4j-api 22 | 1.7.30 23 | 24 | 25 | org.slf4j 26 | slf4j-simple 27 | 1.7.30 28 | 29 | 30 | 31 | 32 | com.google.code.gson 33 | gson 34 | 2.8.6 35 | 36 | 37 | 38 | 39 | 40 | junit 41 | junit 42 | 4.13.2 43 | test 44 | 45 | 46 | junit 47 | junit 48 | 4.13.2 49 | test 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | org.apache.maven.plugins 58 | maven-compiler-plugin 59 | 3.8.1 60 | 61 | 62 | 63 | default-compile 64 | compile 65 | 66 | compile 67 | 68 | 69 | 1.8 70 | 1.8 71 | 72 | 73 | 74 | 75 | compile-testclasses 76 | compile 77 | 78 | compile 79 | 80 | 81 | 82 | org/pdgdiff/testclasses/** 83 | 84 | 85 | -g 86 | -O 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/edit/model/SyntaxDifference.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.edit.model; 2 | 3 | import org.pdgdiff.edit.EditScriptGenerator; 4 | import org.pdgdiff.util.SourceCodeMapper; 5 | import soot.Unit; 6 | import soot.tagkit.LineNumberTag; 7 | import soot.toolkits.graph.pdg.PDGNode; 8 | 9 | import java.util.Objects; 10 | 11 | /** 12 | * Represents a syntax difference between two Units or PDGNodes. 
13 | */ 14 | public class SyntaxDifference { 15 | private Unit oldUnit; 16 | private Unit newUnit; 17 | private String message; /* assigned only by the message-only constructor */ 18 | 19 | private int oldLineNumber; 20 | private int newLineNumber; 21 | private String oldCodeSnippet; 22 | private String newCodeSnippet; 23 | 24 | private String oldJimpleCode; 25 | private String newJimpleCode; 26 | /** Builds a unit-level difference. Line numbers come from each unit's "LineNumberTag" (-1 when the unit is null or untagged), code snippets are looked up through the matching SourceCodeMapper with that line number, and the Jimple text is the unit's toString() (null for a null unit). Both mappers are dereferenced unconditionally and may be asked for line -1. */ 27 | public SyntaxDifference(Unit oldUnit, Unit newUnit, 28 | SourceCodeMapper oldSourceMapper, SourceCodeMapper newSourceMapper) { 29 | this.oldUnit = oldUnit; 30 | this.newUnit = newUnit; 31 | this.oldLineNumber = getLineNumber(oldUnit); 32 | this.newLineNumber = getLineNumber(newUnit); 33 | this.oldCodeSnippet = oldSourceMapper.getCodeLine(oldLineNumber); 34 | this.newCodeSnippet = newSourceMapper.getCodeLine(newLineNumber); 35 | this.oldJimpleCode = oldUnit != null ? oldUnit.toString() : null; 36 | this.newJimpleCode = newUnit != null ? newUnit.toString() : null; 37 | } 38 | 39 | 40 | /** Creates a message-only difference; the unit, line-number, snippet and Jimple fields keep their default values. */ 41 | public SyntaxDifference(String message) { 42 | this.message = message; 43 | } 44 | 45 | public String getMessage() { 46 | return message; 47 | } 48 | 49 | public int getOldLineNumber() { 50 | return oldLineNumber; 51 | } 52 | 53 | public int getNewLineNumber() { 54 | return newLineNumber; 55 | } 56 | 57 | public String getOldCodeSnippet() { 58 | return oldCodeSnippet; 59 | } 60 | 61 | public String getNewCodeSnippet() { 62 | return newCodeSnippet; 63 | } 64 | 65 | public String getOldJimpleCode() { 66 | return oldJimpleCode; 67 | } 68 | 69 | public String getNewJimpleCode() { 70 | return newJimpleCode; 71 | } 72 | /** Returns the message when one is set; otherwise a multi-line description of the unit difference, or "Unknown Difference" when both units are null. */ 73 | @Override 74 | public String toString() { 75 | if (message != null) { 76 | return message; 77 | } else if (oldUnit != null || newUnit != null) { 78 | return String.format( 79 | "Unit Difference at lines %d -> %d:\nOld Code: '%s'\nNew Code: '%s'\nOld Jimple: '%s'\nNew Jimple: '%s'", 80 | oldLineNumber, newLineNumber, 81 | oldCodeSnippet == null ? 
"null" : oldCodeSnippet.trim(), 82 | newCodeSnippet == null ? "null" : newCodeSnippet.trim(), 83 | oldJimpleCode == null ? "null" : oldJimpleCode.trim(), 84 | newJimpleCode == null ? "null" : newJimpleCode.trim()); 85 | } else { 86 | return "Unknown Difference"; 87 | } 88 | } 89 | /* Helper methods */ 90 | /** Returns the line number stored in the unit's "LineNumberTag", or -1 when the unit is null or carries no such tag. */ 91 | private int getLineNumber(Unit unit) { 92 | if (unit == null) { 93 | return -1; 94 | } 95 | LineNumberTag tag = (LineNumberTag) unit.getTag("LineNumberTag"); 96 | if (tag != null) { 97 | return tag.getLineNumber(); 98 | } 99 | return -1; 100 | } 101 | /** Null-safe wrapper around EditScriptGenerator.getNodeLineNumber; returns -1 for a null node. */ 102 | private int getNodeLineNumber(PDGNode node) { 103 | if (node == null) { 104 | return -1; 105 | } 106 | return EditScriptGenerator.getNodeLineNumber(node); 107 | } 108 | /** Returns the source line for the node via codeMapper, or null when the node's line number is -1. Nothing in this class calls it. */ 109 | private String getNodeCodeSnippet(PDGNode node, SourceCodeMapper codeMapper) { 110 | int lineNumber = getNodeLineNumber(node); 111 | if (lineNumber != -1) { 112 | return codeMapper.getCodeLine(lineNumber); 113 | } 114 | return null; 115 | } 116 | /** Value equality over the message, both line numbers, both code snippets and both Jimple strings; the Unit references themselves are not compared. The message is part of the comparison because a message-only instance leaves every other compared field at its default, so without it all message-only differences would be equal to each other regardless of their text. */ 117 | @Override 118 | public boolean equals(Object obj) { 119 | if (this == obj) return true; 120 | if (!(obj instanceof SyntaxDifference)) return false; 121 | SyntaxDifference that = (SyntaxDifference) obj; 122 | return Objects.equals(message, that.message) && oldLineNumber == that.oldLineNumber && 123 | newLineNumber == that.newLineNumber && 124 | Objects.equals(oldCodeSnippet, that.oldCodeSnippet) && 125 | Objects.equals(newCodeSnippet, that.newCodeSnippet) && 126 | Objects.equals(oldJimpleCode, that.oldJimpleCode) && 127 | Objects.equals(newJimpleCode, that.newJimpleCode); 128 | } 129 | /** Hashes exactly the fields that equals compares, message included. */ 130 | @Override 131 | public int hashCode() { 132 | return Objects.hash(message, oldLineNumber, newLineNumber, oldCodeSnippet, newCodeSnippet, oldJimpleCode, newJimpleCode); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/graph/GraphExporter.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.graph; 2 | 3 | import 
soot.SootMethod; 4 | import soot.toolkits.graph.UnitGraph; 5 | import soot.toolkits.graph.pdg.PDGNode; 6 | 7 | import java.io.*; 8 | import java.util.HashSet; 9 | import java.util.List; 10 | import java.util.Set; 11 | 12 | public class GraphExporter { 13 | 14 | public static void clearOutputFolder(String folderPath) { 15 | File outputFolder = new File(folderPath); 16 | if (outputFolder.exists()) { 17 | File[] files = outputFolder.listFiles(); 18 | if (files != null) { 19 | for (File file : files) { 20 | file.delete(); 21 | } 22 | } 23 | } 24 | } 25 | 26 | 27 | public static void exportPDG(PDG pdg, String dotFileName, String txtFileName) throws IOException { 28 | UnitGraph cfg = pdg.getCFG(); 29 | SootMethod method = (cfg != null) ? cfg.getBody().getMethod() : null; 30 | 31 | exportPDGToDot(pdg, dotFileName); 32 | 33 | assert method != null; 34 | exportPDGToFile(pdg, txtFileName, method.getName()); 35 | } 36 | 37 | public static void exportPDGToFile(PDG pdg, String fileName, String methodName) throws IOException { 38 | try (PrintWriter writer = new PrintWriter(new FileWriter(fileName, true))) { 39 | writer.println("\n\n---------> Method: " + methodName); 40 | // dump text repr, toString might be overridden in PDG need to check 41 | writer.println(pdg.toString()); 42 | writer.println("---------> End of PDG for method: " + methodName + "\n\n"); 43 | } 44 | } 45 | 46 | public static void exportPDGToDot(PDG pdg, String fileName) { 47 | try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { 48 | writer.println("digraph PDG {"); 49 | writer.println(" graph [ranksep=2, nodesep=0.1];"); 50 | writer.println(" node [shape=ellipse, style=filled, fillcolor=lightgrey, fontname=Arial, fontsize=12];"); 51 | writer.println(" edge [fontname=Arial, fontsize=10];"); 52 | 53 | Set connectedNodes = new HashSet<>(); 54 | 55 | 56 | // for each node, print out edges to its successors 57 | for (PDGNode src : pdg) { 58 | List successors = pdg.getSuccsOf(src); 59 | for 
(PDGNode tgt : successors) { 60 | // getLabelsForEdges return can contain multiple edge labels 61 | List labels = pdg.getLabelsForEdges(src, tgt); 62 | for (GraphGenerator.DependencyTypes depType : labels) { 63 | String colour = "black"; 64 | String depLabel = "UNKNOWN"; 65 | if (depType == GraphGenerator.DependencyTypes.CONTROL_DEPENDENCY) { 66 | colour = "red"; 67 | depLabel = "CTRL_DEP"; 68 | } else if (depType == GraphGenerator.DependencyTypes.DATA_DEPENDENCY) { 69 | colour = "blue"; 70 | depLabel = "DATA_DEP"; 71 | } 72 | writer.printf(" %s -> %s [label=\"%s\", color=\"%s\"];\n", 73 | getNodeId(src), 74 | getNodeId(tgt), 75 | depLabel, 76 | colour); 77 | connectedNodes.add(src); 78 | connectedNodes.add(tgt); 79 | } 80 | } 81 | } 82 | 83 | for (PDGNode node : connectedNodes) { 84 | String label = escapeSpecialCharacters(removeCFGNodePrefix(node.toString())); 85 | writer.printf(" %s [label=\"%s\"];%n", getNodeId(node), label); 86 | } 87 | 88 | writer.println("}"); 89 | System.out.println("PDG exported to DOT file: " + fileName); 90 | 91 | } catch (IOException e) { 92 | e.printStackTrace(); 93 | } 94 | } 95 | 96 | // helper methods 97 | 98 | private static String getNodeId(PDGNode node) { 99 | return "node_" + System.identityHashCode(node); 100 | } 101 | 102 | private static String removeCFGNodePrefix(String label) { 103 | String prefix = "Type: CFGNODE: "; 104 | if (label.startsWith(prefix)) { 105 | return label.substring(prefix.length()); 106 | } 107 | return label; 108 | } 109 | 110 | // to avoid parse errors, otherwise print("") could ruin some things 111 | private static String escapeSpecialCharacters(String label) { 112 | return label.replace("\"", "\\\""); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/ullmann/UllmannMatcher.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models.ullmann; 
2 | 3 | import org.pdgdiff.graph.GraphTraversal; 4 | import org.pdgdiff.graph.PDG; 5 | import org.pdgdiff.matching.NodeMapping; 6 | import soot.toolkits.graph.pdg.PDGNode; 7 | 8 | import java.util.*; 9 | 10 | /** 11 | * UllmannMatcher class to perform graph matching using Ullmann's Algorithm. 12 | * This class contains methods to match two PDGs and return the node mappings between them. * The search succeeds only when every source node is assigned a distinct, compatible destination node and adjacency between already-assigned pairs agrees in both graphs; otherwise match() returns null. 13 | */ 14 | public class UllmannMatcher { 15 | private NodeMapping nodeMapping; /* filled by buildNodeMapping() once a complete assignment has been found */ 16 | 17 | private List srcNodes; 18 | private List dstNodes; 19 | private final int n; /* number of source nodes (rows of compatMatrix) */ 20 | private final int m; /* number of destination nodes (columns of compatMatrix) */ 21 | private int[][] compatMatrix; /* Compatibility matrix: 1 = pair is still a candidate, 0 = pair ruled out, -1 = pair selected on the current search path */ 22 | private Stack matBacklog; /* snapshots of compatMatrix pushed before each tentative assignment and popped on backtrack */ 23 | /** Collects the nodes of both PDGs with GraphTraversal.collectNodesBFS and allocates the n x m compatibility matrix. */ 24 | public UllmannMatcher(PDG srcPdg, PDG dstPdg) { 25 | this.nodeMapping = new NodeMapping(); 26 | 27 | this.srcNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(srcPdg)); 28 | this.dstNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(dstPdg)); 29 | this.n = srcNodes.size(); 30 | this.m = dstNodes.size(); 31 | this.compatMatrix = new int[n][m]; 32 | this.matBacklog = new Stack<>(); 33 | } 34 | /** Runs the search. Returns the node mapping when all source nodes could be assigned, and null when the source has more nodes than the destination or no complete assignment exists. */ 35 | public NodeMapping match() { 36 | if (n > m) { 37 | return null; 38 | } 39 | 40 | initializeM(); 41 | 42 | /* start recursive search */ 43 | if (matchRecursive(0)) { 44 | return nodeMapping; 45 | } else { 46 | return null; 47 | } 48 | } 49 | /** Seeds compatMatrix with 1 where nodesAreCompatible accepts the pair and 0 everywhere else. */ 50 | private void initializeM() { 51 | for (int i = 0; i < n; i++) { 52 | PDGNode node1 = srcNodes.get(i); 53 | for (int j = 0; j < m; j++) { 54 | PDGNode node2 = dstNodes.get(j); 55 | compatMatrix[i][j] = nodesAreCompatible(node1, node2) ? 
1 : 0; 56 | } 57 | } 58 | } 59 | /** Depth-first assignment with backtracking: tries each candidate column for source row depth and recurses on the next row. Returns true once all n rows are assigned (after building nodeMapping), false when no column works for this row. */ 60 | private boolean matchRecursive(int depth) { 61 | if (depth == n) { 62 | /* all nodes have been matched */ 63 | buildNodeMapping(); 64 | return true; 65 | } 66 | 67 | for (int j = 0; j < m; j++) { 68 | if (compatMatrix[depth][j] == 1) { 69 | if (isFeasible(depth, j)) { 70 | int[][] MBackup = copyMatrix(compatMatrix); /* snapshot taken before this tentative assignment */ 71 | /* remove conflicting mappings: column j becomes unavailable to later rows, and row depth keeps only column j */ 72 | for (int k = depth + 1; k < n; k++) { 73 | compatMatrix[k][j] = 0; 74 | } 75 | for (int l = 0; l < m; l++) { 76 | if (l != j) { 77 | compatMatrix[depth][l] = 0; 78 | } 79 | } 80 | compatMatrix[depth][j] = -1; /* selected */ 81 | 82 | matBacklog.push(MBackup); 83 | if (matchRecursive(depth + 1)) { 84 | return true; 85 | } 86 | compatMatrix = matBacklog.pop(); /* backtrack: restore the snapshot and try the next column */ 87 | } 88 | } 89 | } 90 | return false; 91 | } 92 | /** Checks whether assigning srcNodes[i] to dstNodes[j] keeps adjacency consistent with every earlier row: for each row k below i that has a selected column, the two source nodes must be adjacent exactly when the two destination nodes are. */ 93 | private boolean isFeasible(int i, int j) { 94 | /* check adjacency compatibility */ 95 | PDGNode srcNode = srcNodes.get(i); 96 | PDGNode dstNode = dstNodes.get(j); 97 | 98 | /* for all previously mapped nodes */ 99 | for (int k = 0; k < i; k++) { 100 | int mappedIndex = -1; 101 | /* find the destination column that source row k is mapped to (the entry marked -1) */ 102 | for (int l = 0; l < m; l++) { 103 | if (compatMatrix[k][l] == -1) { 104 | mappedIndex = l; 105 | break; 106 | } 107 | } 108 | if (mappedIndex != -1) { 109 | PDGNode mappedSrcNode = srcNodes.get(k); 110 | PDGNode mappedDstNode = this.dstNodes.get(mappedIndex); 111 | 112 | /* check if adjacency is preserved */ 113 | boolean adjInPDG1 = areAdjacent(srcNode, mappedSrcNode); 114 | boolean adjInPDG2 = areAdjacent(dstNode, mappedDstNode); 115 | 116 | if (adjInPDG1 != adjInPDG2) { 117 | return false; 118 | } 119 | } 120 | } 121 | return true; 122 | } 123 | /** Treats two nodes as adjacent when either one lists the other among its dependents or back-dependents, so edge direction is ignored. */ 124 | private boolean areAdjacent(PDGNode n1, PDGNode n2) { 125 | /* check if n1 and n2 are adjacent in the PDG */ 126 | return n1.getDependents().contains(n2) || n1.getBackDependets().contains(n2) 127 | || n2.getDependents().contains(n1) || n2.getBackDependets().contains(n1); 128 | } 129 | 130 | private void 
buildNodeMapping() { 131 | for (int i = 0; i < n; i++) { 132 | for (int j = 0; j < m; j++) { 133 | if (compatMatrix[i][j] == -1) { 134 | nodeMapping.addMapping(srcNodes.get(i), dstNodes.get(j)); 135 | break; 136 | } 137 | } 138 | } 139 | } 140 | 141 | private boolean nodesAreCompatible(PDGNode n1, PDGNode n2) { 142 | // TODO: add more like VF2 143 | // compare node types and attributes 144 | return n1.getType().equals(n2.getType()) && n1.getAttrib().equals(n2.getAttrib()); 145 | } 146 | 147 | private int[][] copyMatrix(int[][] original) { 148 | int[][] copy = new int[n][m]; 149 | for (int i = 0; i < n; i++) { 150 | System.arraycopy(original[i], 0, copy[i], 0, m); 151 | } 152 | return copy; 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Graph Based Code differencing 2 | 3 | _Objectives of this repository: Create a novel graph-based differencing approach, with the aim of improving code-differencing with respect to semantics, with a corresponding tool that software engineers can use._ 4 | 5 | This application is designed to take two Java classes, and produce a graph-based representation of the differences between them. This can then be visualised in a traditional text-based format, or as a graph. 6 | It represents both classes as a [Program Dependence Graph](https://dl.acm.org/doi/10.1145/24039.24041) (generated through [Soot](https://github.com/soot-oss/soot)) and uses heuristics to perform graph isomorphism. 7 | The application will suggest which methods might have originated from one another and will suggest edit scripts between methods in the source and destination file. The final output will be a single `diff.json` file which represents the aggregated per-method edit scripts. 
8 | 9 | Because a PDG is often built from an intermediate representation for the sake of proper analysis, such as LLVM for a C/C++ program or Java bytecode in this case, some syntactic differences won't be captured, and the algorithm will be limited to the granularity of the intermediate representation. For the purpose of a closer and more accurate 10 | differencing, compiler optimisations are disabled for target comparison files both at the Java compiler level and the Soot level. In the construction of Jimple, Soot's IR, some optimisations are not configurable, and this is a known limitation of the approach. 11 | In some specific use cases, such as analysis efficiencies or trying to determine if programs are semantically identical, this tool should (🤞) be more useful than other differencing approaches. 12 | 13 | If you find this interesting, please feel free to read the thesis (link wip) I wrote in conjunction with this tool! 14 | 15 | This approach has been evaluated against a subset of data from [Gumtree's](https://github.com/GumTreeDiff/gumtree) evaluation [dataset](https://github.com/GumTreeDiff/datasets). This modified dataset can be found in the following [repository](https://github.com/syntax/datasets). 16 | 17 | ### How does this work? 18 | 19 | ![Overview](images/overview.png) 20 | 21 | The complete process flow is described in the above visualisation. Data is read in from the source and destination files, and a PDG is generated for each method. The PDGs are then compared using a graph isomorphism strategy, and the results are used to generate a pairing between methods. Each method pairing also has a node mapping, which is used to generate an edit script between the two methods. A recovery method is applied to this to analyse operations using further heuristics, and these edit scripts are aggregated to create a final delta that summarises the changes between the two files. 
22 | The currently recommended matching strategies, which have proven to work well in most cases, are VF2 and GED. 23 | 24 | ### How can I visualise the changes? 25 | 26 | After running the program with the preferred matching engine settings, the diff can be visualised in different ways. Most commonly, 27 | one can run the _py-visualise_ Flask app to view the diff in its side-by-side, text-based form. Alternatively, remaining more faithful to the graph-based differencing approach, the delta can be viewed at the Jimple level as a single _delta_ 28 | graph. This can be viewed by exporting the dot file created in the delta-graph folder to a png, or similar. 29 | 30 | ![Delta](images/refactoredgraph.png) 31 | **NB**: This delta has been edited slightly to ensure it is readable in this README and not too large. This delta graph depicts the changes that transform the method _detailedComputation_ to the method _complexCalculation_, both depicted at the Java level in the text-based visualisation below. 32 | 33 | Alternatively, the delta can be visualised with a more traditional side-by-side view, as shown below. Note that because differencing occurs at the IR level post compiler-optimisation, some syntactic differences may not be captured. As an example, dead code in addNumbers is not considered. This view can be run by `cd`'ing to the `py-visualise` directory and running `python3 app.py` following an install of the required dependencies. 34 | 35 | ![Side-by-side](images/text-based.jpg) 36 | 37 | Insertions and Deletions are marked with a green and red background respectively. Update operations are marked with a variety of colours, so that changes across the source and destination methods and the node pairings of the algorithm can be easily identified. A list of all the edit operations required to describe the delta is shown below the visualisation. 
38 | 39 | ### List of dependencies 40 | - Java 8 (an older version of Java is required to use Soot) 41 | - Maven 3.6.x 42 | - Soot 4.3.0 43 | - Python 3.8.x or later 44 | 45 | ### Getting Started 46 | To run, 47 | 48 | Compile dependencies; 49 | ```bash 50 | mvn compile 51 | ``` 52 | 53 | Run the application; 54 | ```bash 55 | mvn exec:java -Dexec.mainClass="org.pdgdiff.Main" 56 | ``` 57 | 58 | Run the application with arguments for differencing; 59 | 60 | This implementation of the approach (that is, using Soot) requires the following arguments: 61 | - The relative path to the source and destination versions files. 62 | - The relative path to the compiled source and destination versions files. 63 | - The fully qualified name of the class in the source and destination files. 64 | ```bash 65 | mvn exec:java -Dexec.mainClass="org.pdgdiff.Main" -Dexec.args=" " 66 | ``` 67 | 68 | Package the application (necessary for evaluation purposes only); 69 | ```bash 70 | mvn clean package 71 | ``` 72 | 73 | To run the Flask frontend for text-based visualisation; 74 | ```bash 75 | cd py-visualise 76 | python3 app.py 77 | ``` -------------------------------------------------------------------------------- /py-visualise/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template 2 | import json 3 | import os 4 | from difflib import SequenceMatcher 5 | import re 6 | 7 | app = Flask(__name__) 8 | 9 | TEST_CLASSES_PATH = "./testclasses" 10 | OUT_PATH = "./out" 11 | 12 | def read_file(filepath): 13 | with open(filepath, 'r') as file: 14 | return file.read() 15 | 16 | def generate_color_pairs(n): 17 | base_colors = [ 18 | ("highlight-change1-old", "highlight-change1-old"), 19 | ("highlight-change2-old", "highlight-change2-old"), 20 | ("highlight-change3-old", "highlight-change3-old"), 21 | ("highlight-change4-old", "highlight-change4-old"), 22 | ("highlight-change5-old", "highlight-change5-old"), 23 | 
("highlight-change6-old", "highlight-change6-old"), 24 | ("highlight-change1-new", "highlight-change1-new"), 25 | ("highlight-change2-new", "highlight-change2-new"), 26 | ("highlight-change3-new", "highlight-change3-new"), 27 | ("highlight-change4-new", "highlight-change4-new"), 28 | ("highlight-change5-new", "highlight-change5-new"), 29 | ("highlight-change6-new", "highlight-change6-new"), 30 | ] 31 | 32 | color_pairs = [] 33 | for i in range(n): 34 | color_pairs.append(base_colors[i % len(base_colors)]) 35 | 36 | return color_pairs 37 | 38 | def highlight_word_differences_with_colors(old_line, new_line, color_pair): 39 | # posibly investigate wierd spacing behaviour on front end output, think this is because of the way the split is done 40 | seperators = r'([.,(){};=+-/*])' 41 | old_words = re.split(seperators, old_line) 42 | new_words = re.split(seperators, new_line) 43 | 44 | old_words = [word for word in old_words if word.strip() != ''] 45 | new_words = [word for word in new_words if word.strip() != ''] 46 | 47 | old_highlight = [] 48 | new_highlight = [] 49 | 50 | matcher = SequenceMatcher(None, old_words, new_words) 51 | 52 | for tag, i1, i2, j1, j2 in matcher.get_opcodes(): 53 | if tag == 'equal': 54 | old_highlight.append(' '.join(old_words[i1:i2])) 55 | new_highlight.append(' '.join(new_words[j1:j2])) 56 | elif tag == 'replace': 57 | old_highlight.append(f"{' '.join(old_words[i1:i2])}") 58 | new_highlight.append(f"{' '.join(new_words[j1:j2])}") 59 | elif tag == 'delete': 60 | old_highlight.append(f"{' '.join(old_words[i1:i2])}") 61 | elif tag == 'insert': 62 | new_highlight.append(f"{' '.join(new_words[j1:j2])}") 63 | 64 | return ' '.join(old_highlight), ' '.join(new_highlight) 65 | 66 | @app.route('/') 67 | def diff_view(): 68 | class1_content = read_file(os.path.join(TEST_CLASSES_PATH, 'TestFileBefore.java')).splitlines() 69 | class2_content = read_file(os.path.join(TEST_CLASSES_PATH, 'TestFileAfter.java')).splitlines() 70 | 71 | with 
open(os.path.join(OUT_PATH, 'diff.json'), 'r') as diff_file: 72 | diff_data = json.load(diff_file) 73 | 74 | num_actions = len(diff_data["actions"]) 75 | color_pairs = generate_color_pairs(num_actions) 76 | 77 | highlighted_class1 = class1_content[:] 78 | highlighted_class2 = class2_content[:] 79 | 80 | highlighted_diffs = [] 81 | 82 | for i, action in enumerate(diff_data["actions"]): 83 | if action["action"] == "Update": 84 | old_line_number = action["oldLine"] - 1 85 | new_line_number = action["newLine"] - 1 86 | 87 | if 0 <= old_line_number < len(highlighted_class1) and 0 <= new_line_number < len(highlighted_class2): 88 | highlighted_old, highlighted_new = highlight_word_differences_with_colors( 89 | class1_content[old_line_number], class2_content[new_line_number], color_pairs[i % len(color_pairs)] 90 | ) 91 | highlighted_class1[old_line_number] = highlighted_old 92 | highlighted_class2[new_line_number] = highlighted_new 93 | 94 | highlighted_diffs.append({ 95 | "oldLine": action["oldLine"], 96 | "newLine": action["newLine"], 97 | "oldCode": highlighted_old, 98 | "newCode": highlighted_new 99 | }) 100 | 101 | elif action["action"] == "Insert": 102 | new_line_number = action["line"] - 1 103 | 104 | if 0 <= new_line_number < len(highlighted_class2): 105 | highlighted_new = f"+ {class2_content[new_line_number]}" 106 | highlighted_class2[new_line_number] = highlighted_new 107 | 108 | highlighted_diffs.append({ 109 | "oldLine": None, 110 | "newLine": action["line"], 111 | "oldCode": "", 112 | "newCode": highlighted_new 113 | }) 114 | 115 | elif action["action"] == "Delete": 116 | old_line_number = action["line"] - 1 117 | 118 | if 0 <= old_line_number < len(highlighted_class1): 119 | highlighted_old = f"- {class1_content[old_line_number]}" 120 | highlighted_class1[old_line_number] = highlighted_old 121 | 122 | highlighted_diffs.append({ 123 | "oldLine": action["line"], 124 | "newLine": None, 125 | "oldCode": highlighted_old, 126 | "newCode": "" 127 | }) 128 | 129 | 
highlighted_class1_content = '\n'.join(highlighted_class1) 130 | highlighted_class2_content = '\n'.join(highlighted_class2) 131 | 132 | return render_template('index.html', class1=highlighted_class1_content, class2=highlighted_class2_content, diffs=highlighted_diffs) 133 | 134 | if __name__ == '__main__': 135 | app.run(debug=True) 136 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/graph/GraphGenerator.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.graph; 2 | 3 | import soot.Body; 4 | import soot.SootClass; 5 | import soot.SootMethod; 6 | import soot.Unit; 7 | import soot.toolkits.graph.*; 8 | import soot.toolkits.graph.pdg.MHGDominatorTree; 9 | import soot.toolkits.graph.pdg.PDGNode; 10 | import soot.toolkits.scalar.SimpleLocalDefs; 11 | import soot.toolkits.scalar.SimpleLocalUses; 12 | import soot.toolkits.scalar.UnitValueBoxPair; 13 | 14 | import java.util.ArrayList; 15 | import java.util.HashMap; 16 | import java.util.List; 17 | import java.util.Map; 18 | 19 | /** 20 | * GraphGenerator class to generate a Program Dependency Graph (PDG) for a specific method 21 | */ 22 | public class GraphGenerator { 23 | 24 | // enum for dependency types 25 | public enum DependencyTypes { 26 | CONTROL_DEPENDENCY, 27 | DATA_DEPENDENCY 28 | } 29 | 30 | 31 | public static PDG constructPdg(SootMethod method) { 32 | Body body = method.retrieveActiveBody(); 33 | System.out.println("Generating PDG for method: " + method.getName()); 34 | UnitGraph eug = new ExceptionalUnitGraph(body); 35 | 36 | // soots api for creating postdominator tree 37 | MHGDominatorTree postdominatorTree = new MHGDominatorTree(new MHGPostDominatorsFinder(eug)); 38 | 39 | //get dominance frontiers based on the postdominator tree, equivalent to using it 40 | DominanceFrontier dominanceFrontier = new CytronDominanceFrontier<>(postdominatorTree); 41 | 42 | PDG pdg = new PDG(); 43 | 
pdg.setCFG(eug); 44 | 45 | SimpleLocalDefs definitions = new SimpleLocalDefs(eug); 46 | SimpleLocalUses uses = new SimpleLocalUses(body, definitions); 47 | 48 | Map unitToNodeMap = new HashMap<>(); 49 | 50 | PDGNode startNode = null; 51 | 52 | // building a list of units in the order they appear to properly detect the backedges 53 | List allUnits = new ArrayList<>(body.getUnits()); 54 | Map unitOrder = new HashMap<>(); 55 | int idx = 0; 56 | for (Unit u : allUnits) { 57 | unitOrder.put(u, idx++); 58 | } 59 | 60 | for (Unit unit : body.getUnits()) { 61 | 62 | PDGNode node = addOrGetNode(pdg, unit, unitToNodeMap); 63 | 64 | //add control dependencies based on dominance frontier 65 | for (DominatorNode dode : dominanceFrontier.getDominanceFrontierOf(postdominatorTree.getDode(unit))) { 66 | Unit frontier = dode.getGode(); 67 | PDGNode frontierNode = addOrGetNode(pdg, frontier, unitToNodeMap); 68 | 69 | if (!pdg.containsEdge(frontierNode, node, DependencyTypes.CONTROL_DEPENDENCY)) { 70 | // TODO: this isnt probably bang on, but need some 'start node' to be set. 
taking the first unit often leads to disconnected graphs 71 | if (startNode == null) { 72 | startNode = frontierNode; 73 | pdg.startNode = startNode; 74 | } 75 | pdg.addEdge(frontierNode, node, DependencyTypes.CONTROL_DEPENDENCY); 76 | frontierNode.addDependent(node); 77 | 78 | System.out.println("Control Dependency: " + frontierNode + " -> " + node); 79 | 80 | // preliminary but if fronteid is earlier or equal to 'unit', treat it as a backedge 81 | if (isBackEdge(frontier, unit, unitOrder)) { 82 | node.addBackDependent(frontierNode); 83 | System.out.println(" => (Back edge) " + node + " <- " + frontierNode); 84 | } 85 | } 86 | } 87 | 88 | // add data dependencies based on uses 89 | for (UnitValueBoxPair unitValueBoxPair : uses.getUsesOf(unit)) { 90 | Unit useUnit = unitValueBoxPair.unit; 91 | PDGNode useNode = addOrGetNode(pdg, useUnit, unitToNodeMap); 92 | 93 | if (!pdg.containsEdge(node, useNode, DependencyTypes.DATA_DEPENDENCY)) { 94 | // TODO: this isnt probably bang on, but need some 'start node' to be set. taking the first unit often leads to disconnected graphs 95 | if (startNode == null) { 96 | startNode = node; 97 | pdg.startNode = startNode; 98 | } 99 | pdg.addEdge(node, useNode, DependencyTypes.DATA_DEPENDENCY); 100 | node.addDependent(useNode); 101 | System.out.println("Data Dependency: " + node + " -> " + useNode); 102 | 103 | // preliminary but if 'useUnit' is earlier or equal to 'unit', treat it as a backedge 104 | if (isBackEdge(useUnit, unit, unitOrder)) { 105 | useNode.addBackDependent(node); 106 | System.out.println(" => (Data Back edge) " + useNode + " <- " + node); 107 | } 108 | } 109 | } 110 | } 111 | return pdg; 112 | } 113 | 114 | private static boolean isBackEdge(Unit frontier, Unit unit, Map orderMap) { 115 | // return true if 'frontier' is same or earlier in the ordering => likely a backward/loop edge. 
116 | return orderMap.get(frontier) <= orderMap.get(unit); 117 | } 118 | 119 | private static PDGNode addOrGetNode(PDG pdg, Unit unit, Map unitToNodeMap) { 120 | PDGNode node = unitToNodeMap.get(unit); 121 | if (node == null) { 122 | // create a new PDGNode for this Unit 123 | node = new PDGNode(unit, PDGNode.Type.CFGNODE); 124 | unitToNodeMap.put(unit, node); 125 | 126 | // add the node to the PDG if it is not already there 127 | if (!pdg.containsNode(node)) { 128 | pdg.addNode(node); 129 | } 130 | } 131 | return node; 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/graph/CycleDetection.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.graph; 2 | 3 | import soot.Unit; 4 | import soot.tagkit.LineNumberTag; 5 | import soot.tagkit.Tag; 6 | import soot.toolkits.graph.pdg.PDGNode; 7 | 8 | import java.util.*; 9 | 10 | // implements Tarjans algorithm for detection of strongly connected components 11 | public class CycleDetection { 12 | 13 | private static boolean debug = true; 14 | private static int index = 0; 15 | // usin IdentityHashMap because PDGNode doesnt implement equals and hashCode 16 | private static Map indices = new IdentityHashMap<>(); 17 | private static Map lowLinks = new IdentityHashMap<>(); 18 | private static Deque stack = new ArrayDeque<>(); 19 | private static Set> stronglyConnectedComponents = new HashSet<>(); 20 | 21 | public static void setLogging(boolean enable) { 22 | debug = enable; 23 | } 24 | 25 | public static boolean hasCycle(PDG pdg) { 26 | if (debug) System.out.println("[CycleDetection] Detecting cycles using Tarjan's Algorithm"); 27 | 28 | index = 0; 29 | indices.clear(); 30 | lowLinks.clear(); 31 | stack.clear(); 32 | stronglyConnectedComponents.clear(); 33 | 34 | List allNodes = GraphTraversal.collectNodesBFS(pdg); 35 | 36 | // tarjan's algorithm starting from each node 37 | for (PDGNode node : 
allNodes) { 38 | if (!indices.containsKey(node)) { 39 | strongConnect(node, pdg); 40 | } 41 | } 42 | 43 | // check if any strongly connected component is a cycle 44 | boolean hasCycle = false; 45 | int maxCycleSize = 0; 46 | List> maxSizeSCCs = new ArrayList<>(); 47 | 48 | for (Set scc : stronglyConnectedComponents) { 49 | if (scc.size() > 1 || hasSelfLoop(scc, pdg)) { 50 | hasCycle = true; 51 | 52 | int sccSize = scc.size(); 53 | if (sccSize > maxCycleSize) { 54 | maxCycleSize = sccSize; 55 | maxSizeSCCs.clear(); 56 | maxSizeSCCs.add(scc); 57 | } else if (sccSize == maxCycleSize) { 58 | maxSizeSCCs.add(scc); 59 | } 60 | 61 | if (debug) { 62 | System.out.println("[CycleDetection] Cycle detected in SCC:"); 63 | for (PDGNode node : scc) { 64 | int lineNumber = getLineNumberFromPDGNode(node); 65 | if (lineNumber != -1) { 66 | System.out.println(" Node: " + node + " at line " + lineNumber); 67 | } else { 68 | System.out.println(" Node: " + node + " (line number not available)"); 69 | } 70 | } 71 | } 72 | } 73 | } 74 | 75 | if (hasCycle && debug) { 76 | System.out.println("[CycleDetection] Largest cycle size: " + maxCycleSize); 77 | for (Set scc : maxSizeSCCs) { 78 | System.out.println("[CycleDetection] -> Largest cycle detected in this SCC:"); 79 | for (PDGNode node : scc) { 80 | int lineNumber = getLineNumberFromPDGNode(node); 81 | if (lineNumber != -1) { 82 | System.out.println(" Node: " + node + " at line " + lineNumber); 83 | } else { 84 | System.out.println(" Node: " + node + " (line number not available)"); 85 | } 86 | } 87 | } 88 | } 89 | 90 | if (!hasCycle && debug) { 91 | System.out.println("[CycleDetection] No cycles detected in graph"); 92 | } 93 | 94 | return hasCycle; 95 | } 96 | 97 | // gather SCCs 98 | private static void strongConnect(PDGNode node, PDG pdg) { 99 | indices.put(node, index); 100 | lowLinks.put(node, index); 101 | index++; 102 | stack.push(node); 103 | 104 | List successors = pdg.getSuccsOf(node); 105 | 106 | for (PDGNode dependent : 
successors) { 107 | if (!indices.containsKey(dependent)) { 108 | strongConnect(dependent, pdg); 109 | lowLinks.put(node, Math.min(lowLinks.get(node), lowLinks.get(dependent))); 110 | } else if (stack.contains(dependent)) { 111 | lowLinks.put(node, Math.min(lowLinks.get(node), indices.get(dependent))); 112 | } 113 | } 114 | 115 | // if node is a root node, pop the stack and generate an SCC 116 | if (lowLinks.get(node).equals(indices.get(node))) { 117 | Set scc = new HashSet<>(); 118 | PDGNode w; 119 | do { 120 | w = stack.pop(); 121 | scc.add(w); 122 | } while (w != node); 123 | stronglyConnectedComponents.add(scc); 124 | } 125 | } 126 | 127 | private static boolean hasSelfLoop(Set scc, PDG pdg) { 128 | for (PDGNode node : scc) { 129 | for (PDGNode succ : pdg.getSuccsOf(node)) { 130 | if (node == succ) { // should be identity comparison 131 | return true; 132 | } 133 | } 134 | } 135 | return false; 136 | } 137 | 138 | private static int getLineNumberFromPDGNode(PDGNode node) { 139 | if (node.getType() == PDGNode.Type.CFGNODE) { 140 | Unit headUnit = (Unit) node.getNode(); 141 | if (headUnit != null) { 142 | Tag tag = headUnit.getTag("LineNumberTag"); 143 | if (tag instanceof LineNumberTag) { 144 | LineNumberTag lineNumberTag = (LineNumberTag) tag; 145 | return lineNumberTag.getLineNumber(); 146 | } 147 | } 148 | } 149 | return -1; 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/export/EditScriptExporter.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.export; 2 | 3 | import org.pdgdiff.edit.model.EditOperation; 4 | import org.pdgdiff.graph.PDG; 5 | import org.pdgdiff.io.JsonOperationSerializer; 6 | import org.pdgdiff.io.OperationSerializer; 7 | import org.pdgdiff.matching.GraphMapping; 8 | import org.pdgdiff.matching.StrategySettings; 9 | 10 | import java.io.BufferedWriter; 11 | import java.io.FileWriter; 12 | import 
java.io.IOException; 13 | import java.io.Writer; 14 | import java.nio.file.Files; 15 | import java.nio.file.Paths; 16 | import java.nio.file.StandardCopyOption; 17 | import java.util.List; 18 | 19 | import static org.pdgdiff.export.ExportUtils.generateHash; 20 | 21 | public class EditScriptExporter { 22 | 23 | private static final int MAX_FILENAME_LENGTH = 255; // probably max, otherwise sometimes have issues with OS FS 24 | 25 | 26 | public static void exportEditScript(List editScript, String method1Signature, String method2Signature, StrategySettings strategySettings) { 27 | // Sanitize method names for use in filenames 28 | String method1Safe = method1Signature.replaceAll("[^a-zA-Z0-9\\.\\-]", "_"); 29 | String method2Safe = method2Signature.replaceAll("[^a-zA-Z0-9\\.\\-]", "_"); 30 | 31 | String outputDir = "out/"; 32 | String filename = outputDir + "editScript_" + method1Safe + "_to_" + method2Safe + ".json"; 33 | 34 | // check if too long, otherwise will fail 35 | if (filename.length() > MAX_FILENAME_LENGTH) { 36 | String method1Abbrev = generateHash(method1Safe); 37 | System.out.println("Method name too big to save to file, hashed;" + method1Safe + " -> " + method1Abbrev); 38 | String method2Abbrev = generateHash(method2Safe); 39 | System.out.println("Method name too big to save to file, hashed;" + method2Safe + " -> " + method2Abbrev); 40 | filename = outputDir + "editScript_" + method1Abbrev + "_to_" + method2Abbrev + "_concat.json"; 41 | } 42 | 43 | try (Writer writer = new FileWriter(filename)) { 44 | OperationSerializer serializer = new JsonOperationSerializer(editScript, strategySettings); 45 | serializer.writeTo(writer); 46 | System.out.println("Edit script exported to: " + filename); 47 | } catch (Exception e) { 48 | System.err.println("Failed to export edit script to " + filename); 49 | e.printStackTrace(); 50 | } 51 | } 52 | 53 | 54 | public static void exportGraphMappings(GraphMapping graphMapping, List pdgList1, List pdgList2, String outputDir) { 
55 | String filename = outputDir + "graphMappings.txt"; 56 | 57 | // for multi-class graph matchings, we append to the file 58 | try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename, true))) { 59 | writer.write("Graph Mappings (Before -> After):\n"); 60 | 61 | graphMapping.getGraphMapping().forEach((srcPDG, dstPDG) -> { 62 | try { 63 | String srcMethodSignature = srcPDG.getCFG().getBody().getMethod().getSignature(); 64 | String dstMethodSignature = dstPDG.getCFG().getBody().getMethod().getSignature(); 65 | writer.write(srcMethodSignature + " -> " + dstMethodSignature + "\n"); 66 | } catch (IOException e) { 67 | System.err.println("Error writing mapping to file: " + e.getMessage()); 68 | } 69 | }); 70 | 71 | writer.write("\nUnmatched Graphs in Source:\n"); 72 | pdgList1.stream() 73 | .filter(pdg -> !graphMapping.getGraphMapping().containsKey(pdg)) 74 | .forEach(pdg -> { 75 | try { 76 | String methodSignature = pdg.getCFG().getBody().getMethod().getSignature(); 77 | writer.write(methodSignature + "\n"); 78 | } catch (IOException e) { 79 | System.err.println("Error writing unmatched source graph to file: " + e.getMessage()); 80 | } 81 | }); 82 | 83 | writer.write("\nUnmatched Graphs in Destination:\n"); 84 | pdgList2.stream() 85 | .filter(pdg -> !graphMapping.getGraphMapping().containsValue(pdg)) 86 | .forEach(pdg -> { 87 | try { 88 | String methodSignature = pdg.getCFG().getBody().getMethod().getSignature(); 89 | writer.write(methodSignature + "\n"); 90 | } catch (IOException e) { 91 | System.err.println("Error writing unmatched destination graph to file: " + e.getMessage()); 92 | } 93 | }); 94 | 95 | System.out.println("Graph mappings exported to: " + filename); 96 | } catch (IOException e) { 97 | System.err.println("Failed to export graph mappings to " + filename); 98 | } 99 | } 100 | 101 | 102 | 103 | public static void writeAggregatedEditScript(List aggregatedEditScripts, String filename, StrategySettings strategySettings) { 104 | try (Writer 
writer = new FileWriter(filename)) { 105 | OperationSerializer serializer = new JsonOperationSerializer(aggregatedEditScripts, strategySettings); 106 | serializer.writeTo(writer); 107 | System.out.println("Edit script exported to: " + filename); 108 | } catch (Exception e) { 109 | System.err.println("Failed to export edit script to " + filename); 110 | e.printStackTrace(); 111 | } 112 | } 113 | 114 | public static void copyResultsToOutput(String beforeSourceDir, String afterSourceDir) { 115 | try { 116 | Files.copy(Paths.get(beforeSourceDir), Paths.get("py-visualise/testclasses/TestFileBefore.java"), StandardCopyOption.REPLACE_EXISTING); 117 | Files.copy(Paths.get(afterSourceDir), Paths.get("py-visualise/testclasses/TestFileAfter.java"), StandardCopyOption.REPLACE_EXISTING); 118 | Files.copy(Paths.get("out/diff.json"), Paths.get("py-visualise/out/diff.json"), StandardCopyOption.REPLACE_EXISTING); 119 | System.out.println(" --> results copied to python visualiser"); 120 | } catch (IOException e) { 121 | System.err.println("An error occurred while copying the source files to the output folder: " + e.getMessage()); 122 | e.printStackTrace(); 123 | 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/ged/GEDMatcher.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models.ged; 2 | 3 | import org.pdgdiff.matching.NodeFeasibility; 4 | import org.pdgdiff.graph.GraphTraversal; 5 | import org.pdgdiff.graph.PDG; 6 | import org.pdgdiff.matching.NodeMapping; 7 | import soot.toolkits.graph.pdg.PDGNode; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.Map; 12 | 13 | import static org.pdgdiff.matching.models.heuristic.JaroWinklerSimilarity.JaroWinklerSimilarity; 14 | 15 | /** 16 | * Performs a Graph Edit Distance node alignment between two PDGs. 
17 | * Returns a GEDResult containing the (distance, nodeMapping). 18 | * 19 | */ 20 | public class GEDMatcher { 21 | 22 | private final PDG srcPdg; 23 | private final PDG dstPdg; 24 | 25 | public GEDMatcher(PDG srcPdg, PDG dstPdg) { 26 | this.srcPdg = srcPdg; 27 | this.dstPdg = dstPdg; 28 | } 29 | 30 | // find edit distance and return node mappings 31 | public GEDResult match() { 32 | List srcNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(srcPdg)); 33 | List dstNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(dstPdg)); 34 | 35 | int n1 = srcNodes.size(); 36 | int n2 = dstNodes.size(); 37 | 38 | // create square cost mat of n x n size, must be square for Hungarian algo 39 | // NOTE because its square there is going to be some dummy nodes (where its padded, pdg prob doesnt produce square mat) 40 | int n = Math.max(n1, n2); 41 | double[][] squareMatrix = new double[n][n]; 42 | 43 | // deletion and insertion costs, todo tune these 44 | double insertionCostVal = 1.0; 45 | double deletionCostVal = 1.0; 46 | 47 | // fill the "real" submatrix of the cost matrix (where i < n1 and j < n2) with substitution costs for each node pair 48 | for (int i = 0; i < n1; i++) { 49 | for (int j = 0; j < n2; j++) { 50 | squareMatrix[i][j] = substitutionCost(srcNodes.get(i), dstNodes.get(j)); 51 | } 52 | } 53 | 54 | // fill extra non-match rows/columns with insertion/deletion costs 55 | for (int i = 0; i < n; i++) { 56 | for (int j = 0; j < n; j++) { 57 | 58 | // if within real submatrix, i.e. 
no dummies, just covered it in loop above 59 | if (i < n1 && j < n2) { 60 | continue; 61 | } 62 | 63 | // i < n1 but j >= n2 => "dummy" node in PDG2 => old node i must be deleted 64 | if (i < n1 && j >= n2) { 65 | squareMatrix[i][j] = deletionCostVal; 66 | } 67 | // i >= n1 but j < n2 => "dummy" node in PDG1 => new node j must be inserted 68 | else if (i >= n1 && j < n2) { 69 | squareMatrix[i][j] = insertionCostVal; 70 | } 71 | // i >= n1 && j >= n2 => both dummy => set cost = 0 or could change this to some small cost 72 | else if (i >= n1 && j >= n2) { 73 | squareMatrix[i][j] = 0.0; 74 | } 75 | } 76 | } 77 | 78 | // solving the assignment on the n x n square matrix 79 | int[] assignment = HungarianAlgorithm.minimizeAssignment(squareMatrix); 80 | 81 | NodeMapping nodeMapping = new NodeMapping(); 82 | double totalCost = 0.0; 83 | 84 | 85 | // checking for real vs dummy nodes 86 | for (int i = 0; i < n; i++) { 87 | // 'assignment[i] = j' means row i is matched to column j. each i, j in [0..n) 88 | int j = assignment[i]; 89 | if (j < 0) { 90 | continue; 91 | } 92 | double cost = squareMatrix[i][j]; 93 | totalCost += cost; 94 | 95 | // If i < n1 and j < n2 its within range of 'real' submat=> real node match => substitution 96 | if (i < n1 && j < n2) { 97 | nodeMapping.addMapping(srcNodes.get(i), dstNodes.get(j)); 98 | } 99 | // todo: I believe inserts and deletes shouldn't be added to node mapping, 100 | // and that their absence will be handled as insertions/deletions in the final mapping 101 | // unmatched nodes in Nodemapping will be handled as insertions/ deletions 102 | // if i < n1 && j >= n2 => deletion (old node i matched to dummy) 103 | // if i >= n1 && j < n2 => insertion (new node j matched to dummy) 104 | // if both dummy => ignore 105 | } 106 | 107 | // penalise matched nodes that have considerable semantic differences by inspecting edges 108 | double edgePenalty = computeEdgeMismatchPenalty(nodeMapping); 109 | totalCost += edgePenalty; 110 | 111 | return 
new GEDResult(totalCost, nodeMapping); 112 | } 113 | 114 | /** 115 | * returns substitution cost between two nodes. 116 | * considers node label similarity and node category similarity. 117 | */ 118 | private double substitutionCost(PDGNode n1, PDGNode n2) { 119 | // base cost if categories differ 120 | if (!NodeFeasibility.isSameNodeCategory(n1, n2)) { 121 | return 1.0; // or big penalty 122 | } 123 | 124 | // compare the node "type" or attribute 125 | double attributePenalty = n1.getAttrib().equals(n2.getAttrib()) ? 0.0 : 0.8; 126 | 127 | // get the textual content to compare. 128 | String label1 = extractRelevantLabel(n1); 129 | String label2 = extractRelevantLabel(n2); 130 | 131 | double sim = JaroWinklerSimilarity(label1, label2); // in [0..1], higher=better 132 | double stringCost = 1.0 - sim; // bigger difference -> bigger cost 133 | 134 | double alpha = 0.1; // weighting for syntactic differences, i.e. string difference 135 | double beta = 0.9; // weighting for semantic difference, i.e. attribute difference 136 | 137 | return alpha * stringCost + beta * attributePenalty; 138 | } 139 | 140 | private String extractRelevantLabel(PDGNode node) { 141 | // remove beginning of 'Type: CFGNODE: ' 142 | return node.toString().substring(15); 143 | } 144 | 145 | /** 146 | * check for edges and inforce mismatch penalty 147 | */ 148 | private double computeEdgeMismatchPenalty(NodeMapping mapping) { 149 | double mismatchCost = 0.0; 150 | double edgePenalty = 0.5; // this is complete guess work. 
151 | 152 | Map forwardMap = mapping.getNodeMapping(); 153 | 154 | // for each mapped edge (n1->m1) in old, see if (n2->m2) exists in new 155 | for (PDGNode oldSrc : forwardMap.keySet()) { 156 | PDGNode newSrc = forwardMap.get(oldSrc); 157 | 158 | for (PDGNode oldTgt : oldSrc.getDependents()) { 159 | PDGNode newTgt = forwardMap.get(oldTgt); 160 | if (newTgt != null) { 161 | // if the new edge does not exist, penalize 162 | if (!newSrc.getDependents().contains(newTgt)) { 163 | mismatchCost += edgePenalty; 164 | } 165 | } 166 | } 167 | } 168 | // todo possibly add checks for edges that are in new pdg, but not in old pdg (vice versa) 169 | // can use mappings.getReverseNodeMapping() 170 | 171 | return mismatchCost; 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /.idea/uiDesigner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/DiffEngine.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching; 2 | 3 | import org.pdgdiff.edit.ClassMetadataDiffGenerator; 4 | import org.pdgdiff.edit.EditDistanceCalculator; 5 | import org.pdgdiff.edit.EditScriptGenerator; 6 | import 
org.pdgdiff.edit.RecoveryProcessor; 7 | import org.pdgdiff.edit.model.EditOperation; 8 | import org.pdgdiff.export.DiffGraphExporter; 9 | import org.pdgdiff.graph.CycleDetection; 10 | import org.pdgdiff.graph.GraphTraversal; 11 | import org.pdgdiff.graph.PDG; 12 | import soot.SootClass; 13 | 14 | import soot.SootMethod; 15 | 16 | import java.io.*; 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | import java.util.stream.Collectors; 20 | 21 | import static org.pdgdiff.export.EditScriptExporter.*; 22 | 23 | public class DiffEngine { 24 | 25 | private static final List aggregatedEditScripts = new ArrayList<>(); 26 | private static final boolean debug = false; // setting for development 27 | 28 | 29 | public static void difference(List pdgList1, List pdgList2, 30 | StrategySettings strategySettings, String srcSourceFilePath, String dstSourceFilePath) throws IOException { 31 | 32 | GraphMatcher matcher = GraphMatcherFactory.createMatcher(strategySettings.matchingStrategy, pdgList1, pdgList2); 33 | // for each graph print the size and if it has a cycle (debug mode) 34 | if (debug) pdgList1.forEach(pdg -> { 35 | System.out.println("------"); 36 | System.out.println(pdg.getCFG().getBody().getMethod().getSignature()); 37 | System.out.println("Node count" + GraphTraversal.getNodeCount(pdg)); 38 | CycleDetection.hasCycle(pdg); 39 | }); 40 | // perform the actual graph matching 41 | System.out.println("-> Beginning matching PDGs using strategy: " + strategySettings.matchingStrategy); 42 | GraphMapping graphMapping = matcher.matchPDGLists(); 43 | 44 | // TODO: clean up debug print stmts 45 | System.out.println("--> Graph matching complete using strategy: " + strategySettings.matchingStrategy); 46 | 47 | // handle unmatched graphs, i.e. 
additions or deletions of methods to the versions 48 | List unmatchedInList1 = pdgList1.stream() 49 | .filter(pdg -> !graphMapping.getGraphMapping().containsKey(pdg)) 50 | .collect(Collectors.toList()); 51 | 52 | List unmatchedInList2 = pdgList2.stream() 53 | .filter(pdg -> !graphMapping.getGraphMapping().containsValue(pdg)) 54 | .collect(Collectors.toList()); 55 | 56 | // generate edit scripts for unmatched methods discovered in above statements 57 | generateEditScriptsForUnmatched(unmatchedInList1, unmatchedInList2, srcSourceFilePath, dstSourceFilePath, strategySettings); 58 | exportGraphMappings(graphMapping, pdgList1, pdgList2, "out/"); 59 | 60 | DiffGraphExporter.exportDiffPDGs( 61 | graphMapping, 62 | pdgList1, 63 | pdgList2, 64 | "out/delta-graphs/" 65 | ); 66 | 67 | graphMapping.getGraphMapping().forEach((srcPDG, dstPDG) -> { 68 | String method1 = srcPDG.getCFG().getBody().getMethod().getSignature(); 69 | String method2 = dstPDG.getCFG().getBody().getMethod().getSignature(); 70 | System.out.println("---\n> PDG from class 1: " + method1 + " is matched with PDG from class 2: " + method2); 71 | if (debug) { 72 | System.out.println(GraphTraversal.getNodeCount(srcPDG)); 73 | CycleDetection.hasCycle(srcPDG); 74 | System.out.println(GraphTraversal.getNodeCount(dstPDG)); 75 | CycleDetection.hasCycle(dstPDG); 76 | } 77 | NodeMapping nodeMapping = graphMapping.getNodeMapping(srcPDG); 78 | if (nodeMapping != null) { 79 | System.out.println("--- Node Mapping:"); 80 | nodeMapping.printMappings(); 81 | 82 | try { 83 | SootMethod srcObj = srcPDG.getCFG().getBody().getMethod(); 84 | SootMethod destObj = dstPDG.getCFG().getBody().getMethod(); 85 | 86 | List editScript = EditScriptGenerator.generateEditScript(srcPDG, dstPDG, graphMapping, 87 | srcSourceFilePath, dstSourceFilePath, srcObj, destObj); 88 | 89 | List recoveredEditScript = RecoveryProcessor.recoverMappings(editScript, strategySettings.recoveryStrategy); 90 | 91 | int editDistance = 
EditDistanceCalculator.calculateEditDistance(recoveredEditScript); 92 | System.out.println("--- Edit information ---"); 93 | System.out.println("-- Edit Distance: " + editDistance); 94 | 95 | System.out.println("-- Edit Script:"); 96 | for (EditOperation op : recoveredEditScript) { 97 | System.out.println(op); 98 | } 99 | 100 | // serialise and export 101 | aggregatedEditScripts.addAll(recoveredEditScript); 102 | exportEditScript(recoveredEditScript, method1, method2, strategySettings); 103 | } catch (Exception e) { 104 | e.printStackTrace(); 105 | } 106 | } 107 | }); 108 | 109 | // build edit script for class mappings at this point 110 | if (!pdgList1.isEmpty() && !pdgList2.isEmpty()) { 111 | SootClass srcClass = pdgList1.get(0).getCFG().getBody().getMethod().getDeclaringClass(); 112 | SootClass dstClass = pdgList2.get(0).getCFG().getBody().getMethod().getDeclaringClass(); 113 | 114 | // TODO: if one of these is empty, i need to mark it as an insertion or deletion of the entire class. 115 | // so need to do a INSERT all or DELETE all for class metadata, this is currently not handled and only 116 | // approximate. 
117 | List metadataScript = ClassMetadataDiffGenerator.generateClassMetadataDiff(srcClass, dstClass, srcSourceFilePath, dstSourceFilePath); 118 | aggregatedEditScripts.addAll(metadataScript); 119 | exportEditScript(metadataScript, "metadata", "metadata", null); 120 | } 121 | 122 | if (strategySettings.isAggregateRecovery()) { 123 | List recAggregatedEditScripts = RecoveryProcessor.recoverMappings(aggregatedEditScripts, strategySettings.recoveryStrategy); 124 | writeAggregatedEditScript(recAggregatedEditScripts, "out/diff.json", strategySettings); 125 | } else { 126 | writeAggregatedEditScript(aggregatedEditScripts, "out/diff.json", strategySettings); 127 | } 128 | } 129 | 130 | private static void generateEditScriptsForUnmatched(List unmatchedInList1, List unmatchedInList2, 131 | String srcSourceFilePath, String dstSourceFilePath, StrategySettings strategySettings) { 132 | unmatchedInList1.forEach(pdg -> { 133 | try { 134 | SootMethod method = pdg.getCFG().getBody().getMethod(); 135 | String methodSignature = pdg.getCFG().getBody().getMethod().getSignature(); 136 | System.out.println("Unmatched method in List 1 (to be deleted): " + methodSignature); 137 | 138 | List editScript = EditScriptGenerator.generateDeleteScript(pdg, srcSourceFilePath, method); 139 | List recoveredEditScript = RecoveryProcessor.recoverMappings(editScript, strategySettings.recoveryStrategy); 140 | aggregatedEditScripts.addAll(recoveredEditScript); 141 | exportEditScript(recoveredEditScript, methodSignature, "DELETION", strategySettings); 142 | } catch (Exception e) { 143 | System.err.println("Failed to generate delete script for unmatched method in List 1"); 144 | e.printStackTrace(); 145 | } 146 | }); 147 | 148 | unmatchedInList2.forEach(pdg -> { 149 | try { 150 | SootMethod method = pdg.getCFG().getBody().getMethod(); 151 | String methodSignature = pdg.getCFG().getBody().getMethod().getSignature(); 152 | System.out.println("Unmatched method in List 2 (to be added): " + methodSignature); 153 
| 154 | List editScript = EditScriptGenerator.generateAddScript(pdg, dstSourceFilePath, method); 155 | List recoveredEditScript = RecoveryProcessor.recoverMappings(editScript, strategySettings.recoveryStrategy); 156 | aggregatedEditScripts.addAll(recoveredEditScript); 157 | exportEditScript(recoveredEditScript, "INSERTION", methodSignature, strategySettings); 158 | } catch (Exception e) { 159 | System.err.println("Failed to generate add script for unmatched method in List 2"); 160 | e.printStackTrace(); 161 | } 162 | }); 163 | } 164 | 165 | 166 | } 167 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/edit/ClassMetadataDiffGenerator.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.edit; 2 | 3 | import org.pdgdiff.edit.model.*; 4 | import org.pdgdiff.util.CodeAnalysisUtils; 5 | import org.pdgdiff.util.SourceCodeMapper; 6 | import soot.Modifier; 7 | import soot.SootClass; 8 | import soot.SootField; 9 | import soot.util.Chain; 10 | 11 | import java.io.IOException; 12 | import java.util.*; 13 | 14 | public class ClassMetadataDiffGenerator { 15 | 16 | public static List generateClassMetadataDiff( 17 | SootClass srcClass, 18 | SootClass dstClass, 19 | String srcSourceFilePath, 20 | String dstSourceFilePath 21 | ) throws IOException { 22 | Set editScriptSet = new HashSet<>(); 23 | 24 | SourceCodeMapper srcCodeMapper = new SourceCodeMapper(srcSourceFilePath); 25 | SourceCodeMapper dstCodeMapper = new SourceCodeMapper(dstSourceFilePath); 26 | 27 | // cmp class metadata 28 | compareClassMetadata(srcClass, dstClass, srcCodeMapper, dstCodeMapper, editScriptSet); 29 | 30 | // cmp fields 31 | compareFields(srcClass, dstClass, srcCodeMapper, dstCodeMapper, editScriptSet); 32 | 33 | return new ArrayList<>(editScriptSet); 34 | } 35 | 36 | private static void compareClassMetadata( 37 | SootClass srcClass, 38 | SootClass dstClass, 39 | SourceCodeMapper 
srcCodeMapper, 40 | SourceCodeMapper dstCodeMapper, 41 | Set editScriptSet 42 | ) { 43 | // compare class modifiers 44 | if (srcClass.getModifiers() != dstClass.getModifiers()) { 45 | int srcClassLineNumber = CodeAnalysisUtils.getClassLineNumber(srcClass, srcCodeMapper); 46 | int dstClassLineNumber = CodeAnalysisUtils.getClassLineNumber(dstClass, dstCodeMapper); 47 | 48 | String srcClassDeclaration = CodeAnalysisUtils.getClassDeclaration(srcClass, srcCodeMapper); 49 | String dstClassDeclaration = CodeAnalysisUtils.getClassDeclaration(dstClass, dstCodeMapper); 50 | 51 | EditOperation classUpdate = new Update( 52 | null, // no node associated 53 | srcClassLineNumber, 54 | dstClassLineNumber, 55 | srcClassDeclaration, 56 | dstClassDeclaration, 57 | new SyntaxDifference("ClassMetadataDiff: Class modifiers differ") 58 | ); 59 | 60 | editScriptSet.add(classUpdate); 61 | } 62 | } 63 | 64 | 65 | // in an ideal world this would also be able to compare uses of a field in the entire body, then I would be able to 66 | // account for rename refactors in the code base quite cleverly, maybe somethnig to look into. 
67 | 68 | private static void compareFields( 69 | SootClass srcClass, 70 | SootClass dstClass, 71 | SourceCodeMapper srcCodeMapper, 72 | SourceCodeMapper dstCodeMapper, 73 | Set editScriptSet 74 | ) { 75 | Chain srcFields = srcClass.getFields(); 76 | Chain dstFields = dstClass.getFields(); 77 | 78 | Map srcFieldMap = new HashMap<>(); 79 | Map dstFieldMap = new HashMap<>(); 80 | 81 | for (SootField field : srcFields) { 82 | srcFieldMap.put(field.getName(), field); 83 | } 84 | 85 | for (SootField field : dstFields) { 86 | dstFieldMap.put(field.getName(), field); 87 | } 88 | 89 | // matching fields by name, type, and modifiers to try and report update instructions where sensible 90 | Set matchedFields = new HashSet<>(); 91 | 92 | // firstly attempting to match by name 93 | for (SootField srcField : srcFields) { 94 | String fieldName = srcField.getName(); 95 | SootField dstField = dstFieldMap.get(fieldName); 96 | 97 | if (dstField != null) { 98 | matchedFields.add(fieldName); 99 | if (!fieldsAreEqual(srcField, dstField)) { 100 | // update if field types or modifiers differ 101 | int oldLineNumber = CodeAnalysisUtils.getFieldLineNumber(srcField, srcCodeMapper); 102 | int newLineNumber = CodeAnalysisUtils.getFieldLineNumber(dstField, dstCodeMapper); 103 | String oldCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(srcField, srcCodeMapper); 104 | String newCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(dstField, dstCodeMapper); 105 | if (oldCodeSnippet.equals(newCodeSnippet)) { 106 | EditOperation fieldMove = new Move( 107 | null, 108 | oldLineNumber, 109 | newLineNumber, 110 | oldCodeSnippet 111 | ); 112 | editScriptSet.add(fieldMove); 113 | } else { 114 | EditOperation fieldUpdate = new Update( 115 | null, 116 | oldLineNumber, 117 | newLineNumber, 118 | oldCodeSnippet, 119 | newCodeSnippet, 120 | new SyntaxDifference("ClassMetadataDiff: Field " + fieldName + " differs") 121 | ); 122 | editScriptSet.add(fieldUpdate); 123 | } 124 | } 125 | } 126 | } 127 | 128 | // 
secondary matching by type / modifier 129 | for (SootField srcField : srcFields) { 130 | String fieldName = srcField.getName(); 131 | if (matchedFields.contains(fieldName)) continue; 132 | 133 | // look for a destination field with similar properties 134 | SootField bestMatch = null; 135 | for (SootField dstField : dstFields) { 136 | if (matchedFields.contains(dstField.getName())) continue; 137 | 138 | if (fieldsAreSimilar(srcField, dstField)) { 139 | bestMatch = dstField; 140 | break; 141 | } 142 | } 143 | 144 | if (bestMatch != null) { 145 | // field has a close match, so treat as an update 146 | matchedFields.add(bestMatch.getName()); 147 | int oldLineNumber = CodeAnalysisUtils.getFieldLineNumber(srcField, srcCodeMapper); 148 | int newLineNumber = CodeAnalysisUtils.getFieldLineNumber(bestMatch, dstCodeMapper); 149 | String oldCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(srcField, srcCodeMapper); 150 | String newCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(bestMatch, dstCodeMapper); 151 | 152 | EditOperation fieldUpdate = new Update( 153 | null, 154 | oldLineNumber, 155 | newLineNumber, 156 | oldCodeSnippet, 157 | newCodeSnippet, 158 | new SyntaxDifference("ClassMetadataDiff: Field " + fieldName + " differs") 159 | ); 160 | editScriptSet.add(fieldUpdate); 161 | } else { 162 | // no similar field found, treat as a delete 163 | int lineNumber = CodeAnalysisUtils.getFieldLineNumber(srcField, srcCodeMapper); 164 | String codeSnippet = CodeAnalysisUtils.getFieldDeclaration(srcField, srcCodeMapper); 165 | editScriptSet.add(new Delete(null, lineNumber, codeSnippet)); 166 | } 167 | } 168 | 169 | // cleanup with insertion operations 170 | for (SootField dstField : dstFields) { 171 | if (!matchedFields.contains(dstField.getName())) { 172 | int lineNumber = CodeAnalysisUtils.getFieldLineNumber(dstField, dstCodeMapper); 173 | String codeSnippet = CodeAnalysisUtils.getFieldDeclaration(dstField, dstCodeMapper); 174 | editScriptSet.add(new Insert(null, lineNumber, 
codeSnippet)); 175 | } 176 | } 177 | } 178 | 179 | 180 | private static boolean fieldsAreSimilar(SootField field1, SootField field2) { 181 | // check if same protectness and type 182 | // cannot compare actual objects (getType()) because these are loaded in difference Soot Scenes, and hence dont 183 | // hash as expected with .equals(), so using the string repr of each!!! 184 | // todo check isStatic, isFinal, etc. and consider name, annotations, initial values 185 | return ((field1.getModifiers() & Modifier.PUBLIC) == (field2.getModifiers() & Modifier.PUBLIC) || 186 | (field1.getModifiers() & Modifier.PRIVATE) == (field2.getModifiers() & Modifier.PRIVATE) || 187 | (field1.getModifiers() & Modifier.PROTECTED) == (field2.getModifiers() & Modifier.PROTECTED)) 188 | & (field1.getType().toString().equals(field2.getType().toString())); 189 | } 190 | 191 | private static boolean fieldsAreEqual(SootField field1, SootField field2) { 192 | // cmp field types 193 | if (!field1.getType().equals(field2.getType())) { 194 | return false; 195 | } 196 | // cmp modifiers 197 | if (field1.getModifiers() != field2.getModifiers()) { 198 | return false; 199 | } 200 | // TODO: cmp annotations or initial values if necessary 201 | return true; 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/util/CodeAnalysisUtils.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.util; 2 | 3 | import soot.*; 4 | import soot.tagkit.LineNumberTag; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | 12 | /** 13 | * This class aims to assist with parsing when Soot struggles. 14 | * A lot of these functions are to supplement Soot when it struggles to parse, and have a O(n) complexity. As further 15 | * work this could probably be optimised further. 
16 | */ 17 | public class CodeAnalysisUtils { 18 | 19 | public static int getClassLineNumber(SootClass sootClass, SourceCodeMapper codeMapper) { 20 | int lineNumber = sootClass.getJavaSourceStartLineNumber(); 21 | if (lineNumber > 0) { 22 | return lineNumber; 23 | } 24 | 25 | // if line number is not directly available, search for it 26 | String className = sootClass.getShortName(); 27 | String classPattern = String.format(".*\\b(class|interface|enum)\\b\\s+\\b%s\\b.*\\{", Pattern.quote(className)); 28 | Pattern pattern = Pattern.compile(classPattern); 29 | 30 | int totalLines = codeMapper.getTotalLines(); 31 | for (int i = 1; i <= totalLines; i++) { 32 | String line = codeMapper.getCodeLine(i).trim(); 33 | Matcher matcher = pattern.matcher(line); 34 | if (matcher.matches()) { 35 | return i; 36 | } 37 | } 38 | 39 | return -1; 40 | } 41 | 42 | public static String getClassDeclaration(SootClass sootClass, SourceCodeMapper codeMapper) { 43 | int lineNumber = getClassLineNumber(sootClass, codeMapper); 44 | if (lineNumber > 0) { 45 | return codeMapper.getCodeLine(lineNumber).trim(); 46 | } 47 | return ""; 48 | } 49 | 50 | public static int getFieldLineNumber(SootField field, SourceCodeMapper codeMapper) { 51 | int lineNumber = field.getJavaSourceStartLineNumber(); 52 | if (lineNumber > 0) { 53 | return lineNumber; 54 | } 55 | 56 | String fieldName = field.getName(); 57 | String fieldType = field.getType().toString(); 58 | 59 | // parse simple type name without full package declaration (e.g. 
String instead of java.lang.String) 60 | String simpleFieldType = fieldType.substring(fieldType.lastIndexOf('.') + 1); 61 | // regex pattern, possibility of missed case here 62 | String fieldPattern = String.format( 63 | ".*\\b(?:public|protected|private|static|final|transient|volatile|abstract|synchronized|native|strictfp|\\s)*\\b%s\\s*(?:<[^>]+>)?\\s+%s\\b.*;", 64 | Pattern.quote(simpleFieldType), 65 | Pattern.quote(fieldName) 66 | ); 67 | Pattern pattern = Pattern.compile(fieldPattern); 68 | 69 | int totalLines = codeMapper.getTotalLines(); 70 | for (int i = 1; i <= totalLines; i++) { 71 | String line = codeMapper.getCodeLine(i).trim(); 72 | Matcher matcher = pattern.matcher(line); 73 | if (matcher.matches()) { 74 | return i; 75 | } 76 | } 77 | 78 | return -1; 79 | } 80 | 81 | 82 | public static String getFieldDeclaration(SootField field, SourceCodeMapper codeMapper) { 83 | int lineNumber = getFieldLineNumber(field, codeMapper); 84 | if (lineNumber > 0) { 85 | return codeMapper.getCodeLine(lineNumber).trim(); 86 | } 87 | return ""; 88 | } 89 | 90 | 91 | public static int[] getMethodLineRange(SootMethod method, SourceCodeMapper srcCodeMapper) { 92 | int initialLine = method.getJavaSourceStartLineNumber(); 93 | if (initialLine <= 0) { 94 | return new int[]{-1, -1}; 95 | } 96 | 97 | String methodName = method.getName(); 98 | String methodPattern = String.format(".*\\b%s\\b\\s*\\(.*", Pattern.quote(methodName)); 99 | Pattern signatureStartPattern = Pattern.compile(methodPattern); 100 | 101 | int totalLines = srcCodeMapper.getTotalLines(); 102 | int startLine = initialLine; 103 | int endLine = initialLine; 104 | 105 | for (int i = initialLine; i > 0; i--) { 106 | String line = srcCodeMapper.getCodeLine(i).trim(); 107 | if (line.isEmpty()) continue; 108 | 109 | Matcher m = signatureStartPattern.matcher(line); 110 | if (m.matches()) { 111 | startLine = i; 112 | break; 113 | } 114 | } 115 | 116 | boolean foundBrace = false; 117 | for (int i = startLine; i <= totalLines; 
i++) { 118 | String line = srcCodeMapper.getCodeLine(i).trim(); 119 | if (line.contains("{")) { 120 | endLine = i; 121 | break; 122 | } 123 | if (!foundBrace) { 124 | endLine = i; 125 | } 126 | } 127 | 128 | return new int[]{startLine, endLine}; 129 | } 130 | 131 | public static List getParamTokensAndLines( 132 | SootMethod method, 133 | SourceCodeMapper mapper, 134 | List paramLinesOut 135 | ) { 136 | paramLinesOut.clear(); 137 | List paramTokens = new ArrayList<>(); 138 | int[] range = getMethodLineRange(method, mapper); 139 | if (range[0] < 0 || range[1] < 0) { 140 | return paramTokens; 141 | } 142 | 143 | int startLine = range[0]; 144 | int endLine = range[1]; 145 | int totalLines = mapper.getTotalLines(); 146 | 147 | // collect the lines for the signature block 148 | StringBuilder sb = new StringBuilder(); 149 | for (int ln = startLine; ln <= Math.min(endLine, totalLines); ln++) { 150 | sb.append(mapper.getCodeLine(ln)).append("\n"); 151 | } 152 | String signatureText = sb.toString(); 153 | 154 | int openParenIndex = signatureText.indexOf('('); 155 | int closeParenIndex = signatureText.lastIndexOf(')'); 156 | if (openParenIndex < 0 || closeParenIndex < 0 || closeParenIndex < openParenIndex) { 157 | return paramTokens; // no parameters 158 | } 159 | 160 | String paramBlock = signatureText.substring(openParenIndex + 1, closeParenIndex).trim(); 161 | if (paramBlock.isEmpty()) { 162 | return paramTokens; 163 | } 164 | 165 | // naive split on commas 166 | String[] rawParams = paramBlock.split(","); 167 | 168 | // which line contains the param substr is assigned to be line num of that param 169 | List lines = new ArrayList<>(); 170 | for (int ln = startLine; ln <= endLine; ln++) { 171 | lines.add(mapper.getCodeLine(ln)); 172 | } 173 | 174 | for (String raw : rawParams) { 175 | String trimmed = raw.trim(); 176 | if (trimmed.isEmpty()) { 177 | continue; 178 | } 179 | int bestLine = startLine; // fallback 180 | for (int offset = 0; offset < lines.size(); offset++) { 
181 | if (lines.get(offset).contains(trimmed)) { 182 | bestLine = startLine + offset; 183 | break; 184 | } 185 | } 186 | paramTokens.add(trimmed); 187 | paramLinesOut.add(bestLine); 188 | } 189 | return paramTokens; 190 | } 191 | 192 | public static List getMethodAnnotationsWithLines( 193 | SootMethod method, 194 | SourceCodeMapper codeMapper, 195 | List annoLinesOut 196 | ) { 197 | annoLinesOut.clear(); 198 | List annoTokens = new ArrayList<>(); 199 | int[] range = getMethodLineRange(method, codeMapper); 200 | if (range[0] <= 0 || range[1] <= 0) { 201 | return annoTokens; 202 | } 203 | 204 | int startLine = range[0]; 205 | // climb upward until finding lines not starting with '@' i.e. non annotations 206 | int lineNum = startLine - 1; 207 | while (lineNum > 0) { 208 | String line = codeMapper.getCodeLine(lineNum).trim(); 209 | if (line.startsWith("@")) { 210 | String[] rawAnnos = line.split("\\s+@"); 211 | for (int i = 0; i < rawAnnos.length; i++) { 212 | String annoRaw = (i == 0) ? rawAnnos[i] : "@" + rawAnnos[i]; 213 | annoRaw = annoRaw.trim(); 214 | if (!annoRaw.isEmpty()) { 215 | annoTokens.add(annoRaw); 216 | annoLinesOut.add(lineNum); 217 | } 218 | } 219 | lineNum--; 220 | } else { 221 | break; 222 | } 223 | } 224 | return annoTokens; 225 | } 226 | 227 | public static List getAnnotationsLineNumbers(SootMethod method, SourceCodeMapper codeMapper) { 228 | List annotationLines = new ArrayList<>(); 229 | int[] range = getMethodLineRange(method, codeMapper); 230 | if (range[0] <= 0) { 231 | return annotationLines; 232 | } 233 | int startLine = range[0]; 234 | 235 | // crawl upwards until reaching an empty line or a line that does not start with an @ i.e. 
non annotations 236 | int lineNum = startLine - 1; 237 | while (lineNum > 0) { 238 | String code = codeMapper.getCodeLine(lineNum).trim(); 239 | if (code.startsWith("@")) { 240 | annotationLines.add(lineNum); 241 | lineNum--; 242 | } else if (code.isEmpty()) { 243 | break; 244 | } else { 245 | break; 246 | } 247 | } 248 | return annotationLines; 249 | } 250 | 251 | public static int getLineNumber(Unit unit) { 252 | if (unit == null) { 253 | return -1; 254 | } 255 | LineNumberTag tag = (LineNumberTag) unit.getTag("LineNumberTag"); 256 | if (tag != null) { 257 | return tag.getLineNumber(); 258 | } 259 | return -1; 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/matching/models/vf2/VF2State.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.matching.models.vf2; 2 | 3 | import org.pdgdiff.matching.NodeFeasibility; 4 | import org.pdgdiff.graph.GraphTraversal; 5 | import org.pdgdiff.graph.PDG; 6 | import soot.toolkits.graph.pdg.PDGNode; 7 | 8 | import java.util.*; 9 | 10 | /** 11 | * VF2State class to store the state of the VF2 algorithm. This class contains methods to store the current state 12 | * of the VF2 algorithm and perform operations on the state. 
13 | */ 14 | class VF2State { 15 | private final PDG srcPdg; 16 | private final PDG dstPdg; 17 | private final Map mapping; // The current partial mapping 18 | 19 | private final Set T1; // Nodes in PDG1 that are in the mapping or adjacent to mapped nodes 20 | private final Set T2; // Same for PDG2 21 | 22 | private final Set unmappedSrcNodes; // Unmapped nodes in PDG1 (the source pdg) 23 | private final Set unmappedDstNodes; // Unmapped nodes in PDG2 (the dest pdg) 24 | 25 | public VF2State(PDG srcPdg, PDG dstPdg) { 26 | this.srcPdg = srcPdg; 27 | this.dstPdg = dstPdg; 28 | this.mapping = new LinkedHashMap<>(); 29 | 30 | this.unmappedSrcNodes = new LinkedHashSet<>(GraphTraversal.collectNodesBFS(srcPdg)); 31 | this.unmappedDstNodes = new LinkedHashSet<>(GraphTraversal.collectNodesBFS(dstPdg)); 32 | 33 | this.T1 = new LinkedHashSet<>(); 34 | this.T2 = new LinkedHashSet<>(); 35 | } 36 | 37 | public boolean isComplete() { 38 | // once one of the graphs is fully matched (hence this is subgraph isomorphism) 39 | //TODO: consider allowing this: 40 | // return mapping.size() >= Math.min(GraphTraversal.getNodeCount(srcPdg) * 0.5 , GraphTraversal.getNodeCount(dstPdg) * 0.5); 41 | return mapping.size() >= Math.min(GraphTraversal.getNodeCount(srcPdg), GraphTraversal.getNodeCount(dstPdg)); 42 | } 43 | 44 | public Map getMapping() { 45 | return mapping; 46 | } 47 | 48 | public List generateCandidates() { 49 | // TODO: If non determinism prevails, consider implementing a sort on these candidates 50 | // TODO: probably need to sort by id e.g. CFGNODE 1 sorta thing. should hopefully work, 51 | // If not implementing this here, possibly need to implement it in the matchRecursvie function. 
52 | List candidates = new ArrayList<>(); 53 | 54 | if (!T1.isEmpty() && !T2.isEmpty()) { 55 | // Pick nodes from T1 and T2 56 | PDGNode n1 = selectNode(T1); 57 | for (PDGNode n2 : T2) { 58 | if (nodesAreCompatible(n1, n2)) { 59 | candidates.add(new CandidatePair(n1, n2)); 60 | } 61 | } 62 | } else { 63 | // If T1 and T2 are empty, pick any unmapped nodes 64 | PDGNode n1 = selectNode(unmappedSrcNodes); 65 | for (PDGNode n2 : unmappedDstNodes) { 66 | if (nodesAreCompatible(n1, n2)) { 67 | candidates.add(new CandidatePair(n1, n2)); 68 | } 69 | } 70 | } 71 | 72 | return candidates; 73 | } 74 | 75 | public boolean isFeasible(CandidatePair pair) { 76 | // Implement feasibility checks: 77 | // - Syntactic feasibility: node attributes match 78 | // - Semantic feasibility: the mapping is consistent with the graph structure 79 | // TODO arguably there is no point in doing checkSyntacticFeasibility here, 80 | // as this is already tested when generating the candidates. 81 | return checkSyntacticFeasibility(pair) && checkSemanticFeasibility(pair); 82 | } 83 | 84 | public void addPair(CandidatePair pair) { 85 | mapping.put(pair.n1, pair.n2); 86 | unmappedSrcNodes.remove(pair.n1); 87 | unmappedDstNodes.remove(pair.n2); 88 | 89 | // Update T1 and T2 90 | updateTerminalSets(pair.n1, pair.n2); 91 | } 92 | 93 | public void removePair(CandidatePair pair) { 94 | mapping.remove(pair.n1); 95 | unmappedSrcNodes.add(pair.n1); 96 | unmappedDstNodes.add(pair.n2); 97 | 98 | // Recalculate T1 and T2 99 | recalculateTerminalSets(); 100 | } 101 | 102 | // Helper methods... 103 | 104 | private boolean nodesAreCompatible(PDGNode n1, PDGNode n2) { 105 | // check if the nodes are of the same semantic category (Stmt, Decl, etc.), todo should move this into semantic check section. 
106 | if (!NodeFeasibility.isSameNodeCategory(n1, n2)) { 107 | return false; 108 | } 109 | // checks from teh following attributes; NORMAL, ENTRY, CONDHEADER, LOOPHEADER 110 | if (!n1.getAttrib().equals(n2.getAttrib())) { 111 | return false; 112 | } 113 | 114 | return true; 115 | } 116 | 117 | 118 | private boolean checkSyntacticFeasibility(CandidatePair pair) { 119 | // Ensure that the nodes can be mapped based on their attributes 120 | return nodesAreCompatible(pair.n1, pair.n2); 121 | } 122 | 123 | private boolean checkSemanticFeasibility(CandidatePair pair) { 124 | // cmp successors in PDG1 vs mapped successors in PDG2 125 | for (PDGNode succInSrcPdg : srcPdg.getSuccsOf(pair.n1)) { 126 | PDGNode succMappedInDstPdg = this.getMapping().get(succInSrcPdg); 127 | if (succMappedInDstPdg != null) { 128 | boolean dataEdge1 = srcPdg.hasDataEdge(pair.n1, succInSrcPdg); 129 | boolean dataEdge2 = dstPdg.hasDataEdge(pair.n2, succMappedInDstPdg); 130 | if (dataEdge1 != dataEdge2) { 131 | return false; 132 | } 133 | 134 | boolean ctrlEdge1 = srcPdg.hasControlEdge(pair.n1, succInSrcPdg); 135 | boolean ctrlEdge2 = dstPdg.hasControlEdge(pair.n2, succMappedInDstPdg); 136 | if (ctrlEdge1 != ctrlEdge2) { 137 | return false; 138 | } 139 | } 140 | } 141 | 142 | // cmp predecessors in PDG1 vs. 
mapped predecessors in PDG2 143 | for (PDGNode predInSrcPdg : srcPdg.getPredsOf(pair.n1)) { 144 | PDGNode predMappedInDstPdg = this.getMapping().get(predInSrcPdg); 145 | if (predMappedInDstPdg != null) { 146 | boolean dataEdge1 = srcPdg.hasDataEdge(predInSrcPdg, pair.n1); 147 | boolean dataEdge2 = dstPdg.hasDataEdge(predMappedInDstPdg, pair.n2); 148 | if (dataEdge1 != dataEdge2) { 149 | return false; 150 | } 151 | 152 | boolean ctrlEdge1 = srcPdg.hasControlEdge(predInSrcPdg, pair.n1); 153 | boolean ctrlEdge2 = dstPdg.hasControlEdge(predMappedInDstPdg, pair.n2); 154 | if (ctrlEdge1 != ctrlEdge2) { 155 | return false; 156 | } 157 | } 158 | } 159 | 160 | // cross-check every existing mapping pair so that edges from (pair.n1->mappedN1) in PDG1 match edges from (pair.n2->mappedN2) in PDG2. 161 | for (Map.Entry entry : this.getMapping().entrySet()) { 162 | PDGNode alreadyMappedN1 = entry.getKey(); 163 | PDGNode alreadyMappedN2 = entry.getValue(); 164 | 165 | // Forward edges: 166 | // if PDG1 has data/control edge from (pair.n1 -> alreadyMappedN1), then PDG2 must have the same edge type from (pair.n2 -> alreadyMappedN2) 167 | boolean dataEdge1 = srcPdg.hasDataEdge(pair.n1, alreadyMappedN1); 168 | boolean dataEdge2 = dstPdg.hasDataEdge(pair.n2, alreadyMappedN2); 169 | if (dataEdge1 != dataEdge2) { 170 | return false; 171 | } 172 | boolean ctrlEdge1 = srcPdg.hasControlEdge(pair.n1, alreadyMappedN1); 173 | boolean ctrlEdge2 = dstPdg.hasControlEdge(pair.n2, alreadyMappedN2); 174 | if (ctrlEdge1 != ctrlEdge2) { 175 | return false; 176 | } 177 | 178 | // Reverse edges: 179 | // if PDG1 has data/control edge from (alreadyMappedN1 -> pair.n1), then PDG2 must have the same edge type from (alreadyMappedN2 -> pair.n2). 
180 | dataEdge1 = srcPdg.hasDataEdge(alreadyMappedN1, pair.n1); 181 | dataEdge2 = dstPdg.hasDataEdge(alreadyMappedN2, pair.n2); 182 | if (dataEdge1 != dataEdge2) { 183 | return false; 184 | } 185 | ctrlEdge1 = srcPdg.hasControlEdge(alreadyMappedN1, pair.n1); 186 | ctrlEdge2 = dstPdg.hasControlEdge(alreadyMappedN2, pair.n2); 187 | if (ctrlEdge1 != ctrlEdge2) { 188 | return false; 189 | } 190 | } 191 | 192 | return true; 193 | 194 | } 195 | 196 | private void updateTerminalSets(PDGNode n1, PDGNode n2) { 197 | // Add neighbours of n1 to T1 if they are not mapped 198 | for (PDGNode neighbour : n1.getDependents()) { 199 | if (!mapping.containsKey(neighbour)) { 200 | T1.add(neighbour); 201 | } 202 | } 203 | for (PDGNode neighbour : n1.getBackDependets()) { 204 | if (!mapping.containsKey(neighbour)) { 205 | T1.add(neighbour); 206 | } 207 | } 208 | 209 | // Same for n2 210 | for (PDGNode neighbour : n2.getDependents()) { 211 | if (!mapping.containsValue(neighbour)) { 212 | T2.add(neighbour); 213 | } 214 | } 215 | for (PDGNode neighbour : n2.getBackDependets()) { 216 | if (!mapping.containsValue(neighbour)) { 217 | T2.add(neighbour); 218 | } 219 | } 220 | 221 | // Remove n1 and n2 from T1 and T2 222 | T1.remove(n1); 223 | T2.remove(n2); 224 | } 225 | 226 | private void recalculateTerminalSets() { 227 | T1.clear(); 228 | T2.clear(); 229 | for (PDGNode mappedNode1 : mapping.keySet()) { 230 | for (PDGNode neighbour : mappedNode1.getDependents()) { 231 | if (!mapping.containsKey(neighbour)) { 232 | T1.add(neighbour); 233 | } 234 | } 235 | for (PDGNode neighbour : mappedNode1.getBackDependets()) { 236 | if (!mapping.containsKey(neighbour)) { 237 | T1.add(neighbour); 238 | } 239 | } 240 | } 241 | for (PDGNode mappedNode2 : mapping.values()) { 242 | for (PDGNode neighbour : mappedNode2.getDependents()) { 243 | if (!mapping.containsValue(neighbour)) { 244 | T2.add(neighbour); 245 | } 246 | } 247 | for (PDGNode neighbour : mappedNode2.getBackDependets()) { 248 | if 
(!mapping.containsValue(neighbour)) { 249 | T2.add(neighbour); 250 | } 251 | } 252 | } 253 | } 254 | 255 | private PDGNode selectNode(Set nodeSet) { 256 | // TODO: implement a more sophisticated node selection strategy here 257 | // ATM return any node from the set 258 | return nodeSet.iterator().next(); 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /benchmark/evaluation-scripts/analysis_line_num_granularity.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import ast 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import seaborn as sns 6 | 7 | df = pd.read_csv("diff_results_gumtree_indiv_line_nums.csv") 8 | 9 | src_del = 'Deleted Lines (Src) (SootOK)' 10 | src_upd = 'Updated Lines (Src) (SootOk)' 11 | src_move = 'Moved Lines (Src) (SootOk)' 12 | 13 | dest_ins = 'Inserted Lines (Dst) (SootOK)' 14 | dest_upd = 'Updated Lines (Dst) (SootOk)' 15 | dest_move = 'Moved Lines (Dst) (SootOk)' 16 | 17 | # helper function to safely convert a string representation of a list into an actual list 18 | def parse_list(cell): 19 | if pd.isna(cell) or cell == "": 20 | return [] 21 | try: 22 | return ast.literal_eval(cell) if isinstance(cell, str) else cell 23 | except Exception: 24 | return [] 25 | 26 | for col in [src_del, src_upd, src_move, dest_ins, dest_upd, dest_move]: 27 | df[col] = df[col].apply(parse_list) 28 | 29 | def aggregate_sootok(row): 30 | # for gumtree, consider moved lines as well, for pdg we dont. 
"""Compare the changed lines reported by each diff approach against the GumTree baseline.

Reads ``diff_results_gumtree_indiv_line_nums.csv`` from the working directory, prints summary
statistics and writes the figures into ``plots/``.
"""
import ast
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# savefig does not create missing directories, so make sure the output folder exists.
os.makedirs("plots", exist_ok=True)

df = pd.read_csv("diff_results_gumtree_indiv_line_nums.csv")

src_del = 'Deleted Lines (Src) (SootOK)'
src_upd = 'Updated Lines (Src) (SootOk)'
src_move = 'Moved Lines (Src) (SootOk)'

dest_ins = 'Inserted Lines (Dst) (SootOK)'
dest_upd = 'Updated Lines (Dst) (SootOk)'
dest_move = 'Moved Lines (Dst) (SootOk)'


def parse_list(cell):
    """Safely convert a string representation of a list into a list; [] on empty or bad input."""
    if pd.isna(cell) or cell == "":
        return []
    try:
        return ast.literal_eval(cell) if isinstance(cell, str) else cell
    except Exception:
        return []


def aggregate_sootok(row):
    """Return the sorted, de-duplicated source and destination lines touched by one row."""
    src = row[src_del] + row[src_upd]
    dest = row[dest_ins] + row[dest_upd]
    # For GumTree, moved lines are considered as well; for the PDG approaches they are not.
    if row["Approach"] == "GumTree":
        src = src + row[src_move]
        dest = dest + row[dest_move]
    return pd.Series({
        'Aggregated_Src_SootOk': sorted(set(src)),
        'Aggregated_Dest_SootOk': sorted(set(dest))
    })


def count_lines(series):
    """Count the total number of lines in a column whose cells are lists."""
    return series.apply(lambda x: len(x) if isinstance(x, list) else 0).sum()


def plot_percentile_curve(metric, approach_names, out_path, tick_step=5):
    """Plot, per approach, the value of ``metric`` at every percentile from 0 to 100."""
    percentiles = np.arange(0, 101)
    plt.figure(figsize=(12, 6))
    for approach in approach_names:
        data = diff_df[diff_df["Approach"] == approach][metric]
        perc_values = np.percentile(data, percentiles)
        sns.lineplot(x=percentiles, y=perc_values, label=approach)
    plt.xlabel("Percentile")
    plt.ylabel(metric)
    plt.title(f"Percentile Curve for {metric} by Approach")
    plt.legend()
    ax = plt.gca()
    y_max = diff_df[metric].max()
    ax.set_yticks(np.arange(0, y_max + tick_step, tick_step))
    plt.grid(True)
    plt.savefig(out_path, dpi=600, bbox_inches='tight')


def plot_src_dest_bars(metric, summary_df, out_path, width=0.35):
    """Plot the mean source vs destination value of ``metric`` per approach as grouped bars."""
    approach_names = summary_df["Approach"]
    x = np.arange(len(approach_names))
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(x - width / 2, summary_df[f"{metric}_Src"], width, label=f"Source {metric}")
    ax.bar(x + width / 2, summary_df[f"{metric}_Dest"], width, label=f"Destination {metric}")
    ax.set_ylabel(f"Average {metric} (lines)")
    ax.set_title(f"Average {metric} by Approach: Source vs Destination")
    ax.set_xticks(x)
    ax.set_xticklabels(approach_names)
    ax.legend()
    plt.tight_layout()
    plt.savefig(out_path, dpi=600, bbox_inches='tight')


for col in [src_del, src_upd, src_move, dest_ins, dest_upd, dest_move]:
    df[col] = df[col].apply(parse_list)

df[['Aggregated_Src_SootOk', 'Aggregated_Dest_SootOk']] = df.apply(aggregate_sootok, axis=1)

# --- misses / hallucinations of every approach relative to the GumTree baseline ---
results = []

for (file, commit), group in df.groupby(["Changed File", "Commit ID"]):
    baseline = group[group["Approach"] == "GumTree"]
    if baseline.empty:
        continue
    baseline_row = baseline.iloc[0]

    baseline_src = set(baseline_row["Aggregated_Src_SootOk"])
    baseline_dest = set(baseline_row["Aggregated_Dest_SootOk"])
    baseline_lines = baseline_src | baseline_dest

    for idx, row in group.iterrows():
        if row["Approach"] == "GumTree":
            continue  # skip the baseline itself

        approach_src = set(row["Aggregated_Src_SootOk"])
        approach_dest = set(row["Aggregated_Dest_SootOk"])
        # union of lines for the current approach
        approach_lines = approach_src | approach_dest

        # lines that GumTree reports but the approach does not: MISSES
        misses = baseline_lines - approach_lines
        # lines that the approach reports but are not in GumTree: HALLUCINATIONS
        hallucinations = approach_lines - baseline_lines

        results.append({
            "Changed File": file,
            "Commit ID": commit,
            "Approach": row["Approach"],
            "GumTree_Count": len(baseline_lines),
            "Approach_Count": len(approach_lines),
            "Misses": len(misses),
            "Hallucinations": len(hallucinations),
            "Misses_Src": len(baseline_src - approach_src),
            "Misses_Dest": len(baseline_dest - approach_dest),
            "Hallucinations_Src": len(approach_src - baseline_src),
            "Hallucinations_Dest": len(approach_dest - baseline_dest),
        })

diff_df = pd.DataFrame(results)

# --- hybrid: per file/commit, whichever of VF2 and GED has the lower total error (VF2 on ties) ---
hybrid_rows = []
for (file, commit), group in diff_df.groupby(["Changed File", "Commit ID"]):
    pdg_vf2 = group[group["Approach"] == "PDGdiff-VF2"]
    pdg_ged = group[group["Approach"] == "PDGdiff-GED"]
    if not pdg_vf2.empty and not pdg_ged.empty:
        vf2_err = pdg_vf2.iloc[0]["Misses"] + pdg_vf2.iloc[0]["Hallucinations"]
        ged_err = pdg_ged.iloc[0]["Misses"] + pdg_ged.iloc[0]["Hallucinations"]
        chosen_row = pdg_vf2.iloc[0] if vf2_err <= ged_err else pdg_ged.iloc[0]
        chosen_row = chosen_row.copy()
        chosen_row["Approach"] = "PDG-Hybrid"
        hybrid_rows.append(chosen_row)

hybrid_df = pd.DataFrame(hybrid_rows)
diff_df = pd.concat([diff_df, hybrid_df], ignore_index=True)

# --- aggregate summary statistics per approach ---
summary = diff_df.groupby("Approach").agg({
    "Misses": ["mean", "sum"],
    "Hallucinations": ["mean", "sum"]
})
print("\nSummary statistics by approach:")
print(summary)

print("\nOverall Misses/Hallucinations describe():")
print(diff_df[["Misses", "Hallucinations"]].describe())

# sanity check only
print("\nRows with negative Misses or Hallucinations (should be empty):")
print(diff_df[(diff_df["Misses"] < 0) | (diff_df["Hallucinations"] < 0)])


for approach in diff_df["Approach"].unique():
    group = diff_df[diff_df["Approach"] == approach]
    count = group.shape[0]
    mean_miss = group["Misses"].mean()
    mean_halluc = group["Hallucinations"].mean()
    median_miss = group["Misses"].median()
    median_halluc = group["Hallucinations"].median()
    pct80_miss = group["Misses"].quantile(0.8)
    pct80_halluc = group["Hallucinations"].quantile(0.8)
    pct90_miss = group["Misses"].quantile(0.9)
    pct90_halluc = group["Hallucinations"].quantile(0.9)

    # Pearson correlation with GumTree counts. TODO: arguably a bit primitive as only counts are used
    pearson_corr = group["Approach_Count"].corr(group["GumTree_Count"])

    print(f"-- {approach} --")
    print(f"Count (rows) : {count}")
    print(f"Mean Abs Error (Misses) : {mean_miss:.2f}")
    print(f"Median Abs Error (Misses) : {median_miss:.2f}")
    print(f"80th pct Abs Error (Misses): {pct80_miss:.2f}")
    print(f"90th pct Abs Error (Misses): {pct90_miss:.2f}")
    print(f"Mean Abs Error (Halluc) : {mean_halluc:.2f}")
    print(f"Median Hallucinations : {median_halluc:.2f}")
    print(f"80th pct Hallucinations : {pct80_halluc:.2f}")
    print(f"90th pct Hallucinations : {pct90_halluc:.2f}")

    print(f"Pearson correlation with GumTree: {pearson_corr:.3f}" if pd.notna(pearson_corr) else
          "Pearson correlation with GumTree: N/A (not enough variation or data points)")
    print("")


approaches = sorted(diff_df["Approach"].unique())

# --- violin plots ---
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
sns.violinplot(data=diff_df, x="Approach", y="Misses", inner="quartile", hue="Approach", palette="coolwarm", cut=0)
plt.title("Misses Distribution by Approach")
plt.xticks(rotation=45)

plt.subplot(1, 2, 2)
sns.violinplot(data=diff_df, x="Approach", y="Hallucinations", inner="quartile", hue="Approach", palette="coolwarm", cut=0)
plt.title("Hallucinations Distribution by Approach")
plt.xticks(rotation=45)

plt.tight_layout()
plt.savefig("plots/violin.png", dpi=600, bbox_inches='tight')
# plt.show()

# --- percentile curves ---
plot_percentile_curve("Misses", approaches, "plots/misses.png")
plot_percentile_curve("Hallucinations", approaches, "plots/hallucinations.png")

# --- source vs destination averages ---
summary_src_dest = diff_df.groupby("Approach").agg({
    "Misses_Src": "mean",
    "Misses_Dest": "mean",
    "Hallucinations_Src": "mean",
    "Hallucinations_Dest": "mean"
}).reset_index()

print("\nAverage Values by Approach (Source vs Destination):")
print(summary_src_dest)

plot_src_dest_bars("Misses", summary_src_dest, "plots/avg_misses.png")
plot_src_dest_bars("Hallucinations", summary_src_dest, "plots/avg_hallucinations.png")

# --- operation types as a percentage of all changed lines ---
all_op_cols = [src_del, dest_ins, src_upd, dest_upd, src_move, dest_move]
all_op_labels = ["Deleted (Src)", "Inserted (Dst)", "Updated (Src)", "Updated (Dst)", "Moved (Src)", "Moved (Dst)"]

# for pdg-based approaches, exclude moves
non_move_op_cols = [src_del, dest_ins, src_upd, dest_upd]
non_move_op_labels = ["Deleted (Src)", "Inserted (Dst)", "Updated (Src)", "Updated (Dst)"]

op_stats = []
for approach, group in df.groupby("Approach"):
    if approach in ["PDGdiff-GED", "PDGdiff-VF2"]:
        counted = zip(non_move_op_cols, non_move_op_labels)
    else:
        counted = zip(all_op_cols, all_op_labels)

    # Operations that are not counted for this approach (moves for GED/VF2) stay at zero.
    op_sum = {label: 0 for label in all_op_labels}
    for col, label in counted:
        op_sum[label] = count_lines(group[col])

    total_lines = sum(op_sum.values())
    percentages = {label: (op_sum[label] / total_lines) * 100 if total_lines > 0 else 0 for label in all_op_labels}

    record = {"Approach": approach, "Total_Lines": total_lines}
    record.update(percentages)
    op_stats.append(record)

op_stats_df = pd.DataFrame(op_stats).set_index("Approach").sort_index()

print("\n--- Operation Type Percentages by Approach (SootOK columns, excluding moves for GED/VF2) ---")
print(op_stats_df)

plt.figure(figsize=(10, 6))
approaches = op_stats_df.index.tolist()
x = np.arange(len(approaches))
bottom = np.zeros(len(approaches))

for label in all_op_labels:
    perc = op_stats_df[label].values
    plt.bar(x, perc, bottom=bottom, label=label)
    bottom += perc

plt.xticks(x, approaches, rotation=45, ha='right')
plt.ylabel("Percentage (%)")
plt.title("Operation Types as Percentage of Total Changed Lines (excluding lines beyond the scope of Soot)")
plt.legend()
plt.tight_layout()
plt.savefig("plots/operation_types_percentage_stacked.png", dpi=600, bbox_inches='tight')
# plt.show()

# --- same breakdown with moves excluded for every approach ---
operation_cols = [src_del, dest_ins, src_upd, dest_upd]
operation_labels = ["Deleted (Src)", "Inserted (Dst)", "Updated (Src)", "Updated (Dst)"]

# total count of each operation type per approach
op_summary = df.groupby("Approach").apply(
    lambda group: pd.Series({
        label: count_lines(group[col])
        for label, col in zip(operation_labels, operation_cols)
    })
)

op_totals = op_summary.sum(axis=1)
op_percentages = op_summary.div(op_totals, axis=0) * 100

fig, ax = plt.subplots(figsize=(10, 6))
approaches = op_percentages.index.tolist()
x = np.arange(len(approaches))
bottom = np.zeros(len(approaches))

for label in operation_labels:
    percentages = op_percentages[label].values
    ax.bar(x, percentages, bottom=bottom, label=label)
    bottom += percentages

ax.set_xlabel("Approach")
ax.set_ylabel("Percentage (%)")
ax.set_title("Operation Types as Percentage of Total Changed Lines (Excluding Moves)")
ax.set_xticks(x)
ax.set_xticklabels(approaches, rotation=45)
ax.legend(title="Operation Type")

plt.tight_layout()
plt.savefig("plots/percentage_operations_no_moves_all.png", dpi=600, bbox_inches='tight')
plt.show()
methodName; 28 | this.paramTokens = paramTokens; 29 | this.annotations = annotations; 30 | this.thrownExceptions = thrownExceptions; 31 | } 32 | } 33 | 34 | public static ParsedSignature parseMethodSignature(SootMethod method, SourceCodeMapper mapper) throws IOException { 35 | // convert integer modifiers to a set of strings: e.g. {"public", "static"} 36 | Set modifierSet = new HashSet<>(); 37 | int mods = method.getModifiers(); 38 | String modsString = Modifier.toString(mods); // e.g. "public static final" 39 | if (!modsString.isEmpty()) { 40 | // split on whitespace to get indiv tokens 41 | modifierSet.addAll(Arrays.asList(modsString.split("\\s+"))); 42 | } 43 | 44 | String retType = method.getReturnType() != null ? method.getReturnType().toString() : ""; 45 | String name = method.getName(); 46 | 47 | List exceptionClasses = method.getExceptions(); 48 | List thrownExceptions = new ArrayList<>(); 49 | for (SootClass exception : exceptionClasses) { 50 | thrownExceptions.add(exception.getName()); 51 | } 52 | 53 | // to be populated later, no soot native way to get all the info required afaik 54 | List paramLines = new ArrayList<>(); 55 | List paramTokens = CodeAnalysisUtils.getParamTokensAndLines(method, mapper, paramLines); 56 | 57 | // Annotation tokens (e.g. "@Override") + line nums for reporting 58 | List annoLines = new ArrayList<>(); 59 | List annotations = CodeAnalysisUtils.getMethodAnnotationsWithLines(method, mapper, annoLines); 60 | 61 | return new ParsedSignature(modifierSet, retType, name, paramTokens, annotations, thrownExceptions); 62 | } 63 | 64 | 65 | static List compareSignatures( 66 | ParsedSignature oldSig, ParsedSignature newSig, 67 | SootMethod oldMethod, SootMethod newMethod, 68 | SourceCodeMapper oldMapper, SourceCodeMapper newMapper 69 | ) { 70 | List ops = new ArrayList<>(); 71 | 72 | // these are approx'd and could actually return slightly off numbers if hard to parse. 
73 | int[] oldRange = CodeAnalysisUtils.getMethodLineRange(oldMethod, oldMapper); 74 | int[] newRange = CodeAnalysisUtils.getMethodLineRange(newMethod, newMapper); 75 | 76 | int oldLine = (oldRange[0] > 0) ? oldRange[0] : -1; 77 | int newLine = (newRange[0] > 0) ? newRange[0] : -1; 78 | 79 | // cmp modifiers todo test this, not sure how useful this is 80 | Set removedModifiers = new HashSet<>(oldSig.modifiers); 81 | removedModifiers.removeAll(newSig.modifiers); 82 | 83 | Set addedModifiers = new HashSet<>(newSig.modifiers); 84 | addedModifiers.removeAll(oldSig.modifiers); 85 | 86 | for (String mod : removedModifiers) { 87 | ops.add(new Delete( 88 | null, oldLine, 89 | "Removed modifier: " + mod 90 | )); 91 | } 92 | for (String mod : addedModifiers) { 93 | ops.add(new Insert( 94 | null, newLine, 95 | "Added modifier: " + mod 96 | )); 97 | } 98 | 99 | // cmp return type 100 | if (!oldSig.returnType.equals(newSig.returnType)) { 101 | SyntaxDifference diff = new SyntaxDifference( 102 | "Return type changed from " + oldSig.returnType + " to " + newSig.returnType 103 | ); 104 | ops.add( 105 | new Update(null, oldLine, newLine, 106 | oldSig.returnType, newSig.returnType, diff) 107 | ); 108 | } 109 | 110 | // cmp method name 111 | if (!oldSig.methodName.equals(newSig.methodName)) { 112 | SyntaxDifference diff = new SyntaxDifference( 113 | "Method name changed from " + oldSig.methodName + " to " + newSig.methodName 114 | ); 115 | ops.add( 116 | new Update(null, oldLine, newLine, 117 | oldSig.methodName, newSig.methodName, diff) 118 | ); 119 | } 120 | 121 | List oldParamLines = new ArrayList<>(); 122 | List oldParamTokens = CodeAnalysisUtils.getParamTokensAndLines(oldMethod, oldMapper, oldParamLines); 123 | 124 | List newParamLines = new ArrayList<>(); 125 | List newParamTokens = CodeAnalysisUtils.getParamTokensAndLines(newMethod, newMapper, newParamLines);; 126 | 127 | 128 | ops.addAll(compareStringListsDP(oldParamTokens, newParamTokens, 129 | oldParamLines, newParamLines, 
130 | "Parameter changed")); 131 | // 132 | // if (oldParamLines.size() == 1 && newParamLines.size() == 1) { 133 | // // TODO : avoid accidently marking a inserted param as a insert to the entire line, if the param changed adn multiple params exist on the same li 134 | // This is debatable, if i mark just one side as an insert it will be more equatable with gumtree. However, I do think its less useful as a tool. hard to know. 135 | // if (!oldSig.paramTypes.equals(newSig.paramTypes)) { 136 | // SyntaxDifference diff = new SyntaxDifference("Parameter list changed"); 137 | // ops.add( 138 | // new Update(null, oldParamLines.get(0), newParamLines.get(0), 139 | // oldMapper.getCodeLine(oldParamLines.get(0)),newMapper.getCodeLine(newParamLines.get(0)), diff) 140 | // ); 141 | // } 142 | // } else { 143 | // // handle multi line parameters; 144 | // ops.addAll( 145 | // compareStringListsDP(oldSig.paramTypes, newSig.paramTypes, oldParamLines, newParamLines) 146 | // ); 147 | // } 148 | 149 | 150 | // List oldAnnotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(oldMethod, oldMapper); 151 | // List newAnnotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(newMethod, newMapper); 152 | // 153 | // // NB this is not accounting for field annotations. 
todo fix 154 | // // overwrite annotations using line numbers, unfortunately soot does not provide a way to get annotations 155 | // 156 | // oldSig.annotations = new ArrayList<>(); 157 | // newSig.annotations = new ArrayList<>(); 158 | // for (int i = 0; i < oldAnnotationLines.size(); i++) { 159 | // oldSig.annotations.add(oldMapper.getCodeLine(oldAnnotationLines.get(i))); 160 | // } 161 | // for (int i = 0; i < newAnnotationLines.size(); i++) { 162 | // newSig.annotations.add(newMapper.getCodeLine(newAnnotationLines.get(i))); 163 | // } 164 | // 165 | // 166 | // if (oldSig.annotations.size() == 1 && newSig.annotations.size() == 1) { 167 | // if (!Objects.equals(oldSig.annotations.get(0), newSig.annotations.get(0))) { 168 | // SyntaxDifference diff = new SyntaxDifference("Annotation changed"); 169 | // ops.add( 170 | // new Update(null, oldAnnotationLines.get(0), newAnnotationLines.get(0), 171 | // oldSig.annotations.get(0), newSig.annotations.get(0), diff) 172 | // ); 173 | // } 174 | // } else { 175 | // ops.addAll( 176 | // compareStringListsDP(oldSig.annotations, newSig.annotations, oldAnnotationLines, newAnnotationLines) 177 | // ); 178 | // } 179 | 180 | List oldAnnoLines = new ArrayList<>(); 181 | List oldAnnoTokens = CodeAnalysisUtils.getMethodAnnotationsWithLines(oldMethod, oldMapper, oldAnnoLines); 182 | 183 | 184 | List newAnnoLines = new ArrayList<>(); 185 | List newAnnoTokens = CodeAnalysisUtils.getMethodAnnotationsWithLines(newMethod, newMapper, newAnnoLines); 186 | 187 | ops.addAll(compareStringListsDP(oldAnnoTokens, newAnnoTokens, 188 | oldAnnoLines, newAnnoLines, 189 | "Annotation changed")); 190 | 191 | 192 | List oldExceptions = oldSig.thrownExceptions; 193 | List newExceptions = newSig.thrownExceptions; 194 | 195 | // following are being classified as deletes in order to remain more consitent with gumtree, but perhaps 196 | // they should be updates (esp based on how other bits of this impl are treating these sorta changes) 197 | 198 | Set 
removedExceptions = new HashSet<>(oldExceptions); 199 | removedExceptions.removeAll(newExceptions); 200 | // todo: again should this be deletes or updates... 201 | for (String ex : removedExceptions) { 202 | ops.add(new Delete(null, oldLine, "Removed exception from func sig: " + ex)); 203 | } 204 | 205 | Set addedExceptions = new HashSet<>(newExceptions); 206 | addedExceptions.removeAll(oldExceptions); 207 | // todo: again should this be inserts or updates... 208 | for (String ex : addedExceptions) { 209 | ops.add(new Insert(null, newLine, "Added exception from func sig: " + ex)); 210 | } 211 | 212 | return ops; 213 | } 214 | 215 | // left to right dynamic programming approach to try and match up parameters (or annos), basically a edit distance optimiation 216 | // nb soot gives parameter types, not names 217 | 218 | 219 | // generic DP function used for params and for annotations 220 | private static List compareStringListsDP( 221 | List oldEntries, // old parameter types or old annotation lines 222 | List newEntries, // new parameter types or new annotation lines 223 | List oldEntriesLines, // old parameter line numbers or old annotation line numbers 224 | List newEntriesLines, // new parameter line numbers or new annotation line numbers 225 | String label 226 | ) { 227 | List ops = new ArrayList<>(); 228 | int m = oldEntries.size(); 229 | int n = newEntries.size(); 230 | 231 | double[][] dp = new double[m + 1][n + 1]; 232 | String[][] opsTable = new String[m + 1][n + 1]; 233 | 234 | // init DP table 235 | for (int i = 0; i <= m; i++) { 236 | dp[i][0] = i; 237 | opsTable[i][0] = "DELETE"; 238 | } 239 | for (int j = 0; j <= n; j++) { 240 | dp[0][j] = j; 241 | opsTable[0][j] = "INSERT"; 242 | } 243 | opsTable[0][0] = "NO_CHANGE"; 244 | 245 | // fill DP 246 | for (int i = 1; i <= m; i++) { 247 | for (int j = 1; j <= n; j++) { 248 | String oldStr = oldEntries.get(i - 1); 249 | String newStr = newEntries.get(j - 1); 250 | 251 | if (oldStr.equals(newStr)) { 252 | 
dp[i][j] = dp[i - 1][j - 1]; 253 | opsTable[i][j] = "NO_CHANGE"; 254 | } else { 255 | double deleteCost = dp[i - 1][j] + 1; 256 | double insertCost = dp[i][j - 1] + 1; 257 | 258 | double similarity = JaroWinklerSimilarity.jaroSimilarity(oldStr, newStr); 259 | double updateCost = dp[i - 1][j - 1] + (1.0 - similarity); 260 | 261 | if (deleteCost <= insertCost && deleteCost <= updateCost) { 262 | dp[i][j] = deleteCost; 263 | opsTable[i][j] = "DELETE"; 264 | } else if (insertCost <= deleteCost && insertCost <= updateCost) { 265 | dp[i][j] = insertCost; 266 | opsTable[i][j] = "INSERT"; 267 | } else { 268 | dp[i][j] = updateCost; 269 | opsTable[i][j] = "UPDATE"; 270 | } 271 | } 272 | } 273 | } 274 | 275 | // backtrack 276 | int i = m, j = n; 277 | while (i > 0 || j > 0) { 278 | String operation = opsTable[i][j]; 279 | if ("NO_CHANGE".equals(operation)) { 280 | i--; 281 | j--; 282 | } else if ("DELETE".equals(operation)) { 283 | int oldLineNum = oldEntriesLines.get(i - 1); 284 | String entry = oldEntries.get(i - 1); 285 | ops.add(new Delete(null, oldLineNum, entry)); 286 | i--; 287 | } else if ("INSERT".equals(operation)) { 288 | int newLineNum = newEntriesLines.get(j - 1); 289 | String entry = newEntries.get(j - 1); 290 | ops.add(new Insert(null, newLineNum, entry)); 291 | j--; 292 | } else if ("UPDATE".equals(operation)) { 293 | int oldLineNum = oldEntriesLines.get(i - 1); 294 | int newLineNum = newEntriesLines.get(j - 1); 295 | String oldEntry = oldEntries.get(i - 1); 296 | String newEntry = newEntries.get(j - 1); 297 | 298 | SyntaxDifference diff = new SyntaxDifference( 299 | label + " from \"" + oldEntry + "\" to \"" + newEntry + "\"" 300 | ); 301 | ops.add(new Update(null, oldLineNum, newLineNum, oldEntry, newEntry, diff)); 302 | i--; 303 | j--; 304 | } 305 | } 306 | 307 | Collections.reverse(ops); 308 | return ops; 309 | } 310 | } 311 | -------------------------------------------------------------------------------- 
/src/main/java/org/pdgdiff/export/DiffGraphExporter.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.export; 2 | 3 | import org.pdgdiff.graph.GraphGenerator; 4 | import org.pdgdiff.graph.PDG; 5 | import org.pdgdiff.matching.GraphMapping; 6 | import org.pdgdiff.matching.NodeMapping; 7 | import soot.Unit; 8 | import soot.tagkit.LineNumberTag; 9 | import soot.toolkits.graph.pdg.PDGNode; 10 | 11 | import java.io.File; 12 | import java.io.FileWriter; 13 | import java.io.IOException; 14 | import java.io.PrintWriter; 15 | import java.util.*; 16 | import java.util.stream.Collectors; 17 | 18 | public class DiffGraphExporter { 19 | 20 | /** 21 | * This generates a singular 'delta' dot file, i.e. a way of representing the changes that have happeend on one graph and 22 | * taken it to another graph 23 | */ 24 | public static void exportDiffPDGs( 25 | GraphMapping graphMapping, 26 | List pdgListSrc, 27 | List pdgListDst, 28 | String outputDir 29 | ) { 30 | File outDir = new File(outputDir); 31 | if (!outDir.exists()) { 32 | outDir.mkdirs(); 33 | } 34 | 35 | // one pdg diff's dot file for each matched pair 36 | Map matchedPairs = graphMapping.getGraphMapping(); 37 | for (Map.Entry entry : matchedPairs.entrySet()) { 38 | PDG srcPDG = entry.getKey(); 39 | PDG dstPDG = entry.getValue(); 40 | NodeMapping nodeMapping = graphMapping.getNodeMapping(srcPDG); 41 | 42 | String srcMethod = (srcPDG.getCFG() != null) 43 | ? srcPDG.getCFG().getBody().getMethod().getName() 44 | : "UnknownSrcMethod"; 45 | String dstMethod = (dstPDG.getCFG() != null) 46 | ? dstPDG.getCFG().getBody().getMethod().getName() 47 | : "UnknownDstMethod"; 48 | 49 | String dotFileName = "diff_" + srcMethod + "_TO_" + dstMethod + ".dot"; 50 | File dotFile = new File(outDir, dotFileName); 51 | 52 | exportSingleDiffPDG(srcPDG, dstPDG, nodeMapping, dotFile); 53 | } 54 | 55 | // identify unmatched PDGs in source vs. 
destination 56 | List unmatchedInSrc = pdgListSrc.stream() 57 | .filter(pdg -> !matchedPairs.containsKey(pdg)) 58 | .collect(Collectors.toList()); 59 | List unmatchedInDst = pdgListDst.stream() 60 | .filter(pdg -> !matchedPairs.containsValue(pdg)) 61 | .collect(Collectors.toList()); 62 | 63 | 64 | // NB: if no match, i.e. a graph is inserted or deleted, we can't show a diff and no delta will be made. 65 | } 66 | 67 | /** 68 | * exprts a single .dot file showing the diff between one src PDG and one dst PDG 69 | * 70 | * This aims to follow similar logic to Editscriptgeneration 71 | */ 72 | private static void exportSingleDiffPDG( 73 | PDG srcPDG, 74 | PDG dstPDG, 75 | NodeMapping nodeMapping, 76 | File outputDotFile 77 | ) { 78 | try (PrintWriter writer = new PrintWriter(new FileWriter(outputDotFile))) { 79 | writer.println("digraph PDG_DIFF {"); 80 | writer.println(" rankdir=TB;"); 81 | writer.println(" node [shape=box, style=filled, fontname=Arial];"); 82 | writer.println(" edge [fontname=Arial];"); 83 | 84 | Map srcToDst = nodeMapping.getNodeMapping(); 85 | Map dstToSrc = nodeMapping.getReverseNodeMapping(); 86 | 87 | Set srcNodes = new HashSet<>(); 88 | srcPDG.iterator().forEachRemaining(srcNodes::add); 89 | Set dstNodes = new HashSet<>(); 90 | dstPDG.iterator().forEachRemaining(dstNodes::add); 91 | 92 | // map to store node details (label and color) keyed by their dot id 93 | Map nodeDataMap = new HashMap<>(); 94 | 95 | // process nodes from source PDG (matched or deleted nodes) 96 | for (PDGNode srcNode : srcNodes) { 97 | PDGNode dstNode = srcToDst.get(srcNode); 98 | String nodeId = getNodeId(srcNode, true); 99 | 100 | if (dstNode == null) { 101 | // node deleted in dst 102 | String label = removePrefix(srcNode.toString()); 103 | String color = "#FFCCCC"; // red for deletion 104 | nodeDataMap.put(nodeId, new NodeData(createNodeLabel(label, srcNode), color)); 105 | } else { 106 | // matched (poss either unchanged, moved, or updated) 107 | String label, color; 
108 | if (Objects.equals(removePrefix(srcNode.toString()), removePrefix(dstNode.toString()))) { 109 | label = removePrefix(srcNode.toString()); 110 | color = "lightgrey"; // unchanged 111 | } else { 112 | label = String.format("%s!NEWLINE!----!NEWLINE!%s", 113 | removePrefix(srcNode.toString()), 114 | removePrefix(dstNode.toString())); 115 | color = "#FFCC99"; // orange for update 116 | } 117 | nodeDataMap.put(nodeId, new NodeData(createNodeLabel(label, srcNode, dstNode), color)); 118 | } 119 | } 120 | 121 | // processing nodes added in destination 122 | for (PDGNode dstNode : dstNodes) { 123 | if (!dstToSrc.containsKey(dstNode)) { 124 | String nodeId = getNodeId(dstNode, false); 125 | String label = removePrefix(dstNode.toString()); 126 | String color = "#CCFFCC"; // green for added 127 | nodeDataMap.put(nodeId, new NodeData(createNodeLabel(label, dstNode), color)); 128 | } 129 | } 130 | 131 | // process edges and record dependency labels 132 | Map> edgeMap = new HashMap<>(); 133 | Set connectedNodeIds = new HashSet<>(); 134 | 135 | // process edges from the src PDG 136 | for (PDGNode srcNode : srcNodes) { 137 | for (PDGNode succ : srcPDG.getSuccsOf(srcNode)) { 138 | String srcId = getMergedNodeId(srcNode, true, srcToDst); 139 | String tgtId = getMergedNodeId(succ, true, srcToDst); 140 | EdgeKey key = new EdgeKey(srcId, tgtId); 141 | 142 | // get dependency types for the edge in srcPDG. 
143 | List depTypes = srcPDG.getEdgeLabels(srcNode, succ); 144 | String depLabel = depTypes.stream() 145 | .map(DiffGraphExporter::mapDependencyType) 146 | .collect(Collectors.joining(",")); 147 | edgeMap.computeIfAbsent(key, k -> new HashSet<>()).add("src:" + depLabel); 148 | 149 | connectedNodeIds.add(srcId); 150 | connectedNodeIds.add(tgtId); 151 | } 152 | } 153 | 154 | // process edges from the dest PDG 155 | for (PDGNode dstNode : dstNodes) { 156 | for (PDGNode succ : dstPDG.getSuccsOf(dstNode)) { 157 | String srcId = getMergedNodeId(dstNode, false, dstToSrc); 158 | String tgtId = getMergedNodeId(succ, false, dstToSrc); 159 | EdgeKey key = new EdgeKey(srcId, tgtId); 160 | 161 | List depTypes = dstPDG.getEdgeLabels(dstNode, succ); 162 | String depLabel = depTypes.stream() 163 | .map(DiffGraphExporter::mapDependencyType) 164 | .collect(Collectors.joining(",")); 165 | edgeMap.computeIfAbsent(key, k -> new HashSet<>()).add("dst:" + depLabel); 166 | 167 | connectedNodeIds.add(srcId); 168 | connectedNodeIds.add(tgtId); 169 | } 170 | } 171 | 172 | for (String nodeId : connectedNodeIds) { 173 | NodeData data = nodeDataMap.get(nodeId); 174 | if (data != null) { 175 | writer.printf(" %s [label=%s, fillcolor=\"%s\"];%n", 176 | nodeId, data.label, data.color); 177 | } 178 | } 179 | 180 | // write edges with colour and label 181 | for (Map.Entry> entry : edgeMap.entrySet()) { 182 | EdgeKey key = entry.getKey(); 183 | Set sources = entry.getValue(); 184 | String color; 185 | if (sources.stream().anyMatch(s -> s.startsWith("src:")) 186 | && sources.stream().anyMatch(s -> s.startsWith("dst:"))) { 187 | color = "black"; 188 | } else if (sources.stream().anyMatch(s -> s.startsWith("src:"))) { 189 | color = "red"; 190 | } else { 191 | color = "green"; 192 | } 193 | String edgeLabel = sources.stream() 194 | .map(s -> s.substring(4)) 195 | .distinct() 196 | .collect(Collectors.joining("/")); 197 | writer.printf(" %s -> %s [color=%s, label=\"%s\"];%n", 198 | key.srcId, key.tgtId, 
color, edgeLabel); 199 | } 200 | 201 | writer.println("}"); 202 | System.out.println("Created PDG diff: " + outputDotFile.getAbsolutePath()); 203 | } catch (IOException e) { 204 | e.printStackTrace(); 205 | } 206 | } 207 | 208 | // this is overloaded, depending on update or single-line number operation 209 | private static String createNodeLabel(String originalLabel, PDGNode node) { 210 | return createNodeLabel(originalLabel, node, null); 211 | } 212 | 213 | 214 | private static String createNodeLabel(String originalLabel, PDGNode node1, PDGNode node2) { 215 | int lineNum = getNodeLineNumber(node1); 216 | int lineNum2 = -1; 217 | if (node2 != null) { 218 | lineNum2 = getNodeLineNumber(node2); 219 | } 220 | String safeLabel = escape(originalLabel); 221 | 222 | StringBuilder sb = new StringBuilder(); 223 | sb.append("<"); 224 | sb.append("").append(safeLabel).append(""); 225 | if (lineNum != -1 && lineNum2 == -1) { 226 | sb.append("
") 227 | .append("") 228 | .append("Line: ").append(lineNum) 229 | .append(""); 230 | } else if(lineNum != -1) { 231 | sb.append("
") 232 | .append("") 233 | .append("Line: ").append(lineNum) 234 | .append(" -> ") 235 | .append("Line: ").append(lineNum2) 236 | .append(""); 237 | } 238 | 239 | sb.append(">"); 240 | return sb.toString(); 241 | } 242 | 243 | // helper classes and methods 244 | 245 | private static int getNodeLineNumber(PDGNode node) { 246 | if (node.getType() == PDGNode.Type.CFGNODE) { 247 | Object underlying = node.getNode(); 248 | if (underlying instanceof Unit) { 249 | Unit unit = (Unit) underlying; 250 | LineNumberTag tag = (LineNumberTag) unit.getTag("LineNumberTag"); 251 | if (tag != null) { 252 | return tag.getLineNumber(); 253 | } 254 | } 255 | } 256 | return -1; 257 | } 258 | 259 | private static class NodeData { 260 | String label; 261 | String color; 262 | NodeData(String label, String color) { 263 | this.label = label; 264 | this.color = color; 265 | } 266 | } 267 | 268 | private static String mapDependencyType(GraphGenerator.DependencyTypes depType) { 269 | if (depType == GraphGenerator.DependencyTypes.CONTROL_DEPENDENCY) { 270 | return "CTRL_DEP"; 271 | } else if (depType == GraphGenerator.DependencyTypes.DATA_DEPENDENCY) { 272 | return "DATA_DEP"; 273 | } else { 274 | return "UNKNOWN"; 275 | } 276 | } 277 | 278 | // generates a node ID 279 | private static String getNodeId(PDGNode node, boolean isSrc) { 280 | String prefix = isSrc ? 
"SRC_" : "DST_"; 281 | return prefix + System.identityHashCode(node); 282 | } 283 | 284 | // generates a node ID for merged nodes 285 | private static String getMergedNodeId(PDGNode node, boolean isSourceNode, Map mapping) { 286 | PDGNode mappedNode = mapping.get(node); 287 | if (mappedNode != null) { 288 | if (isSourceNode) { 289 | return getNodeId(node, true); 290 | } else { 291 | return getNodeId(mappedNode, true); 292 | } 293 | } else { 294 | return getNodeId(node, isSourceNode); 295 | } 296 | } 297 | 298 | // removes the prefix from a node label 299 | private static String removePrefix(String label) { 300 | String prefix = "Type: CFGNODE: "; 301 | return label.startsWith(prefix) ? label.substring(prefix.length()) : label; 302 | } 303 | 304 | // for dot formatting 305 | private static String escape(String text) { 306 | return text.replace("<", "<") 307 | .replace(">", ">") 308 | .replace("\"", "\\\"") 309 | .replace("!NEWLINE!", "
"); 310 | 311 | } 312 | 313 | private static class EdgeKey { 314 | final String srcId; 315 | final String tgtId; 316 | 317 | EdgeKey(String srcId, String tgtId) { 318 | this.srcId = srcId; 319 | this.tgtId = tgtId; 320 | } 321 | 322 | // for comp 323 | @Override 324 | public boolean equals(Object o) { 325 | if (this == o) return true; 326 | if (o == null || getClass() != o.getClass()) return false; 327 | EdgeKey edgeKey = (EdgeKey) o; 328 | return Objects.equals(srcId, edgeKey.srcId) && Objects.equals(tgtId, edgeKey.tgtId); 329 | } 330 | 331 | @Override 332 | public int hashCode() { 333 | return Objects.hash(srcId, tgtId); 334 | } 335 | } 336 | } 337 | -------------------------------------------------------------------------------- /src/main/java/org/pdgdiff/edit/EditScriptGenerator.java: -------------------------------------------------------------------------------- 1 | package org.pdgdiff.edit; 2 | 3 | import org.pdgdiff.edit.model.*; 4 | import org.pdgdiff.graph.PDG; 5 | import org.pdgdiff.matching.GraphMapping; 6 | import org.pdgdiff.matching.NodeMapping; 7 | import org.pdgdiff.util.CodeAnalysisUtils; 8 | import org.pdgdiff.util.SourceCodeMapper; 9 | import soot.SootMethod; 10 | import soot.Unit; 11 | import soot.toolkits.graph.pdg.PDGNode; 12 | 13 | import java.io.IOException; 14 | import java.util.*; 15 | import java.util.stream.Collectors; 16 | 17 | import org.pdgdiff.edit.SignatureDiffGenerator.ParsedSignature; 18 | 19 | import static org.pdgdiff.edit.SignatureDiffGenerator.compareSignatures; 20 | import static org.pdgdiff.edit.SignatureDiffGenerator.parseMethodSignature; 21 | import static org.pdgdiff.graph.GraphTraversal.collectNodesBFS; 22 | 23 | /** 24 | * Generates edit scripts based on PDG node mappings. 
25 | */ 26 | public class EditScriptGenerator { 27 | 28 | public static List generateEditScript( 29 | PDG srcPDG, 30 | PDG dstPDG, 31 | GraphMapping graphMapping, 32 | String srcSourceFilePath, 33 | String dstSourceFilePath, 34 | SootMethod srcMethod, 35 | SootMethod destMethod 36 | ) throws IOException { 37 | // using a set to prevent duplicates (order does not matter for now). 38 | Set editScriptSet = new HashSet<>(); 39 | 40 | SourceCodeMapper srcCodeMapper = new SourceCodeMapper(srcSourceFilePath); 41 | SourceCodeMapper dstCodeMapper = new SourceCodeMapper(dstSourceFilePath); 42 | 43 | NodeMapping nodeMapping = graphMapping.getNodeMapping(srcPDG); 44 | 45 | Map mappings = nodeMapping.getNodeMapping(); 46 | Set srcNodesMapped = mappings.keySet(); 47 | Set dstNodesMapped = new HashSet<>(mappings.values()); 48 | 49 | Set visitedNodes = new HashSet<>(); 50 | 51 | // process mapped nodes for updates or moves 52 | for (PDGNode srcNode : srcNodesMapped) { 53 | PDGNode dstNode = mappings.get(srcNode); 54 | 55 | if (!visitedNodes.contains(srcNode)) { 56 | ComparisonResult compResult = nodesAreEqual(srcNode, dstNode, visitedNodes, srcCodeMapper, dstCodeMapper, nodeMapping); 57 | 58 | if (!compResult.isEqual) { 59 | if (compResult.isMove) { 60 | int oldLineNumber = getNodeLineNumber(srcNode); 61 | int newLineNumber = getNodeLineNumber(dstNode); 62 | String codeSnippet = srcCodeMapper.getCodeLine(oldLineNumber); 63 | editScriptSet.add(new Move(srcNode, oldLineNumber, newLineNumber, codeSnippet)); 64 | } else if (!compResult.syntaxDifferences.isEmpty()) { 65 | for (SyntaxDifference syntaxDiff : compResult.syntaxDifferences) { 66 | int oldLineNumber = syntaxDiff.getOldLineNumber(); 67 | int newLineNumber = syntaxDiff.getNewLineNumber(); 68 | String oldCodeSnippet = syntaxDiff.getOldCodeSnippet(); 69 | String newCodeSnippet = syntaxDiff.getNewCodeSnippet(); 70 | if (oldCodeSnippet.equals(newCodeSnippet)) { 71 | Move move = new Move(srcNode, oldLineNumber, newLineNumber, 
oldCodeSnippet); 72 | editScriptSet.add(move); 73 | } else { 74 | Update update = new Update(srcNode, oldLineNumber, newLineNumber, oldCodeSnippet, newCodeSnippet, syntaxDiff); 75 | editScriptSet.add(update); 76 | } 77 | } 78 | } 79 | } 80 | } 81 | } 82 | 83 | // handle deletions 84 | for (PDGNode srcNode : srcPDG) { 85 | if (!srcNodesMapped.contains(srcNode) && !visitedNodes.contains(srcNode)) { 86 | int lineNumber = getNodeLineNumber(srcNode); 87 | String codeSnippet = srcCodeMapper.getCodeLine(lineNumber); 88 | editScriptSet.add(new Delete(srcNode, lineNumber, codeSnippet)); 89 | } 90 | } 91 | 92 | // handle insertions 93 | for (PDGNode dstNode : dstPDG) { 94 | if (!dstNodesMapped.contains(dstNode) && !visitedNodes.contains(dstNode)) { 95 | int lineNumber = getNodeLineNumber(dstNode); 96 | String codeSnippet = dstCodeMapper.getCodeLine(lineNumber); 97 | editScriptSet.add(new Insert(dstNode, lineNumber, codeSnippet)); 98 | } 99 | } 100 | 101 | // structural signature diff, happens in every case to account for annotations changing even if signature itself doesnt. 
102 | ParsedSignature oldSig = parseMethodSignature(srcMethod, srcCodeMapper); 103 | ParsedSignature newSig = parseMethodSignature(destMethod, dstCodeMapper); 104 | 105 | // misleading naming here , should probably rename to something including annotations 106 | List signatureDiffs = 107 | compareSignatures(oldSig, newSig, srcMethod, destMethod, srcCodeMapper, dstCodeMapper); 108 | 109 | editScriptSet.addAll(signatureDiffs); 110 | 111 | return new ArrayList<>(editScriptSet); 112 | } 113 | 114 | 115 | public static List generateAddScript(PDG pdg, String sourceFilePath, SootMethod method) throws IOException { 116 | SourceCodeMapper codeMapper = new SourceCodeMapper(sourceFilePath); 117 | List editOperations = new ArrayList<>(); 118 | 119 | // insert the method signature lines (approx.), handling for annoataions 120 | int[] methodRange = CodeAnalysisUtils.getMethodLineRange(method, codeMapper); 121 | List annotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(method, codeMapper); 122 | if (!annotationLines.isEmpty() && Collections.min(annotationLines) < methodRange[0]) { 123 | methodRange[0] = Collections.min(annotationLines); 124 | } 125 | if (methodRange[0] > 0 && methodRange[1] >= methodRange[0]) { 126 | for (int i = methodRange[0]; i <= methodRange[1]; i++) { 127 | String signatureLine = codeMapper.getCodeLine(i); 128 | editOperations.add(new Insert(null, i, signatureLine)); 129 | } 130 | } 131 | 132 | editOperations.addAll( 133 | collectNodesBFS(pdg).stream() 134 | .map(node -> { 135 | int lineNumber = getNodeLineNumber(node); 136 | String codeSnippet = codeMapper.getCodeLine(lineNumber); 137 | return new Insert(node, lineNumber, codeSnippet); 138 | }) 139 | .collect(Collectors.toList()) 140 | ); 141 | 142 | 143 | // attempt to insert a trailing closing paren 144 | int maxLine = editOperations.stream() 145 | .mapToInt(op -> { 146 | PDGNode node = op.getNode(); 147 | return node == null ? 
-1 : getNodeLineNumber(node); 148 | }) 149 | .max() 150 | .orElse(-1); 151 | int nextLine = maxLine + 1; 152 | if (nextLine <= codeMapper.getTotalLines()) { 153 | String content = codeMapper.getCodeLine(nextLine).trim(); 154 | if (content.contains("}")) { 155 | editOperations.add(new Insert(null, nextLine, content)); 156 | } 157 | } 158 | 159 | return editOperations; 160 | } 161 | 162 | public static List generateDeleteScript(PDG pdg, String sourceFilePath, SootMethod method) throws IOException { 163 | SourceCodeMapper codeMapper = new SourceCodeMapper(sourceFilePath); 164 | List editOperations = new ArrayList<>(); 165 | 166 | // delete the method signature lines (approx.) 167 | int[] methodRange = CodeAnalysisUtils.getMethodLineRange(method, codeMapper); 168 | List annotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(method, codeMapper); 169 | if (!annotationLines.isEmpty() && Collections.min(annotationLines) < methodRange[0]) { 170 | methodRange[0] = Collections.min(annotationLines); 171 | } 172 | if (methodRange[0] > 0 && methodRange[1] >= methodRange[0]) { 173 | for (int i = methodRange[0]; i <= methodRange[1]; i++) { 174 | String signatureLine = codeMapper.getCodeLine(i); 175 | editOperations.add(new Delete(null, i, signatureLine)); 176 | } 177 | } 178 | 179 | editOperations.addAll( 180 | collectNodesBFS(pdg).stream() 181 | .map(node -> { 182 | int lineNumber = getNodeLineNumber(node); 183 | String codeSnippet = codeMapper.getCodeLine(lineNumber); 184 | return new Delete(node, lineNumber, codeSnippet); 185 | }) 186 | .collect(Collectors.toList()) 187 | ); 188 | 189 | 190 | // attempt to delete a trailing closing paren 191 | int maxLine = editOperations.stream() 192 | .mapToInt(op -> { 193 | PDGNode node = op.getNode(); 194 | return node == null ? 
-1 : getNodeLineNumber(node); 195 | }) 196 | .max() 197 | .orElse(-1); 198 | int nextLine = maxLine + 1; 199 | if (nextLine <= codeMapper.getTotalLines()) { 200 | String content = codeMapper.getCodeLine(nextLine).trim(); 201 | if (content.contains("}")) { 202 | editOperations.add(new Delete(null, nextLine, content)); 203 | } 204 | } 205 | 206 | return editOperations; 207 | } 208 | 209 | 210 | 211 | private static class ComparisonResult { 212 | public boolean isEqual; 213 | public boolean isMove; 214 | public Set syntaxDifferences; 215 | 216 | public ComparisonResult(boolean isEqual) { 217 | this.isEqual = isEqual; 218 | this.isMove = false; 219 | this.syntaxDifferences = new HashSet<>(); 220 | } 221 | 222 | public ComparisonResult(boolean isEqual, boolean isMove, Set syntaxDifferences) { 223 | this.isEqual = isEqual; 224 | this.isMove = isMove; 225 | this.syntaxDifferences = syntaxDifferences; 226 | } 227 | } 228 | 229 | 230 | public static int getNodeLineNumber(PDGNode node) { 231 | if (node.getType() == PDGNode.Type.CFGNODE) { 232 | Unit headUnit = (Unit) node.getNode(); 233 | return getLineNumber(headUnit); 234 | } 235 | return -1; 236 | } 237 | 238 | private static ComparisonResult nodesAreEqual(PDGNode n1, PDGNode n2, Set visitedNodes, 239 | SourceCodeMapper srcCodeMapper, SourceCodeMapper dstCodeMapper, 240 | NodeMapping nodeMapping) { 241 | if (visitedNodes.contains(n1)) { 242 | return new ComparisonResult(true); 243 | } 244 | visitedNodes.add(n1); 245 | visitedNodes.add(n2); 246 | 247 | if (!n1.getType().equals(n2.getType())) { 248 | return new ComparisonResult(false); 249 | } 250 | 251 | if (n1.getType() == PDGNode.Type.CFGNODE) { 252 | return compareCFGNodes(n1, n2, srcCodeMapper, dstCodeMapper); 253 | } 254 | 255 | return new ComparisonResult(true); 256 | } 257 | 258 | private static ComparisonResult compareCFGNodes(PDGNode n1, PDGNode n2, 259 | SourceCodeMapper srcCodeMapper, SourceCodeMapper dstCodeMapper) { 260 | Unit unit1 = (Unit) n1.getNode(); 261 
| Unit unit2 = (Unit) n2.getNode(); 262 | 263 | List units1 = Collections.singletonList(unit1); 264 | List units2 = Collections.singletonList(unit2); 265 | 266 | Set differences = compareUnitLists(units1, units2, srcCodeMapper, dstCodeMapper); 267 | 268 | if (!differences.isEmpty()) { 269 | return new ComparisonResult(false, false, differences); 270 | } else { 271 | // check for move operations based on line numbers 272 | int lineNumber1 = getNodeLineNumber(n1); 273 | int lineNumber2 = getNodeLineNumber(n2); 274 | if (lineNumber1 != lineNumber2 && lineNumber1 != -1 && lineNumber2 != -1) { 275 | return new ComparisonResult(false, true, differences); 276 | } 277 | } 278 | 279 | return new ComparisonResult(true); 280 | } 281 | 282 | private static Set compareUnitLists(List units1, List units2, 283 | SourceCodeMapper srcCodeMapper, SourceCodeMapper dstCodeMapper) { 284 | Set differences = new HashSet<>(); 285 | 286 | int i = 0, j = 0; 287 | while (i < units1.size() && j < units2.size()) { 288 | Unit unit1 = units1.get(i); 289 | Unit unit2 = units2.get(j); 290 | 291 | if (unitsAreEqual(unit1, unit2)) { 292 | i++; 293 | j++; 294 | } else { 295 | SyntaxDifference diff = new SyntaxDifference(unit1, unit2, srcCodeMapper, dstCodeMapper); 296 | differences.add(diff); 297 | i++; 298 | j++; 299 | } 300 | } 301 | 302 | // handle remaining units in units1 (deletions) 303 | while (i < units1.size()) { 304 | SyntaxDifference diff = new SyntaxDifference(units1.get(i), null, srcCodeMapper, dstCodeMapper); 305 | differences.add(diff); 306 | i++; 307 | } 308 | 309 | // handle remaining units in units2 (insertions) 310 | while (j < units2.size()) { 311 | SyntaxDifference diff = new SyntaxDifference(null, units2.get(j), srcCodeMapper, dstCodeMapper); 312 | differences.add(diff); 313 | j++; 314 | } 315 | 316 | return differences; 317 | } 318 | 319 | private static boolean unitsAreEqual(Unit unit1, Unit unit2) { 320 | if (unit1 == null || unit2 == null) { 321 | return false; 322 | } 323 | 
// compares the actual body representation of the units 324 | return unit1.toString().equals(unit2.toString()); 325 | } 326 | 327 | private static int getLineNumber(Unit unit) { 328 | return CodeAnalysisUtils.getLineNumber(unit); 329 | } 330 | } 331 | --------------------------------------------------------------------------------