├── images
    ├── overview.png
    ├── text-based.jpg
    └── refactoredgraph.png
├── .gitmodules
├── .idea
    ├── .gitignore
    ├── vcs.xml
    ├── encodings.xml
    ├── misc.xml
    └── uiDesigner.xml
├── src
    └── main
    │   └── java
    │       └── org
    │           └── pdgdiff
    │               ├── matching
    │                   ├── models
    │                   │   ├── vf2
    │                   │   │   ├── CandidatePair.java
    │                   │   │   ├── VF2Matcher.java
    │                   │   │   └── VF2State.java
    │                   │   ├── ged
    │                   │   │   ├── GEDResult.java
    │                   │   │   ├── HungarianAlgorithm.java
    │                   │   │   └── GEDMatcher.java
    │                   │   ├── GEDGraphMatcher.java
    │                   │   ├── UllmannGraphMatcher.java
    │                   │   ├── heuristic
    │                   │   │   └── JaroWinklerSimilarity.java
    │                   │   ├── VF2GraphMatcher.java
    │                   │   └── ullmann
    │                   │   │   └── UllmannMatcher.java
    │                   ├── GraphMatcher.java
    │                   ├── GraphMatcherFactory.java
    │                   ├── StrategySettings.java
    │                   ├── NodeFeasibility.java
    │                   ├── NodeMapping.java
    │                   ├── GraphMapping.java
    │                   └── DiffEngine.java
    │               ├── io
    │                   ├── JsonOperationSerializer.java
    │                   ├── OperationFormatter.java
    │                   ├── OperationSerializer.java
    │                   └── JsonOperationFormatter.java
    │               ├── edit
    │                   ├── model
    │                   │   ├── EditOperation.java
    │                   │   ├── Delete.java
    │                   │   ├── Insert.java
    │                   │   ├── Move.java
    │                   │   ├── Update.java
    │                   │   └── SyntaxDifference.java
    │                   ├── EditDistanceCalculator.java
    │                   ├── ClassMetadataDiffGenerator.java
    │                   ├── SignatureDiffGenerator.java
    │                   └── EditScriptGenerator.java
    │               ├── export
    │                   ├── ExportUtils.java
    │                   ├── EditScriptExporter.java
    │                   └── DiffGraphExporter.java
    │               ├── util
    │                   ├── SourceCodeMapper.java
    │                   ├── SootInitializer.java
    │                   └── CodeAnalysisUtils.java
    │               ├── graph
    │                   ├── PDG.java
    │                   ├── GraphTraversal.java
    │                   ├── GraphExporter.java
    │                   ├── GraphGenerator.java
    │                   └── CycleDetection.java
    │               └── testclasses
    │                   ├── TestFileBefore.java
    │                   └── TestFileAfter.java
├── .gitignore
├── py-visualise
    ├── templates
    │   └── index.html
    ├── static
    │   └── css
    │   │   └── style.css
    └── app.py
├── pom.xml
├── README.md
└── benchmark
    └── evaluation-scripts
        └── analysis_line_num_granularity.py


/images/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/syntax/PDGdiff/main/images/overview.png


--------------------------------------------------------------------------------
/images/text-based.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/syntax/PDGdiff/main/images/text-based.jpg


--------------------------------------------------------------------------------
/images/refactoredgraph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/syntax/PDGdiff/main/images/refactoredgraph.png


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "benchmark/datasets"]
2 | 	path = benchmark/datasets
3 | 	url = https://github.com/syntax/datasets.git
4 | 


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 | 


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |     <mapping directory="$PROJECT_DIR$/benchmark/datasets" vcs="Git" />
6 |   </component>
7 | </project>


--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="Encoding">
4 |     <file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
5 |     <file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
6 |   </component>
7 | </project>


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/vf2/CandidatePair.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models.vf2;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | class CandidatePair {
 6 |     PDGNode n1;
 7 |     PDGNode n2;
 8 | 
 9 |     public CandidatePair(PDGNode n1, PDGNode n2) {
10 |         this.n1 = n1;
11 |         this.n2 = n2;
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/ged/GEDResult.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models.ged;
 2 | 
 3 | import org.pdgdiff.matching.NodeMapping;
 4 | 
 5 | public class GEDResult {
 6 |     public final double distance;
 7 |     public final NodeMapping nodeMapping;
 8 | 
 9 |     public GEDResult(double distance, NodeMapping nodeMapping) {
10 |         this.distance = distance;
11 |         this.nodeMapping = nodeMapping;
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="ExternalStorageConfigurationManager" enabled="true" />
 4 |   <component name="MavenProjectsManager">
 5 |     <option name="originalFiles">
 6 |       <list>
 7 |         <option value="$PROJECT_DIR$/pom.xml" />
 8 |       </list>
 9 |     </option>
10 |   </component>
11 |   <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
12 |     <output url="file://$PROJECT_DIR$/out" />
13 |   </component>
14 | </project>


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/io/JsonOperationSerializer.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.io;
 2 | 
 3 | import org.pdgdiff.edit.model.EditOperation;
 4 | import org.pdgdiff.matching.StrategySettings;
 5 | 
 6 | import java.io.Writer;
 7 | import java.util.List;
 8 | 
 9 | public class JsonOperationSerializer extends OperationSerializer {
10 | 
11 |     public JsonOperationSerializer(List<EditOperation> editScript, StrategySettings settings) {
12 |         super(editScript, settings);
13 |     }
14 | 
15 |     @Override
16 |     protected OperationFormatter newFormatter(Writer writer) {
17 |         return new JsonOperationFormatter(writer);
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/model/EditOperation.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.edit.model;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | public abstract class EditOperation {
 6 |     protected PDGNode node;
 7 | 
 8 |     public EditOperation(PDGNode node) {
 9 |         this.node = node;
10 |     }
11 | 
12 |     public PDGNode getNode() {
13 |         return node;
14 |     }
15 | 
16 |     public abstract String getName();
17 | 
18 |     @Override
19 |     public abstract String toString();
20 | 
21 |     // following are to prevent duplicate entries in edit scripts
22 |     @Override
23 |     public abstract boolean equals(Object obj);
24 | 
25 |     @Override
26 |     public abstract int hashCode();
27 | }
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | target/
 2 | !.mvn/wrapper/maven-wrapper.jar
 3 | !**/src/main/**/target/
 4 | !**/src/test/**/target/
 5 | 
 6 | ### IntelliJ IDEA ###
 7 | .idea/modules.xml
 8 | .idea/jarRepositories.xml
 9 | .idea/compiler.xml
10 | .idea/libraries/
11 | *.iws
12 | *.iml
13 | *.ipr
14 | 
15 | ### Eclipse ###
16 | .apt_generated
17 | .classpath
18 | .factorypath
19 | .project
20 | .settings
21 | .springBeans
22 | .sts4-cache
23 | 
24 | ### NetBeans ###
25 | /nbproject/private/
26 | /nbbuild/
27 | /dist/
28 | /nbdist/
29 | /.nb-gradle/
30 | build/
31 | !**/src/main/**/build/
32 | !**/src/test/**/build/
33 | 
34 | ### VS Code ###
35 | .vscode/
36 | 
37 | ### Mac OS ###
38 | .DS_Store
39 | 
40 | out/*
41 | py-visualise/out/*
42 | py-visualise/testclasses/*


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/io/OperationFormatter.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.io;
 2 | 
 3 | import org.pdgdiff.edit.model.*;
 4 | import org.pdgdiff.matching.StrategySettings;
 5 | 
 6 | public interface OperationFormatter {
 7 | 
 8 |     void writeInfo(StrategySettings settings) throws Exception;
 9 | 
10 |     void startOutput() throws Exception;
11 |     void endOutput() throws Exception;
12 | 
13 |     void startOperations() throws Exception;
14 |     void endOperations() throws Exception;
15 | 
16 |     void insertOperation(Insert operation) throws Exception;
17 |     void deleteOperation(Delete operation) throws Exception;
18 |     void updateOperation(Update operation) throws Exception;
19 |     void moveOperation(Move operation) throws Exception;
20 | }
21 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/EditDistanceCalculator.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.edit;
 2 | 
 3 | import org.pdgdiff.edit.model.EditOperation;
 4 | 
 5 | import java.util.List;
 6 | 
 7 | public class EditDistanceCalculator {
 8 | 
 9 |     public static int calculateEditDistance(List<EditOperation> editScript) {
10 |         int distance = 0;
11 |         for (EditOperation op : editScript) {
12 |             switch (op.getName()) {
13 |                 case "Insert":
14 |                 case "Delete":
15 |                 case "Update":
16 |                 case "Move":
17 |                     distance += 1;
18 |                     break;
19 |                 default:
20 |                     break;
21 |             }
22 |         }
23 |         return distance;
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/export/ExportUtils.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.export;
 2 | 
 3 | import java.nio.charset.StandardCharsets;
 4 | import java.security.MessageDigest;
 5 | import java.security.NoSuchAlgorithmException;
 6 | 
 7 | public class ExportUtils {
 8 | 
 9 |     public static String generateHash(String methodName) {
10 |         try {
11 |             MessageDigest digest = MessageDigest.getInstance("SHA-256");
12 |             byte[] hashBytes = digest.digest(methodName.getBytes(StandardCharsets.UTF_8));
13 |             StringBuilder hexString = new StringBuilder();
14 |             for (byte b : hashBytes) {
15 |                 hexString.append(String.format("%02x", b));
16 |             }
17 |             return hexString.toString();
18 |         } catch (NoSuchAlgorithmException e) {
19 |             e.printStackTrace();
20 |             return methodName + ": failed to generate hash for method name!";
21 |         }
22 |     }
23 | 
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/GraphMatcher.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching;
 2 | 
 3 | import org.pdgdiff.graph.PDG;
 4 | 
 5 | import java.util.HashSet;
 6 | import java.util.List;
 7 | 
 8 | /**
 9 |  * GraphMatcher abstract class to compare two lists of PDGs and find similarities, aiming to create a 1:1 mapping between
10 |  * PDGs from the two lists. A PDGs represents a methods, with each list representing a different class
11 |  */
12 | public abstract class GraphMatcher {
13 |     protected final HashSet<PDG> matchedPDGs;
14 |     protected List<PDG> srcPdgs;
15 |     protected List<PDG> dstPdgs;
16 |     protected GraphMapping graphMapping; // to store graph-level and node-level mappings
17 | 
18 |     public GraphMatcher(List<PDG> srcPdgs, List<PDG> dstPdgs) {
19 |         this.srcPdgs = srcPdgs;
20 |         this.dstPdgs = dstPdgs;
21 |         this.graphMapping = new GraphMapping();
22 |         this.matchedPDGs = new HashSet<>();
23 |     }
24 | 
25 |     public abstract GraphMapping matchPDGLists();
26 | }


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/GraphMatcherFactory.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching;
 2 | 
 3 | import org.pdgdiff.graph.PDG;
 4 | import org.pdgdiff.matching.models.GEDGraphMatcher;
 5 | import org.pdgdiff.matching.models.UllmannGraphMatcher;
 6 | import org.pdgdiff.matching.models.VF2GraphMatcher;
 7 | 
 8 | import java.util.List;
 9 | 
10 | public class GraphMatcherFactory {
11 | 
12 |     public enum MatchingStrategy {
13 |         VF2,
14 |         ULLMANN,
15 |         GED
16 |     }
17 | 
18 |     public static GraphMatcher createMatcher(MatchingStrategy strategy, List<PDG> srcPDGs, List<PDG> destPDGs) {
19 |         switch (strategy) {
20 |             case VF2:
21 |                 return new VF2GraphMatcher(srcPDGs, destPDGs);
22 |             case ULLMANN:
23 |                 return new UllmannGraphMatcher(srcPDGs, destPDGs);
24 |             case GED:
25 |                 return new GEDGraphMatcher(srcPDGs, destPDGs);
26 |             default:
27 |                 throw new IllegalArgumentException("Unknown matching strategy: " + strategy);
28 |         }
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/StrategySettings.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching;
 2 | 
 3 | import org.pdgdiff.edit.RecoveryProcessor;
 4 | 
 5 | public class StrategySettings {
 6 |     protected RecoveryProcessor.RecoveryStrategy recoveryStrategy;
 7 |     protected GraphMatcherFactory.MatchingStrategy matchingStrategy;
 8 |     protected boolean aggregateRecovery;
 9 | 
10 |     public StrategySettings(RecoveryProcessor.RecoveryStrategy recoveryStrategy, GraphMatcherFactory.MatchingStrategy matchingStrategy, boolean aggregateRecovery) {
11 |         this.recoveryStrategy = recoveryStrategy;
12 |         this.matchingStrategy = matchingStrategy;
13 |         this.aggregateRecovery = aggregateRecovery;
14 |     }
15 | 
16 |     public RecoveryProcessor.RecoveryStrategy getRecoveryStrategy() {
17 |         return recoveryStrategy;
18 |     }
19 | 
20 |     public GraphMatcherFactory.MatchingStrategy getMatchingStrategy() {
21 |         return matchingStrategy;
22 |     }
23 | 
24 | 
25 |     public boolean isAggregateRecovery() {
26 |         return aggregateRecovery;
27 |     }
28 | 
29 |     public void setRecoveryStrategy(RecoveryProcessor.RecoveryStrategy recoveryStrategy) {
30 |         this.recoveryStrategy = recoveryStrategy;
31 |     }
32 | 
33 |     public void setMatchingStrategy(GraphMatcherFactory.MatchingStrategy matchingStrategy) {
34 |         this.matchingStrategy = matchingStrategy;
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/util/SourceCodeMapper.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.util;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.FileReader;
 5 | import java.io.IOException;
 6 | import java.util.HashMap;
 7 | 
 8 | /**
 9 |  * Maps line numbers to source code snippets for naive parsing when Soot struggles to define a line number for a unit.
10 |  * Used for later hositing of elements when constructing the Edit Script at the source level.
11 |  */
12 | public class SourceCodeMapper {
13 |     private HashMap<Integer, String> lineNumberToCodeMap;
14 | 
15 |     public SourceCodeMapper(String sourceFilePath) throws IOException {
16 |         lineNumberToCodeMap = new HashMap<>();
17 |         loadSourceCode(sourceFilePath);
18 |     }
19 | 
20 |     private void loadSourceCode(String sourceFilePath) throws IOException {
21 |         BufferedReader reader = new BufferedReader(new FileReader(sourceFilePath));
22 |         String line;
23 |         int lineNumber = 1;
24 |         while ((line = reader.readLine()) != null) {
25 |             lineNumberToCodeMap.put(lineNumber, line);
26 |             lineNumber++;
27 |         }
28 |         reader.close();
29 |     }
30 | 
31 |     public String getCodeLine(int lineNumber) {
32 |         return lineNumberToCodeMap.getOrDefault(lineNumber, "");
33 |     }
34 | 
35 |     public int getTotalLines() {
36 |         return lineNumberToCodeMap.size();
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/model/Delete.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.edit.model;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | import java.util.Objects;
 6 | 
 7 | public class Delete extends EditOperation {
 8 |     private int lineNumber;
 9 |     private String codeSnippet;
10 | 
11 |     public Delete(PDGNode node, int lineNumber, String codeSnippet) {
12 |         super(node);
13 |         this.lineNumber = lineNumber;
14 |         this.codeSnippet = codeSnippet;
15 |     }
16 | 
17 |     public PDGNode getNode() { return node; }
18 | 
19 |     public int getLineNumber() {
20 |         return lineNumber;
21 |     }
22 | 
23 |     public String getCodeSnippet() {
24 |         return codeSnippet;
25 |     }
26 | 
27 |     @Override
28 |     public String getName() {
29 |         return "Delete";
30 |     }
31 | 
32 |     @Override
33 |     public String toString() {
34 |         return String.format("Delete at line %d: %s", lineNumber, codeSnippet);
35 |     }
36 | 
37 |     @Override
38 |     public boolean equals(Object obj) {
39 |         if (this == obj) return true;
40 |         if (!(obj instanceof Delete)) return false;
41 |         Delete other = (Delete) obj;
42 |         return lineNumber == other.lineNumber &&
43 |                 Objects.equals(codeSnippet, other.codeSnippet);
44 |     }
45 | 
46 |     @Override
47 |     public int hashCode() {
48 |         return Objects.hash(lineNumber, codeSnippet);
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/model/Insert.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.edit.model;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | import java.util.Objects;
 6 | 
 7 | public class Insert extends EditOperation {
 8 |     private int lineNumber;
 9 |     private String codeSnippet;
10 | 
11 |     public Insert(PDGNode node, int lineNumber, String codeSnippet) {
12 |         super(node);
13 |         this.lineNumber = lineNumber;
14 |         this.codeSnippet = codeSnippet;
15 |     }
16 | 
17 |     public PDGNode getNode() { return node; }
18 | 
19 |     public int getLineNumber() {
20 |         return lineNumber;
21 |     }
22 | 
23 |     public String getCodeSnippet() {
24 |         return codeSnippet;
25 |     }
26 | 
27 |     @Override
28 |     public String getName() {
29 |         return "Insert";
30 |     }
31 | 
32 |     @Override
33 |     public String toString() {
34 |         return String.format("Insert at line %d: %s", lineNumber, codeSnippet);
35 |     }
36 | 
37 |     @Override
38 |     public boolean equals(Object obj) {
39 |         if (this == obj) return true;
40 |         if (!(obj instanceof Insert)) return false;
41 |         Insert other = (Insert) obj;
42 |         return lineNumber == other.lineNumber &&
43 |                 Objects.equals(codeSnippet, other.codeSnippet);
44 |     }
45 | 
46 |     @Override
47 |     public int hashCode() {
48 |         return Objects.hash(lineNumber, codeSnippet);
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/py-visualise/templates/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head> <!-- Template for the diff viewer page; rendered with the context values class1, class2 and diffs -->
 4 |     <meta charset="UTF-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 6 |     <title>Code Diff Viewer</title>
 7 |     <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}"> <!-- shared stylesheet; the .preformatted rule below is local to this page -->
 8 |     <style>
 9 |         .preformatted {
10 |             white-space: pre-wrap;
11 |             font-family: monospace;
12 |         }
13 |     </style>
14 | </head>
15 | <body>
16 |     <h1>Code Difference Viewer</h1>
17 | 
18 |     <!-- Two panes, class1 then class2. Both are passed through the safe filter, so they are emitted as raw markup. NOTE(review): confirm the caller escapes the source text before adding its own markup. --><div class="diff-container">
19 |         <div class="file-content" id="class1">
20 |             <h3>TestFileBefore.java</h3>
21 |             <div class="preformatted">{{ class1 | safe }}</div>
22 |         </div>
23 |         <div class="file-content" id="class2">
24 |             <h3>TestFileAfter.java</h3>
25 |             <div class="preformatted">{{ class2 | safe }}</div>
26 |         </div>
27 |     </div>
28 | 
29 |     <h2>Highlighted Differences Summary</h2>
30 |     <div> <!-- one old/new pair is rendered per entry of diffs; each entry is read for oldLine, oldCode, newLine and newCode -->
31 |         {% for diff in diffs %}
32 |             <div>
33 |                 <h4>Old Line {{ diff.oldLine }}:</h4>
34 |                 <div class="preformatted">{{ diff.oldCode | safe }}</div>
35 |             </div>
36 |             <div>
37 |                 <h4>New Line {{ diff.newLine }}:</h4>
38 |                 <div class="preformatted">{{ diff.newCode | safe }}</div>
39 |             </div>
40 |             <hr>
41 |         {% endfor %}
42 |     </div>
43 | </body>
44 | </html>
45 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/graph/PDG.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.graph;
 2 | 
 3 | import soot.toolkits.graph.HashMutableEdgeLabelledDirectedGraph;
 4 | import soot.toolkits.graph.UnitGraph;
 5 | import soot.toolkits.graph.pdg.PDGNode;
 6 | 
 7 | import java.util.List;
 8 | 
 9 | /**
10 |  * Program Dependency Graph (PDG) class that extends the Soot HashMutableEdgeLabelledDirectedGraph. this is
11 |  * similar to the soot HashMutablePDG, but removes some abstractions that remove the granularity of the PDG and allows
12 |  * for specific edge types to be added to the graph.
13 |  */
14 | public class PDG extends HashMutableEdgeLabelledDirectedGraph<PDGNode, GraphGenerator.DependencyTypes> {
15 |     private UnitGraph cfg = null;
16 |     protected PDGNode startNode = null;
17 | 
18 |     public PDG() {
19 |         super();
20 |     }
21 | 
22 |     public void setCFG(UnitGraph cfg) {
23 |         this.cfg = cfg;
24 |     }
25 | 
26 |     public UnitGraph getCFG() {
27 |         return cfg;
28 |     }
29 | 
30 |     public PDGNode getStartNode() {
31 |         return startNode;
32 |     }
33 | 
34 |     public boolean hasDataEdge(PDGNode src, PDGNode tgt) {
35 |         return this.containsEdge(src, tgt, GraphGenerator.DependencyTypes.DATA_DEPENDENCY);
36 |     }
37 | 
38 |     public boolean hasControlEdge(PDGNode src, PDGNode tgt) {
39 |         return this.containsEdge(src, tgt, GraphGenerator.DependencyTypes.CONTROL_DEPENDENCY);
40 |     }
41 | 
42 |     public List<GraphGenerator.DependencyTypes> getEdgeLabels(PDGNode src, PDGNode tgt) {
43 |         return this.getLabelsForEdges(src, tgt);
44 |     }
45 | }
46 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/io/OperationSerializer.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.io;
 2 | 
 3 | import org.pdgdiff.edit.model.*;
 4 | import org.pdgdiff.matching.StrategySettings;
 5 | 
 6 | import java.io.Writer;
 7 | import java.util.List;
 8 | 
 9 | public abstract class OperationSerializer {
10 |     protected List<EditOperation> editScript;
11 |     protected StrategySettings settings;
12 | 
13 | 
14 |     public OperationSerializer(List<EditOperation> editScript, StrategySettings settings) {
15 |         this.editScript = editScript;
16 |         this.settings = settings;
17 |     }
18 | 
19 |     protected abstract OperationFormatter newFormatter(Writer writer) throws Exception;
20 | 
21 |     public void writeTo(Writer writer) throws Exception {
22 |         OperationFormatter formatter = newFormatter(writer);
23 | 
24 |         formatter.startOutput();
25 | 
26 |         if (settings != null) {
27 |             formatter.writeInfo(settings);
28 |         }
29 | 
30 |         formatter.startOperations();
31 |         for (EditOperation op : editScript) {
32 |             if (op instanceof Insert) {
33 |                 formatter.insertOperation((Insert) op);
34 |             } else if (op instanceof Delete) {
35 |                 formatter.deleteOperation((Delete) op);
36 |             } else if (op instanceof Update) {
37 |                 formatter.updateOperation((Update) op);
38 |             } else if (op instanceof Move) {
39 |                 formatter.moveOperation((Move) op);
40 |             }
41 |         }
42 |         formatter.endOperations();
43 |         formatter.endOutput();
44 |     }
45 | }
46 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/NodeFeasibility.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | public class NodeFeasibility {
 6 |     public static boolean isSameNodeCategory(PDGNode n1, PDGNode n2) {
 7 |         // get unit for each node
 8 |         Object node1 = n1.getNode();
 9 |         Object node2 = n2.getNode();
10 | 
11 |         // check for abstract syntax categories
12 |         return (isStatement(node1) && isStatement(node2)) ||
13 |                 (isDeclaration(node1) && isDeclaration(node2)) ||
14 |                 (isControlFlowNode(node1) && isControlFlowNode(node2)) ||
15 |                 (isDataNode(node1) && isDataNode(node2));
16 |     }
17 | 
18 |     private static boolean isStatement(Object node) {
19 |         return node instanceof soot.jimple.Stmt;
20 |     }
21 | 
22 |     private static boolean isDeclaration(Object node) {
23 |         if (node instanceof soot.Value) {
24 |             soot.Value value = (soot.Value) node;
25 | 
26 |             // check for local variables
27 |             if (value instanceof soot.jimple.internal.JimpleLocal) {
28 |                 return true;
29 |             }
30 | 
31 |             // check for field references
32 |             return value instanceof soot.jimple.InstanceFieldRef || value instanceof soot.jimple.StaticFieldRef;
33 |         }
34 |         return false;
35 |     }
36 | 
37 |     private static boolean isControlFlowNode(Object node) {
38 |         return node instanceof soot.jimple.IfStmt || node instanceof soot.jimple.SwitchStmt;
39 |     }
40 | 
41 |     private static boolean isDataNode(Object node) {
42 |         return node instanceof soot.jimple.AssignStmt || node instanceof soot.jimple.ArrayRef;
43 |     }
44 | }
45 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/model/Move.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.edit.model;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | import java.util.Objects;
 6 | 
 7 | /**
 8 |  * Represents a move operation in the edit script.
 9 |  */
10 | public class Move extends EditOperation {
11 |     private int oldLineNumber;
12 |     private int newLineNumber;
13 |     private String codeSnippet;
14 | 
15 |     public Move(PDGNode node, int oldLineNumber, int newLineNumber, String codeSnippet) {
16 |         super(node);
17 |         this.oldLineNumber = oldLineNumber;
18 |         this.newLineNumber = newLineNumber;
19 |         this.codeSnippet = codeSnippet;
20 |     }
21 | 
22 |     public int getOldLineNumber() {
23 |         return oldLineNumber;
24 |     }
25 | 
26 |     public int getNewLineNumber() {
27 |         return newLineNumber;
28 |     }
29 | 
30 |     public String getCodeSnippet() {
31 |         return codeSnippet;
32 |     }
33 | 
34 |     @Override
35 |     public String getName() {
36 |         return "Move";
37 |     }
38 | 
39 |     @Override
40 |     public String toString() {
41 |         return String.format("Move from line %d to line %d: %s", oldLineNumber, newLineNumber, codeSnippet);
42 |     }
43 | 
44 |     @Override
45 |     public boolean equals(Object obj) {
46 |         if (this == obj) return true;
47 |         if (!(obj instanceof Move)) return false;
48 |         Move other = (Move) obj;
49 |         return oldLineNumber == other.oldLineNumber &&
50 |                 newLineNumber == other.newLineNumber &&
51 |                 Objects.equals(codeSnippet, other.codeSnippet);
52 |     }
53 | 
54 |     @Override
55 |     public int hashCode() {
56 |         return Objects.hash(oldLineNumber, newLineNumber, codeSnippet);
57 |     }
58 | 
59 | }
60 | 


--------------------------------------------------------------------------------
/py-visualise/static/css/style.css:
--------------------------------------------------------------------------------
 1 | .highlight-change1-old { /* highlight classes come in pairs: highlight-changeN-old / highlight-changeN-new, for N = 1..6; each pair has its own background and text colour */
 2 |     background-color: #ffcccc;
 3 |     color: #990000;
 4 | }
 5 | 
 6 | .highlight-change1-new {
 7 |     background-color: #ccffcc;
 8 |     color: #006600;
 9 | }
10 | 
11 | .highlight-change2-old {
12 |     background-color: #ccccff;
13 |     color: #000099;
14 | }
15 | 
16 | .highlight-change2-new {
17 |     background-color: #ffccf2;
18 |     color: #cc0099;
19 | }
20 | 
21 | .highlight-change3-old {
22 |     background-color: #ffccff;
23 |     color: #990099;
24 | }
25 | 
26 | .highlight-change3-new {
27 |     background-color: #ebccff;
28 |     color: #6600cc;
29 | }
30 | 
31 | .highlight-change4-old {
32 |     background-color: #ffeecc;
33 |     color: #996633;
34 | }
35 | 
36 | .highlight-change4-new {
37 |     background-color: #ccffff;
38 |     color: #003366;
39 | }
40 | 
41 | .highlight-change5-old {
42 |     background-color: #e6e6ff;
43 |     color: #3333cc;
44 | }
45 | 
46 | .highlight-change5-new {
47 |     background-color: #e6ffe6;
48 |     color: #339933;
49 | }
50 | 
51 | .highlight-change6-old {
52 |     background-color: #ffe6e6;
53 |     color: #cc3333;
54 | }
55 | 
56 | .highlight-change6-new {
57 |     background-color: #e6ffff;
58 |     color: #339999;
59 | }
60 | 
61 | .diff-container { /* lays its children out as a flex row with a 20px gap */
62 |     display: flex;
63 |     gap: 20px;
64 | }
65 | 
66 | .deleted-line { /* struck-through red text; shares its background colour with .highlight-change2-new */
67 |     text-decoration: line-through;
68 |     background-color: #ffccf2;
69 |     color: red;
70 | }
71 | 
72 | .inserted-line { /* green text on a light green background */
73 |     background-color: #d4fcbc;
74 |     color: green;
75 | }
76 | 
77 | .file-content { /* one file pane: monospace, whitespace preserved with wrapping, horizontal scroll on overflow */
78 |     width: 45%;
79 |     padding: 10px;
80 |     border: 1px solid #ddd;
81 |     background-color: #f9f9f9;
82 |     white-space: pre-wrap;
83 |     font-family: monospace;
84 |     overflow-x: auto;
85 | }


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/NodeMapping.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | import java.util.HashMap;
 6 | import java.util.Map;
 7 | 
 8 | /**
 9 |  * NodeMapping class to store mappings between nodes in two PDGs. This class is used to store the mapping between
10 |  * nodes in two PDGs that have been matched by the GraphMatcher.
11 |  */
12 | public class NodeMapping {
13 |     private Map<PDGNode, PDGNode> nodeMapping;
14 |     private Map<PDGNode, PDGNode> reverseNodeMapping;
15 | 
16 |     public NodeMapping() {
17 |         nodeMapping = new HashMap<>();
18 |         reverseNodeMapping = new HashMap<>();
19 |     }
20 | 
21 |     // adds a mapping between a source node and a destination node
22 |     public void addMapping(PDGNode srcNode, PDGNode dstNode) {
23 |         nodeMapping.put(srcNode, dstNode);
24 |         reverseNodeMapping.put(dstNode, srcNode);
25 |     }
26 | 
27 |     // exposes the entire node mapping
28 |     public Map<PDGNode, PDGNode> getNodeMapping() {
29 |         return nodeMapping;
30 |     }
31 | 
32 |     // exposes the reverse node mapping, useful for backwarsd traverse
33 |     public Map<PDGNode, PDGNode> getReverseNodeMapping() {
34 |         return reverseNodeMapping;
35 |     }
36 | 
37 |     // print all node mappings for debugging
38 |     public void printMappings() {
39 |         for (Map.Entry<PDGNode, PDGNode> entry : nodeMapping.entrySet()) {
40 |             System.out.println("Source Node: " + entry.getKey()
41 |                     + " --> Mapped to: " + entry.getValue());
42 |         }
43 |     }
44 | 
45 |     public boolean isEmpty() {
46 |         return nodeMapping.isEmpty();
47 |     }
48 | 
49 |     public int size() {
50 |         return nodeMapping.size();
51 |     }
52 | }


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/GraphMapping.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching;
 2 | 
 3 | import org.pdgdiff.graph.PDG;
 4 | 
 5 | import java.util.HashMap;
 6 | import java.util.Map;
 7 | 
 8 | /**
 9 |  * GraphMapping class to store mappings between PDGs. This class is used to store the mapping between
10 |  * PDGs in two lists that have been matched by the GraphMatcher. For each PDG mapping, a node mapping
11 |  * is also stored.
12 |  */
13 | public class GraphMapping {
14 |     private Map<PDG, PDG> graphMapping;
15 |     private Map<PDG, NodeMapping> nodeMappings;
16 | 
17 |     public GraphMapping() {
18 |         this.graphMapping = new HashMap<>();
19 |         this.nodeMappings = new HashMap<>();
20 |     }
21 | 
22 |     public void addGraphMapping(PDG srcPDG, PDG dstPDG, NodeMapping nodeMapping) {
23 |         graphMapping.put(srcPDG, dstPDG);
24 |         nodeMappings.put(srcPDG, nodeMapping);
25 |     }
26 | 
27 |     // retrieves the node mapping for a given PDG pair
28 |     public NodeMapping getNodeMapping(PDG srcPDG) {
29 |         return nodeMappings.get(srcPDG);
30 |     }
31 | 
32 |     // exposes the entire graph mapping
33 |     public Map<PDG, PDG> getGraphMapping() {
34 |         return graphMapping;
35 |     }
36 | 
37 |     // pretty print all graph mappings for debugging (redundant otherwise)
38 |     public void printGraphMappings() {
39 |         for (Map.Entry<PDG, PDG> entry : graphMapping.entrySet()) {
40 |             System.out.println("Source PDG: " + entry.getKey() + " --> Mapped to: " + entry.getValue());
41 |             NodeMapping nodeMapping = nodeMappings.get(entry.getKey());
42 |             if (nodeMapping != null) {
43 |                 System.out.println("Node Mappings for this PDG:");
44 |                 nodeMapping.printMappings();
45 |             }
46 |         }
47 |     }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/graph/GraphTraversal.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.graph;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | import java.util.*;
 6 | 
 7 | /**
 8 |  * this class is used to traverse the graph using bfs and collect all nodes. This acts as a helper function for other
 9 |  * methods, especially when order of nodes in the graph is an important consideration.
10 | 
11 |  */
12 | public class GraphTraversal {
13 | 
14 |     private static boolean debug = false;
15 | 
16 |     public static void setLogging(boolean enable) {
17 |         debug = enable;
18 |     }
19 | 
20 |     public static List<PDGNode> collectNodesBFS(PDG pdg) {
21 |         if (debug) System.out.println("[BFS] Traversing graph");
22 | 
23 |         PDGNode start_node = pdg.getStartNode();
24 |         List<PDGNode> nodeList = new ArrayList<>();
25 | 
26 |         if (start_node == null) {
27 |             if (debug) System.out.println("[BFS] No start node found in the PDG.");
28 |             return nodeList;
29 |         }
30 | 
31 |         Queue<PDGNode> queue = new LinkedList<>();
32 |         Set<PDGNode> visited = new HashSet<>();
33 | 
34 |         queue.add(start_node);
35 |         visited.add(start_node);
36 |         nodeList.add(start_node);
37 | 
38 |         // begin BFS
39 |         while (!queue.isEmpty()) {
40 |             PDGNode current_node = queue.poll();
41 |             if (debug) System.out.println("[BFS] Visiting node: " + current_node.toShortString());
42 | 
43 |             // add dependents to the queue
44 |             List<PDGNode> dependents = current_node.getDependents();
45 |             for (PDGNode dependent : dependents) {
46 |                 if (!visited.contains(dependent)) {
47 |                     queue.add(dependent);
48 |                     visited.add(dependent);
49 |                     nodeList.add(dependent);
50 |                 }
51 |             }
52 |         }
53 | 
54 |         if (debug) System.out.println("[BFS] BFS Graph traversal complete.");
55 |         return nodeList;
56 |     }
57 | 
58 |     public static int getNodeCount(PDG pdg) {
59 |         List<PDGNode> nodeList = collectNodesBFS(pdg);
60 |         return nodeList.size();
61 |     }
62 | 
63 | }
64 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/vf2/VF2Matcher.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models.vf2;
 2 | 
 3 | import org.pdgdiff.matching.NodeMapping;
 4 | import org.pdgdiff.graph.PDG;
 5 | import soot.toolkits.graph.pdg.PDGNode;
 6 | 
 7 | import java.util.Map;
 8 | 
 9 | /**
10 |  * VF2Matcher class to perform graph matching using the VF2 algorithm. This class contains methods to match two PDGs
11 |  * using the VF2 algorithm and return the node mappings between the two PDGs.
12 |  */
13 | public class VF2Matcher {
14 |     private final PDG srcPdg;
15 |     private final PDG dstPdg;
16 |     private final NodeMapping nodeMapping;
17 | 
18 |     public VF2Matcher(PDG srcPdg, PDG dstPdg) {
19 |         this.srcPdg = srcPdg;
20 |         this.dstPdg = dstPdg;
21 |         this.nodeMapping = new NodeMapping();
22 |     }
23 | 
24 |     public NodeMapping match() {
25 |         // Initialize state
26 |         VF2State state = new VF2State(srcPdg, dstPdg);
27 |         // Start recursive matching
28 |         if (matchRecursive(state)) {
29 |             return nodeMapping;
30 |         } else {
31 |             // No isomorphism found
32 |             return null;
33 |         }
34 |     }
35 | 
36 | 
37 |     // TODO: investigate vf2, i believe it to be too strict. need to figure out way of mapping methods -> methods if they
38 |     // TODO: are similar 'ish', dont' need exact matches when I am effectively looking at version differences.
39 |     private boolean matchRecursive(VF2State state) {
40 |         if (state.isComplete()) {
41 |             // Mapping is complete, transfer mappings to nodeMapping
42 |             for (Map.Entry<PDGNode, PDGNode> entry : state.getMapping().entrySet()) {
43 |                 nodeMapping.addMapping(entry.getKey(), entry.getValue());
44 |             }
45 |             return true;
46 |         }
47 | 
48 |         // Generate candidate pairs
49 |         for (CandidatePair pair : state.generateCandidates()) {
50 |             if (state.isFeasible(pair)) {
51 |                 state.addPair(pair);
52 |                 if (matchRecursive(state)) {
53 |                     return true;
54 |                 }
55 |                 state.removePair(pair);
56 |             }
57 |         }
58 |         return false;
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/testclasses/TestFileBefore.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.testclasses;
 2 | 
public class TestFileBefore { // NOTE(review): looks like the "before" half of a fixture pair with TestFileAfter; line positions presumably matter to the diff, so no lines were added - confirm

    private int onefield;

    public String anotherfield;

    public static void main(String[] args) {
        TestFileBefore test = new TestFileBefore();
        int result = test.addNumbers(5, 10);
        System.out.println("Result: " + result);
        int res = test.minus(10, 5);
        System.out.println("Result: " + res);

        int complexRes = test.detailedComputation(5, 10);
        System.out.println("Detailed Computation Result: " + complexRes);
        int t = test.identical(5, 10);
        System.out.println("identical Result: " + t);
    }

    public int identical(int num1, int num2) {
        int result = 0;

        // Choose the operation from how num1 compares to num2: add, subtract, or multiply
        if (num1 > num2) {
            result = num1 + num2;
        } else if (num1 < num2) {
            result = num1 - num2;
        } else {
            result = num1 * num2;
        }
        return result;
    }

    public int addNumbers(int a, int b) {
        int toadd1 = a;
        int toadd2 = b;
        int sum = toadd1 + toadd2;
        return sum;
    }

    public int minus(int a, int b) {
        int sum = a - b;
        return sum;
    }

    // A more complex method with heavier control flow and a non-matching name (presumably paired with complexCalculation in TestFileAfter) to try and catch edge cases
    public int detailedComputation(int num1, int num2) {
        int result = 0;

        // Choose the operation from how num1 compares to num2: add, subtract, or multiply
        if (num1 > num2) {
            result = num1 + num2;
        } else if (num1 < num2) {
            result = num1 - num2;
        } else {
            result = num1 * num2;
        }

        // Four iterations: subtract i, then divide by 3 when divisible by 3, otherwise add 2*i
        for (int i = 0; i < 4; i++) {
            result -= i;
            if (result % 3 == 0) {
                result /= 3;
            } else {
                result += i * 2;
            }
        }

        // Six iterations: multiply result by i whenever i is odd
        for (int i = 0; i < 6; i++) {
            if (i % 2 == 1) {
                result *= i;
            }
        }

        return result;
    }

}


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/model/Update.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.edit.model;
 2 | 
 3 | import soot.toolkits.graph.pdg.PDGNode;
 4 | 
 5 | import java.util.Objects;
 6 | 
 7 | /**
 8 |  * Represents an update operation in the edit script.
 9 |  */
10 | public class Update extends EditOperation {
11 |     private int oldLineNumber;
12 |     private int newLineNumber;
13 |     private String oldCodeSnippet;
14 |     private String newCodeSnippet;
15 |     private SyntaxDifference syntaxDifference;
16 | 
17 |     public Update(PDGNode node, int oldLineNumber, int newLineNumber,
18 |                   String oldCodeSnippet, String newCodeSnippet,
19 |                   SyntaxDifference syntaxDifference) {
20 |         super(node);
21 |         this.oldLineNumber = oldLineNumber;
22 |         this.newLineNumber = newLineNumber;
23 |         this.oldCodeSnippet = oldCodeSnippet;
24 |         this.newCodeSnippet = newCodeSnippet;
25 |         this.syntaxDifference = syntaxDifference;
26 |     }
27 | 
28 |     public int getOldLineNumber() {
29 |         return oldLineNumber;
30 |     }
31 | 
32 |     public int getNewLineNumber() {
33 |         return newLineNumber;
34 |     }
35 | 
36 |     public String getOldCodeSnippet() {
37 |         return oldCodeSnippet;
38 |     }
39 | 
40 |     public String getNewCodeSnippet() {
41 |         return newCodeSnippet;
42 |     }
43 | 
44 |     public SyntaxDifference getSyntaxDifference() {
45 |         return syntaxDifference;
46 |     }
47 | 
48 |     @Override
49 |     public String getName() {
50 |         return "Update";
51 |     }
52 | 
53 |     @Override
54 |     public String toString() {
55 |         return String.format("Update at lines %d -> %d:\nOld Code: %s\nNew Code: %s\nDifference: %s",
56 |                 oldLineNumber, newLineNumber, oldCodeSnippet, newCodeSnippet, syntaxDifference);
57 |     }
58 | 
59 |     @Override
60 |     public boolean equals(Object obj) {
61 |         if (this == obj) return true;
62 |         if (!(obj instanceof Update)) return false;
63 |         Update other = (Update) obj;
64 |         return oldLineNumber == other.oldLineNumber &&
65 |                 newLineNumber == other.newLineNumber &&
66 |                 Objects.equals(oldCodeSnippet, other.oldCodeSnippet) &&
67 |                 Objects.equals(newCodeSnippet, other.newCodeSnippet) &&
68 |                 Objects.equals(syntaxDifference, other.syntaxDifference);
69 |     }
70 | 
71 |     @Override
72 |     public int hashCode() {
73 |         return Objects.hash(oldLineNumber, newLineNumber, oldCodeSnippet, newCodeSnippet, syntaxDifference);
74 |     }
75 | }
76 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/testclasses/TestFileAfter.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.testclasses;
 2 | 
public class TestFileAfter { // NOTE(review): looks like the "after" half of a fixture pair with TestFileBefore; line positions presumably matter to the diff, so no lines were added - confirm

    private int thefield;

    public static void main(String[] args) {
        TestFileAfter test = new TestFileAfter();
        int number1 = 5;
        int number2 = 4;
        int result = test.addNumbers(number1, number2);
        System.out.println("Result: " + result);
        int product = test.multiplyNumbers(number1, number2);
        System.out.println("Product: " + product);



        int complexResult = test.complexCalculation(number1, number2);
        int identical_out = test.identical(3,10);
        System.out.println("Complex Calculation Result: " + complexResult);
        System.out.println("identical Result: " + identical_out);
    }

    public int addNumbers(int number, int number2) {
        int sum = number + number2;
        return sum;
    }

    public int multiplyNumbers(int number, int number2) {
        int product = number * number2;
        return product;
    }

    // A more complex method with heavier control flow and a non-matching name (presumably paired with detailedComputation in TestFileBefore) to try and catch edge cases
    public int complexCalculation(int num1, int num2) {
        int result = 0;

        // Choose the operation from how num1 compares to num2: subtract, add, or multiply
        if (num1 > num2) {
            result = num1 - num2;
        } else if (num1 < num2) {
            result = num1 + num2;
        } else {
            result = num1 * num2;
        }

        // Three iterations: add i, then halve when even, otherwise triple
        for (int i = 0; i < 3; i++) {
            result += i;
            if (result % 2 == 0) {
                result /= 2;
            } else {
                result *= 3;
            }
        }

        // Five iterations: add i when i is even, subtract it when i is odd
        for (int i = 0; i < 5; i++) {
            if (i % 2 == 0) {
                result += i;
            } else {
                result -= i;
            }
        }

        return result;
    }

    public int identical(int num1, int num2) {
        int result = 0;

        // Choose the operation from how num1 compares to num2: add, subtract, or multiply
        if (num1 > num2) {
            result = num1 + num2;
        } else if (num1 < num2) {
            result = num1 - num2;
        } else {
            result = num1 * num2;
        }
        return result;
    }
}


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/GEDGraphMatcher.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models;
 2 | 
 3 | import org.pdgdiff.graph.PDG;
 4 | import org.pdgdiff.matching.GraphMapping;
 5 | import org.pdgdiff.matching.GraphMatcher;
 6 | import org.pdgdiff.matching.NodeMapping;
 7 | import org.pdgdiff.matching.models.ged.GEDMatcher;
 8 | import org.pdgdiff.matching.models.ged.GEDResult;
 9 | 
10 | import java.util.ArrayList;
11 | import java.util.List;
12 | 
13 | /**
14 |  * A GraphMatcher that uses a Graph Edit Distance approach to
15 |  * match PDGs from the source and dest file.  Similar "outer loop" to VF2GraphMatcher,
16 |  * but calls GEDMatcher internally for each PDG pair.
17 |  */
18 | public class GEDGraphMatcher extends GraphMatcher {
19 | 
20 |     public GEDGraphMatcher(List<PDG> srcPdgs, List<PDG> dstPdgs) {
21 |         super(srcPdgs, dstPdgs);
22 |     }
23 | 
24 |     @Override
25 |     public GraphMapping matchPDGLists() {
26 |         List<PDG> unmappedSrcPdgs = new ArrayList<>(srcPdgs);
27 |         List<PDG> unmappedDstPdgs = new ArrayList<>(dstPdgs);
28 | 
29 |         while (!unmappedSrcPdgs.isEmpty() && !unmappedDstPdgs.isEmpty()) {
30 |             double minDistance = Double.POSITIVE_INFINITY;
31 |             PDG bestSrcPdg = null;
32 |             PDG bestDstPdg = null;
33 |             NodeMapping bestNodeMapping = null;
34 | 
35 |             // for each unmatched PDG in src and dest, compute the minimal graph-edit distance
36 |             for (PDG srcPdg : unmappedSrcPdgs) {
37 |                 for (PDG dstPdg : unmappedDstPdgs) {
38 |                     GEDMatcher ged = new GEDMatcher(srcPdg, dstPdg);
39 |                     GEDResult result = ged.match();  // get (distance, nodeMapping)
40 | 
41 |                     if (result != null && result.distance < minDistance) {
42 |                         minDistance = result.distance;
43 |                         bestSrcPdg = srcPdg;
44 |                         bestDstPdg = dstPdg;
45 |                         bestNodeMapping = result.nodeMapping;
46 |                     }
47 |                 }
48 |             }
49 | 
50 |             if (bestSrcPdg != null && bestDstPdg != null) {
51 |                 //  found the "best" pair, remove them from the unmatched sets
52 |                 unmappedSrcPdgs.remove(bestSrcPdg);
53 |                 unmappedDstPdgs.remove(bestDstPdg);
54 | 
55 |                 // the chosen mapping added in the global GraphMapping
56 |                 graphMapping.addGraphMapping(bestSrcPdg, bestDstPdg, bestNodeMapping);
57 |             } else {
58 |                 // no good matches remain
59 |                 break;
60 |             }
61 |         }
62 | 
63 |         return graphMapping;
64 |     }
65 | }
66 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/UllmannGraphMatcher.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models;
 2 | 
 3 | import org.pdgdiff.graph.GraphTraversal;
 4 | import org.pdgdiff.graph.PDG;
 5 | import org.pdgdiff.matching.GraphMapping;
 6 | import org.pdgdiff.matching.GraphMatcher;
 7 | import org.pdgdiff.matching.NodeMapping;
 8 | import org.pdgdiff.matching.models.ullmann.UllmannMatcher;
 9 | 
10 | import java.util.ArrayList;
11 | import java.util.List;
12 | 
13 | public class UllmannGraphMatcher extends GraphMatcher {
14 |     public UllmannGraphMatcher(List<PDG> list1, List<PDG> list2) {
15 |         super(list1, list2);
16 |     }
17 | 
18 |     @Override
19 |     public GraphMapping matchPDGLists() {
20 |         List<PDG> unmappedPDGs1 = new ArrayList<>(srcPdgs);
21 |         List<PDG> unmappedPDGs2 = new ArrayList<>(dstPdgs);
22 | 
23 |         while (!unmappedPDGs1.isEmpty() && !unmappedPDGs2.isEmpty()) {
24 |             double maxScore = Double.NEGATIVE_INFINITY;
25 |             PDG bestPdg1 = null;
26 |             PDG bestPdg2 = null;
27 |             NodeMapping bestNodeMapping = null;
28 | 
29 |             // for each pair of unmapped PDGs, compute similarity score
30 |             for (PDG pdg1 : unmappedPDGs1) {
31 |                 for (PDG pdg2 : unmappedPDGs2) {
32 |                     UllmannMatcher ullmannMatcher = new UllmannMatcher(pdg1, pdg2);
33 |                     NodeMapping nodeMapping = ullmannMatcher.match();
34 | 
35 |                     if (nodeMapping != null && !nodeMapping.isEmpty()) {
36 |                         int mappedNodes = nodeMapping.size();
37 |                         int unmappedNodes1 = GraphTraversal.getNodeCount(pdg1) - mappedNodes;
38 |                         int unmappedNodes2 = GraphTraversal.getNodeCount(pdg2) - mappedNodes;
39 | 
40 |                         // TODO: this is using same score as vf2 matcher, again not sure if this is ideal!
41 |                         double score = (double) mappedNodes / (mappedNodes + unmappedNodes1 + unmappedNodes2);
42 | 
43 |                         if (score > maxScore) {
44 |                             maxScore = score;
45 |                             bestPdg1 = pdg1;
46 |                             bestPdg2 = pdg2;
47 |                             bestNodeMapping = nodeMapping;
48 |                         }
49 |                     }
50 |                 }
51 |             }
52 | 
53 |             if (bestPdg1 != null && bestPdg2 != null) {
54 |                 // map the best pdg pair found
55 |                 unmappedPDGs1.remove(bestPdg1);
56 |                 unmappedPDGs2.remove(bestPdg2);
57 |                 graphMapping.addGraphMapping(bestPdg1, bestPdg2, bestNodeMapping);
58 |             } else {
59 |                 // no more matches found
60 |                 break;
61 |             }
62 |         }
63 | 
64 |         for (PDG pdg1 : unmappedPDGs1) {
65 |             System.out.println("No matching PDG found for: " + pdg1.getCFG().getBody().getMethod().getSignature());
66 |         }
67 | 
68 |         return graphMapping;
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/heuristic/JaroWinklerSimilarity.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models.heuristic;
 2 | 
 3 | 
 4 | /**
 5 | * calculate % similarity between strings using Jaro-Winkler algorithm
 6 | **/
 7 | public class JaroWinklerSimilarity {
 8 | 
 9 |     public static double JaroWinklerSimilarity(String s1, String s2) {
10 |         double jaro = jaroSimilarity(s1, s2);
11 |         int prefixLength = commonPrefixLength(s1, s2);
12 |         double SCALING_FACTOR = 0.1;
13 | 
14 |         return jaro + (prefixLength * SCALING_FACTOR * (1 - jaro));
15 |     }
16 | 
17 |     // returns a double which is a similarity score between 0 and 1
18 |     public static double jaroSimilarity(String s1, String s2) {
19 |         if (s1.equals(s2)) {
20 |             return 1.0;
21 |         }
22 | 
23 |         if (s1.isEmpty() || s2.isEmpty()) {
24 |             return 0.0;
25 |         }
26 | 
27 |         // max distance for matching characters is floor(max(|s1|, |s2|) / 2) - 1
28 |         int matchDistance = Math.max(s1.length(), s2.length()) / 2 - 1;
29 | 
30 |         boolean[] s1Matches = new boolean[s1.length()];
31 |         boolean[] s2Matches = new boolean[s2.length()];
32 | 
33 |         // counting matches and transpositions
34 |         int matches = 0;
35 |         int transpositions = 0;
36 | 
37 |         for (int i = 0; i < s1.length(); i++) {
38 |             int start = Math.max(0, i - matchDistance);
39 |             int end = Math.min(i + matchDistance + 1, s2.length());
40 | 
41 |             for (int j = start; j < end; j++) {
42 |                 if (!s2Matches[j] && s1.charAt(i) == s2.charAt(j)) {
43 |                     s1Matches[i] = true;
44 |                     s2Matches[j] = true;
45 |                     matches++;
46 |                     break;
47 |                 }
48 |             }
49 |         }
50 | 
51 |         if (matches == 0) {
52 |             return 0.0;
53 |         }
54 | 
55 |         int k = 0;
56 |         for (int i = 0; i < s1.length(); i++) {
57 |             if (s1Matches[i]) {
58 |                 while (!s2Matches[k]) {
59 |                     k++;
60 |                 }
61 |                 if (s1.charAt(i) != s2.charAt(k)) {
62 |                     transpositions++;
63 |                 }
64 |                 k++;
65 |             }
66 |         }
67 | 
68 |         transpositions /= 2;
69 | 
70 |         // final similarity formula
71 |         return ((matches / (double) s1.length()) +
72 |                 (matches / (double) s2.length()) +
73 |                 ((matches - transpositions) / (double) matches)) / 3.0;
74 |     }
75 | 
76 |     public static int commonPrefixLength(String s1, String s2) {
77 |         int prefixLength = 0;
78 |         int maxPrefixLength = Math.min(4, Math.min(s1.length(), s2.length()));
79 | 
80 |         for (int i = 0; i < maxPrefixLength; i++) {
81 |             if (s1.charAt(i) == s2.charAt(i)) {
82 |                 prefixLength++;
83 |             } else {
84 |                 break;
85 |             }
86 |         }
87 | 
88 |         return prefixLength;
89 |     }
90 | }
91 | 
92 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/ged/HungarianAlgorithm.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models.ged;
 2 | 
 3 | import java.util.Arrays;
 4 | 
 5 | /**
 6 |  * implementation of the Hungarian (Kuhn-Munkres) algorithm for the assignment problem on a cost matrix.
 7 |  *
 8 |  * For an n x n matrix costMatrix[row][col],
 9 |  * returns an int[] assignment where assignment[row] = col,
10 |  * or -1 if unmatched.
11 |  *
12 |  * NB matricies must be square hence PDGs have been padded in calling function when needed.
13 |  *
14 |  * sources;
15 |  * https://www.hungarianalgorithm.com/examplehungarianalgorithm.php
16 |  * https://en.wikipedia.org/wiki/Hungarian_algorithm
17 |  *
18 |  */
19 | 
20 | public class HungarianAlgorithm {
21 | 
22 |     public static int[] minimizeAssignment(double[][] costMatrix) {
23 |         int n = costMatrix.length;
24 |         int[] assignment = new int[n];
25 |         int[] partialMatch = new int[n];
26 |         int[] trace = new int[n];
27 |         double[] potentialRows = new double[n];
28 |         double[] potentialCols = new double[n];
29 | 
30 |         Arrays.fill(assignment, -1);
31 | 
32 |         for (int i = 1; i < n; i++) {
33 |             partialMatch[0] = i;
34 |             int currCol = 0;
35 |             double[] minCols = new double[n];
36 |             boolean[] used = new boolean[n];
37 |             Arrays.fill(minCols, Double.POSITIVE_INFINITY);
38 |             do {
39 |                 used[currCol] = true;
40 |                 int currRow = partialMatch[currCol];
41 |                 double delta = Double.POSITIVE_INFINITY;
42 |                 int nextCol = 0;
43 |                 for (int j = 1; j < n; j++) {
44 |                     if (!used[j]) {
45 |                         double cur = costMatrix[currRow][j] - potentialRows[currRow] - potentialCols[j];
46 |                         if (cur < minCols[j]) {
47 |                             minCols[j] = cur;
48 |                             trace[j] = currCol;
49 |                         }
50 |                         if (minCols[j] < delta) {
51 |                             delta = minCols[j];
52 |                             nextCol = j;
53 |                         }
54 |                     }
55 |                 }
56 |                 for (int j = 0; j < n; j++) {
57 |                     if (used[j]) {
58 |                         potentialRows[partialMatch[j]] += delta;
59 |                         potentialCols[j] -= delta;
60 |                     } else {
61 |                         minCols[j] -= delta;
62 |                     }
63 |                 }
64 |                 currCol = nextCol;
65 |             } while (partialMatch[currCol] != 0);
66 | 
67 |             do {
68 |                 int nextCol = trace[currCol];
69 |                 partialMatch[currCol] = partialMatch[nextCol];
70 |                 currCol = nextCol;
71 |             } while (currCol != 0);
72 |         }
73 | 
74 |         // 'partialMatch[j] = i' means that  column j matched to row i
75 |         for (int j = 1; j < n; j++) {
76 |             assignment[partialMatch[j]] = j;
77 |         }
78 |         return assignment;
79 |     }
80 | }
81 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/util/SootInitializer.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.util;
 2 | 
 3 | import soot.G;
 4 | import soot.Scene;
 5 | import soot.options.Options;
 6 | 
 7 | import java.util.Collections;
 8 | 
 9 | /**
10 |  * SootInitializer class to initialize Soot, the static analysis framework of this specific implementation of the
11 |  * approach with the necessary configurations for PDG generation.
12 |  */
13 | public class SootInitializer {
14 | 
15 |     public static void initializeSoot(String dir) {
16 |         resetSoot();
17 | 
18 |         // setting soot options
19 |         Options.v().set_prepend_classpath(true);
20 |         Options.v().set_allow_phantom_refs(true);
21 |         Options.v().set_output_format(Options.output_format_jimple);
22 |         Options.v().set_verbose(true); // Debug output
23 | 
24 |         // The following phase options are configured to preserve the original code structure, as well as poss.
25 |         // read https://www.sable.mcgill.ca/soot/tutorial/phase/phase.html
26 |         // in some cases however this is not possible because of how soot constructs Jimple, this is a limitation of
27 |         // the implementation of this approach
28 |         Options.v().set_keep_line_number(true);
29 | 
30 |         Options.v().setPhaseOption("jb", "use-original-names:true");
31 |         Options.v().setPhaseOption("jb", "use-original-bytecode:true");
32 |         Options.v().setPhaseOption("jj", "simplify-off:true");
33 | 
34 |         Options.v().setPhaseOption("jb.dce", "enabled:false");  // Disable dead code elimination
35 |         Options.v().setPhaseOption("jb.dae", "enabled:false");  // Disable dead assignment elimination
36 |         Options.v().setPhaseOption("jb.uce", "enabled:false");  // Disable unreachable code elimination
37 |         Options.v().setPhaseOption("jb.cp", "enabled:false");  // Disable const propagation
38 |         Options.v().setPhaseOption("jb.ule", "enabled:false");  // Disable unused local elimination
39 |         Options.v().setPhaseOption("jop", "enabled:false");     // Disable optimizations like const folding
40 |         Options.v().setPhaseOption("wjop", "enabled:false");    // Disable whole-program optimizations
41 | 
42 |         Options.v().setPhaseOption("jb.tr", "enabled:false");   // Disable transformation on control flow
43 |         Options.v().setPhaseOption("bb", "enabled:false");      // Disable basic block merging or splitting
44 |         Options.v().setPhaseOption("jap", "enabled:false");     // Disable aggregation
45 |         Options.v().setPhaseOption("jtp.ls", "enabled:false");  // Disable loop simplification
46 |         Options.v().setPhaseOption("jop.uce", "enabled:false"); // Disable unreachable code elimination
47 |         Options.v().setPhaseOption("jop.cpf", "enabled:false");
48 | 
49 | 
50 |         Options.v().set_soot_classpath(dir);
51 |         Options.v().set_process_dir(Collections.singletonList(dir));
52 | 
53 |         Options.v().set_whole_program(true);
54 |         Options.v().set_no_bodies_for_excluded(true);
55 | 
56 | 
57 |         // finally loading necessary classes into the soot scene
58 |         Scene.v().loadNecessaryClasses();
59 |     }
60 | 
61 | 
62 |     // reset Soot (clean up)
63 |     public static void resetSoot() {
64 |         G.reset();
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/VF2GraphMatcher.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.matching.models;
 2 | 
 3 | import org.pdgdiff.graph.GraphTraversal;
 4 | import org.pdgdiff.graph.PDG;
 5 | import org.pdgdiff.matching.GraphMapping;
 6 | import org.pdgdiff.matching.GraphMatcher;
 7 | import org.pdgdiff.matching.NodeMapping;
 8 | import org.pdgdiff.matching.models.vf2.VF2Matcher;
 9 | 
10 | import java.util.ArrayList;
11 | import java.util.List;
12 | 
13 | /**
14 |  * Graph matcher that pairs source and destination PDGs using node mappings produced by {@link VF2Matcher}.
15 |  */
16 | public class VF2GraphMatcher extends GraphMatcher {
17 |     public VF2GraphMatcher(List<PDG> srcPdgs, List<PDG> dstPdgs) {
18 |         super(srcPdgs, dstPdgs);
19 |     }
20 | 
21 |     /**
22 |      * Greedily pairs PDGs: in every round VF2 is run on each remaining (source, destination) pair, the pair
23 |      * with the highest score is added to the graph mapping and both PDGs leave the pool. The loop stops once
24 |      * either pool is empty or no remaining pair yields a non-empty node mapping. Source PDGs still unpaired
25 |      * at that point are printed to stdout.
26 |      *
27 |      * @return the graph mapping holding every selected pair together with its node mapping
28 |      */
29 |     @Override
30 |     public GraphMapping matchPDGLists() {
31 |         List<PDG> remainingSrc = new ArrayList<>(srcPdgs);
32 |         List<PDG> remainingDst = new ArrayList<>(dstPdgs);
33 | 
34 |         while (!remainingSrc.isEmpty() && !remainingDst.isEmpty()) {
35 |             double topScore = Double.NEGATIVE_INFINITY;
36 |             PDG topSrc = null;
37 |             PDG topDst = null;
38 |             NodeMapping topMapping = null;
39 | 
40 |             // score every pair of PDGs that is still unpaired
41 |             for (PDG src : remainingSrc) {
42 |                 for (PDG dst : remainingDst) {
43 |                     NodeMapping candidate = new VF2Matcher(src, dst).match();
44 |                     if (candidate == null || candidate.isEmpty()) {
45 |                         continue;
46 |                     }
47 | 
48 |                     double score = similarity(candidate.size(),
49 |                             GraphTraversal.getNodeCount(src), GraphTraversal.getNodeCount(dst));
50 |                     if (score > topScore) {
51 |                         topScore = score;
52 |                         topSrc = src;
53 |                         topDst = dst;
54 |                         topMapping = candidate;
55 |                     }
56 |                 }
57 |             }
58 | 
59 |             if (topSrc == null || topDst == null) {
60 |                 break; // no remaining pair produced a usable node mapping
61 |             }
62 |             remainingSrc.remove(topSrc);
63 |             remainingDst.remove(topDst);
64 |             graphMapping.addGraphMapping(topSrc, topDst, topMapping);
65 |         }
66 | 
67 |         // handling PDGs in src that were not matched
68 |         for (PDG leftover : remainingSrc) {
69 |             System.out.println("No matching PDG found for: " + leftover.getCFG().getBody().getMethod().getSignature());
70 |         }
71 | 
72 |         return graphMapping;
73 |     }
74 | 
75 |     /**
76 |      * Similarity of a candidate pair: mapped nodes divided by mapped plus unmapped nodes on both sides, so the
77 |      * score grows as fewer nodes are left unmapped.
78 |      * TODO look into other metrics/measures.
79 |      * TODO might want to add a threshold, possibly not all graphs should be mapped to all graphs.
80 |      */
81 |     private static double similarity(int mapped, int srcCount, int dstCount) {
82 |         int unmappedSrc = srcCount - mapped;
83 |         int unmappedDst = dstCount - mapped;
84 |         return (double) mapped / (mapped + unmappedSrc + unmappedDst);
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/io/JsonOperationFormatter.java:
--------------------------------------------------------------------------------
 1 | package org.pdgdiff.io;
 2 | 
 3 | import org.pdgdiff.edit.model.*;
 4 | import com.google.gson.stream.JsonWriter;
 5 | import org.pdgdiff.matching.StrategySettings;
 6 | 
 7 | import java.io.IOException;
 8 | import java.io.Writer;
 9 | 
10 | /** Writes edit operations as JSON (two-space indent) through a Gson {@link JsonWriter}. */
11 | public class JsonOperationFormatter implements OperationFormatter {
12 |     private final JsonWriter writer;
13 | 
14 |     public JsonOperationFormatter(Writer writer) {
15 |         this.writer = new JsonWriter(writer);
16 |         this.writer.setIndent("  ");
17 |     }
18 | 
19 |     /** Emits the {@code strategySettings} object with the recovery and matching strategy names. */
20 |     @Override
21 |     public void writeInfo(StrategySettings settings) throws Exception {
22 |         writer.name("strategySettings").beginObject()
23 |                 .name("recoveryStrategy").value(settings.getRecoveryStrategy().toString())
24 |                 .name("matchingStrategy").value(settings.getMatchingStrategy().toString())
25 |                 .endObject();
26 |     }
27 | 
28 |     @Override
29 |     public void startOutput() throws IOException {
30 |         writer.beginObject(); // top-level object
31 |     }
32 | 
33 |     @Override
34 |     public void endOutput() throws IOException {
35 |         writer.endObject().close(); // ends the top-level object, then closes the writer
36 |     }
37 | 
38 |     @Override
39 |     public void startOperations() throws IOException {
40 |         writer.name("actions").beginArray();
41 |     }
42 | 
43 |     @Override
44 |     public void endOperations() throws IOException {
45 |         writer.endArray();
46 |     }
47 | 
48 |     @Override
49 |     public void insertOperation(Insert operation) throws IOException {
50 |         writer.beginObject()
51 |                 .name("action").value("Insert")
52 |                 .name("line").value(operation.getLineNumber())
53 |                 .name("code").value(operation.getCodeSnippet())
54 |                 .endObject();
55 |     }
56 | 
57 |     @Override
58 |     public void deleteOperation(Delete operation) throws IOException {
59 |         writer.beginObject()
60 |                 .name("action").value("Delete")
61 |                 .name("line").value(operation.getLineNumber())
62 |                 .name("code").value(operation.getCodeSnippet())
63 |                 .endObject();
64 |     }
65 | 
66 |     /** Writes an Update entry; {@code difference} is an object when a syntax difference is attached. */
67 |     @Override
68 |     public void updateOperation(Update operation) throws IOException {
69 |         writer.beginObject()
70 |                 .name("action").value("Update")
71 |                 .name("oldLine").value(operation.getOldLineNumber())
72 |                 .name("newLine").value(operation.getNewLineNumber())
73 |                 .name("oldCode").value(operation.getOldCodeSnippet())
74 |                 .name("newCode").value(operation.getNewCodeSnippet());
75 | 
76 |         SyntaxDifference diff = operation.getSyntaxDifference();
77 |         if (diff == null) {
78 |             // nothing attached: fall back to a fixed description
79 |             writer.name("difference").value("signature or class metadata change");
80 |         } else {
81 |             writer.name("difference").beginObject()
82 |                     .name("message").value(diff.getMessage())
83 |                     .name("oldJimple").value(diff.getOldJimpleCode())
84 |                     .name("newJimple").value(diff.getNewJimpleCode())
85 |                     .endObject();
86 |         }
87 |         writer.endObject();
88 |     }
89 | 
90 |     @Override
91 |     public void moveOperation(Move operation) throws IOException {
92 |         writer.beginObject()
93 |                 .name("action").value("Move")
94 |                 .name("oldLine").value(operation.getOldLineNumber())
95 |                 .name("newLine").value(operation.getNewLineNumber())
96 |                 .name("code").value(operation.getCodeSnippet())
97 |                 .endObject();
98 |     }
99 | }


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
 1 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 2 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 3 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 4 |     <modelVersion>4.0.0</modelVersion>
 5 | 
 6 |     <groupId>com.pdgdiff</groupId>
 7 |     <artifactId>soot-pdg</artifactId>
 8 |     <version>1.0-SNAPSHOT</version>
 9 | 
10 |     <dependencies>
11 |         <!-- Stable Soot release from Maven Central -->
12 |         <dependency>
13 |             <groupId>org.soot-oss</groupId>
14 |             <artifactId>soot</artifactId>
15 |             <version>4.3.0</version> <!-- Stable version -->
16 |         </dependency>
17 | 
18 |         <!-- SLF4J for logging (Soot uses this for logging) -->
19 |         <dependency>
20 |             <groupId>org.slf4j</groupId>
21 |             <artifactId>slf4j-api</artifactId>
22 |             <version>1.7.30</version>
23 |         </dependency>
24 |         <dependency>
25 |             <groupId>org.slf4j</groupId>
26 |             <artifactId>slf4j-simple</artifactId>
27 |             <version>1.7.30</version>
28 |         </dependency>
29 | 
30 |         <!-- for JSON serialization -->
31 |         <dependency>
32 |             <groupId>com.google.code.gson</groupId>
33 |             <artifactId>gson</artifactId>
34 |             <version>2.8.6</version>
35 |         </dependency>
36 | 
37 | 
38 |         <!-- JUnit for unit testing (declared once; a duplicate declaration triggers a Maven model warning) -->
39 |         <dependency>
40 |             <groupId>junit</groupId>
41 |             <artifactId>junit</artifactId>
42 |             <version>4.13.2</version>
43 |             <scope>test</scope>
44 |         </dependency>
45 | 
46 |     </dependencies>
47 | 
48 |     <build>
49 |         <plugins>
50 |             <plugin>
51 |                 <groupId>org.apache.maven.plugins</groupId>
52 |                 <artifactId>maven-compiler-plugin</artifactId>
53 |                 <version>3.8.1</version>
54 |                 <executions>
55 |                     <!-- Default compilation settings for all classes -->
56 |                     <execution>
57 |                         <id>default-compile</id>
58 |                         <phase>compile</phase>
59 |                         <goals>
60 |                             <goal>compile</goal>
61 |                         </goals>
62 |                         <configuration>
63 |                             <source>1.8</source>
64 |                             <target>1.8</target>
65 |                         </configuration>
66 |                     </execution>
67 |                     <!-- Custom compilation for org.pdgdiff.testclasses -->
68 |                     <execution>
69 |                         <id>compile-testclasses</id>
70 |                         <phase>compile</phase>
71 |                         <goals>
72 |                             <goal>compile</goal>
73 |                         </goals>
74 |                         <configuration>
75 |                             <includes>
76 |                                 <include>org/pdgdiff/testclasses/**</include>
77 |                             </includes>
78 |                             <compilerArgs>
79 |                                 <arg>-g</arg> <!-- Include debug information -->
80 |                                 <arg>-O</arg> <!-- Legacy flag: a no-op in current javac, it does not disable optimizations -->
81 |                             </compilerArgs>
82 |                         </configuration>
83 |                     </execution>
84 |                 </executions>
85 |             </plugin>
86 |         </plugins>
87 |     </build>
88 | </project>
89 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/model/SyntaxDifference.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.edit.model;
  2 | 
  3 | import org.pdgdiff.edit.EditScriptGenerator;
  4 | import org.pdgdiff.util.SourceCodeMapper;
  5 | import soot.Unit;
  6 | import soot.tagkit.LineNumberTag;
  7 | import soot.toolkits.graph.pdg.PDGNode;
  8 | 
  9 | import java.util.Objects;
 10 | 
 11 | /**
 12 |  * Represents a syntax difference between two Units or PDGNodes.
 13 |  */
 14 | public class SyntaxDifference {
 15 |     private final Unit oldUnit;
 16 |     private final Unit newUnit;
 17 |     private final String message;
 18 | 
 19 |     private final int oldLineNumber;
 20 |     private final int newLineNumber;
 21 |     private final String oldCodeSnippet;
 22 |     private final String newCodeSnippet;
 23 | 
 24 |     private final String oldJimpleCode;
 25 |     private final String newJimpleCode;
 26 | 
 27 |     /**
 28 |      * Creates a unit-level difference. Line numbers are read from each unit's {@code LineNumberTag} (-1 when
 29 |      * the unit is null or carries no such tag), code snippets are looked up through the source mappers, and
 30 |      * the Jimple text is the unit's {@code toString()} (null for a null unit). No message is set.
 31 |      *
 32 |      * @param oldUnit         unit from the old version, may be null
 33 |      * @param newUnit         unit from the new version, may be null
 34 |      * @param oldSourceMapper mapper queried for the old source line
 35 |      * @param newSourceMapper mapper queried for the new source line
 36 |      */
 37 |     public SyntaxDifference(Unit oldUnit, Unit newUnit,
 38 |                             SourceCodeMapper oldSourceMapper, SourceCodeMapper newSourceMapper) {
 39 |         this.oldUnit = oldUnit;
 40 |         this.newUnit = newUnit;
 41 |         this.message = null;
 42 |         this.oldLineNumber = getLineNumber(oldUnit);
 43 |         this.newLineNumber = getLineNumber(newUnit);
 44 |         this.oldCodeSnippet = oldSourceMapper.getCodeLine(oldLineNumber);
 45 |         this.newCodeSnippet = newSourceMapper.getCodeLine(newLineNumber);
 46 |         this.oldJimpleCode = oldUnit != null ? oldUnit.toString() : null;
 47 |         this.newJimpleCode = newUnit != null ? newUnit.toString() : null;
 48 |     }
 49 | 
 50 |     /**
 51 |      * Creates a difference described only by a general message; no units, snippets or Jimple are attached.
 52 |      *
 53 |      * @param message description of the difference
 54 |      */
 55 |     public SyntaxDifference(String message) {
 56 |         this.message = message;
 57 |         this.oldUnit = null;
 58 |         this.newUnit = null;
 59 |         // line numbers stay at 0 here, exactly as they did before the fields became final
 60 |         this.oldLineNumber = 0;
 61 |         this.newLineNumber = 0;
 62 |         this.oldCodeSnippet = null;
 63 |         this.newCodeSnippet = null;
 64 |         this.oldJimpleCode = null;
 65 |         this.newJimpleCode = null;
 66 |     }
 67 | 
 68 |     /** @return the message, or null for a unit-level difference */
 69 |     public String getMessage() {
 70 |         return message;
 71 |     }
 72 | 
 73 |     /** @return the old unit's line number; -1 if the unit was null or untagged */
 74 |     public int getOldLineNumber() {
 75 |         return oldLineNumber;
 76 |     }
 77 | 
 78 |     /** @return the new unit's line number; -1 if the unit was null or untagged */
 79 |     public int getNewLineNumber() {
 80 |         return newLineNumber;
 81 |     }
 82 | 
 83 |     public String getOldCodeSnippet() {
 84 |         return oldCodeSnippet;
 85 |     }
 86 | 
 87 |     public String getNewCodeSnippet() {
 88 |         return newCodeSnippet;
 89 |     }
 90 | 
 91 |     public String getOldJimpleCode() {
 92 |         return oldJimpleCode;
 93 |     }
 94 | 
 95 |     public String getNewJimpleCode() {
 96 |         return newJimpleCode;
 97 |     }
 98 | 
 99 |     /**
100 |      * Returns the message when one is set, otherwise a multi-line description of the unit difference, or
101 |      * "Unknown Difference" when neither a message nor a unit is available.
102 |      */
103 |     @Override
104 |     public String toString() {
105 |         if (message != null) {
106 |             return message;
107 |         } else if (oldUnit != null || newUnit != null) {
108 |             return String.format(
109 |                     "Unit Difference at lines %d -> %d:\nOld Code: '%s'\nNew Code: '%s'\nOld Jimple: '%s'\nNew Jimple: '%s'",
110 |                     oldLineNumber, newLineNumber,
111 |                     oldCodeSnippet == null ? "null" : oldCodeSnippet.trim(),
112 |                     newCodeSnippet == null ? "null" : newCodeSnippet.trim(),
113 |                     oldJimpleCode == null ? "null" : oldJimpleCode.trim(),
114 |                     newJimpleCode == null ? "null" : newJimpleCode.trim());
115 |         } else {
116 |             return "Unknown Difference";
117 |         }
118 |     }
119 | 
120 |     // Reads the line number from the unit's LineNumberTag; -1 when the unit is null or has no such tag.
121 |     private static int getLineNumber(Unit unit) {
122 |         if (unit == null) {
123 |             return -1;
124 |         }
125 |         LineNumberTag tag = (LineNumberTag) unit.getTag("LineNumberTag");
126 |         return tag != null ? tag.getLineNumber() : -1;
127 |     }
128 | 
129 |     /**
130 |      * Two differences are equal when their message, line numbers, code snippets and Jimple text all match.
131 |      * The message takes part in the comparison so that message-only differences with different texts are
132 |      * not treated as the same difference.
133 |      */
134 |     @Override
135 |     public boolean equals(Object obj) {
136 |         if (this == obj) return true;
137 |         if (!(obj instanceof SyntaxDifference)) return false;
138 |         SyntaxDifference that = (SyntaxDifference) obj;
139 |         return oldLineNumber == that.oldLineNumber &&
140 |                 newLineNumber == that.newLineNumber &&
141 |                 Objects.equals(message, that.message) &&
142 |                 Objects.equals(oldCodeSnippet, that.oldCodeSnippet) &&
143 |                 Objects.equals(newCodeSnippet, that.newCodeSnippet) &&
144 |                 Objects.equals(oldJimpleCode, that.oldJimpleCode) &&
145 |                 Objects.equals(newJimpleCode, that.newJimpleCode);
146 |     }
147 | 
148 |     /** Hashes the same fields that {@link #equals(Object)} compares. */
149 |     @Override
150 |     public int hashCode() {
151 |         return Objects.hash(message, oldLineNumber, newLineNumber,
152 |                 oldCodeSnippet, newCodeSnippet, oldJimpleCode, newJimpleCode);
153 |     }
154 | }
155 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/graph/GraphExporter.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.graph;
  2 | 
  3 | import soot.SootMethod;
  4 | import soot.toolkits.graph.UnitGraph;
  5 | import soot.toolkits.graph.pdg.PDGNode;
  6 | 
  7 | import java.io.*;
  8 | import java.util.HashSet;
  9 | import java.util.List;
 10 | import java.util.Set;
 11 | 
 12 | /**
 13 |  * Static helpers that write PDGs to disk as DOT graphs and as plain-text dumps.
 14 |  */
 15 | public class GraphExporter {
 16 | 
 17 |     /**
 18 |      * Deletes the direct children of the given folder. Does nothing when the path cannot be listed as a
 19 |      * directory. Entries that cannot be deleted (e.g. non-empty sub-directories) are reported on stderr.
 20 |      *
 21 |      * @param folderPath path of the folder to clear
 22 |      */
 23 |     public static void clearOutputFolder(String folderPath) {
 24 |         // listFiles() returns null when the path does not denote a directory (this covers a missing folder)
 25 |         File[] files = new File(folderPath).listFiles();
 26 |         if (files == null) {
 27 |             return;
 28 |         }
 29 |         for (File file : files) {
 30 |             if (!file.delete()) {
 31 |                 System.err.println("Could not delete: " + file.getPath());
 32 |             }
 33 |         }
 34 |     }
 35 | 
 36 | 
 37 |     /**
 38 |      * Exports a PDG twice: as a DOT graph and as a text dump appended to {@code txtFileName}.
 39 |      *
 40 |      * @param pdg         graph to export
 41 |      * @param dotFileName target DOT file (overwritten)
 42 |      * @param txtFileName target text file (appended to)
 43 |      * @throws IOException if the text file cannot be opened
 44 |      */
 45 |     public static void exportPDG(PDG pdg, String dotFileName, String txtFileName) throws IOException {
 46 |         UnitGraph cfg = pdg.getCFG();
 47 |         SootMethod method = (cfg != null) ? cfg.getBody().getMethod() : null;
 48 |         // A PDG without a CFG has no method to name; use a placeholder instead of dereferencing null
 49 |         // (the former assert turned into a NullPointerException whenever assertions were disabled).
 50 |         String methodName = (method != null) ? method.getName() : "<unknown>";
 51 | 
 52 |         exportPDGToDot(pdg, dotFileName);
 53 |         exportPDGToFile(pdg, txtFileName, methodName);
 54 |     }
 55 | 
 56 |     /**
 57 |      * Appends the PDG's {@code toString()} output to the given file, framed by header and footer lines that
 58 |      * name the method.
 59 |      *
 60 |      * @param pdg        graph to dump
 61 |      * @param fileName   file to append to
 62 |      * @param methodName name printed in the header and footer
 63 |      * @throws IOException if the file cannot be opened
 64 |      */
 65 |     public static void exportPDGToFile(PDG pdg, String fileName, String methodName) throws IOException {
 66 |         try (PrintWriter writer = new PrintWriter(new FileWriter(fileName, true))) {
 67 |             writer.println("\n\n---------> Method: " + methodName);
 68 |             // dump text repr, toString might be overridden in PDG need to check
 69 |             writer.println(pdg.toString());
 70 |             writer.println("---------> End of PDG for method: " + methodName + "\n\n");
 71 |         }
 72 |     }
 73 | 
 74 |     /**
 75 |      * Writes the PDG as a DOT digraph. One edge is emitted per dependency label between a node and each of
 76 |      * its successors (control dependencies in red, data dependencies in blue, anything else in black and
 77 |      * labelled UNKNOWN); only nodes that take part in at least one edge get a label declaration.
 78 |      * An {@link IOException} is reported on stderr and not rethrown.
 79 |      *
 80 |      * @param pdg      graph to export
 81 |      * @param fileName target DOT file (overwritten)
 82 |      */
 83 |     public static void exportPDGToDot(PDG pdg, String fileName) {
 84 |         try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) {
 85 |             writer.println("digraph PDG {");
 86 |             writer.println("  graph [ranksep=2, nodesep=0.1];");
 87 |             writer.println("  node [shape=ellipse, style=filled, fillcolor=lightgrey, fontname=Arial, fontsize=12];");
 88 |             writer.println("  edge [fontname=Arial, fontsize=10];");
 89 | 
 90 |             Set<PDGNode> connectedNodes = new HashSet<>();
 91 | 
 92 |             // for each node, print out edges to its successors
 93 |             for (PDGNode src : pdg) {
 94 |                 List<PDGNode> successors = pdg.getSuccsOf(src);
 95 |                 for (PDGNode tgt : successors) {
 96 |                     // getLabelsForEdges return can contain multiple edge labels
 97 |                     List<GraphGenerator.DependencyTypes> labels = pdg.getLabelsForEdges(src, tgt);
 98 |                     for (GraphGenerator.DependencyTypes depType : labels) {
 99 |                         String colour = "black";
100 |                         String depLabel = "UNKNOWN";
101 |                         if (depType == GraphGenerator.DependencyTypes.CONTROL_DEPENDENCY) {
102 |                             colour = "red";
103 |                             depLabel = "CTRL_DEP";
104 |                         } else if (depType == GraphGenerator.DependencyTypes.DATA_DEPENDENCY) {
105 |                             colour = "blue";
106 |                             depLabel = "DATA_DEP";
107 |                         }
108 |                         // %n keeps the line endings consistent with the println calls around it
109 |                         writer.printf("  %s -> %s [label=\"%s\", color=\"%s\"];%n",
110 |                                 getNodeId(src),
111 |                                 getNodeId(tgt),
112 |                                 depLabel,
113 |                                 colour);
114 |                         connectedNodes.add(src);
115 |                         connectedNodes.add(tgt);
116 |                     }
117 |                 }
118 |             }
119 | 
120 |             for (PDGNode node : connectedNodes) {
121 |                 String label = escapeSpecialCharacters(removeCFGNodePrefix(node.toString()));
122 |                 writer.printf("  %s [label=\"%s\"];%n", getNodeId(node), label);
123 |             }
124 | 
125 |             writer.println("}");
126 |             System.out.println("PDG exported to DOT file: " + fileName);
127 | 
128 |         } catch (IOException e) {
129 |             System.err.println("Failed to export PDG to DOT file: " + fileName);
130 |             e.printStackTrace();
131 |         }
132 |     }
133 | 
134 |     // helper methods
135 | 
136 |     /** Builds a DOT node identifier from the node's identity hash code. */
137 |     private static String getNodeId(PDGNode node) {
138 |         return "node_" + System.identityHashCode(node);
139 |     }
140 | 
141 |     /** Strips the leading "Type: CFGNODE: " from a node label, if present. */
142 |     private static String removeCFGNodePrefix(String label) {
143 |         String prefix = "Type: CFGNODE: ";
144 |         if (label.startsWith(prefix)) {
145 |             return label.substring(prefix.length());
146 |         }
147 |         return label;
148 |     }
149 | 
150 |     /**
151 |      * Escapes a label for use inside a double-quoted DOT string, to avoid parse errors. Backslashes are
152 |      * doubled first so that a backslash already present in the label (e.g. inside a string constant) cannot
153 |      * combine with a following quote and end the DOT string early; then the double quotes are escaped.
154 |      */
155 |     private static String escapeSpecialCharacters(String label) {
156 |         return label.replace("\\", "\\\\").replace("\"", "\\\"");
157 |     }
158 | }
159 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/ullmann/UllmannMatcher.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.matching.models.ullmann;
  2 | 
  3 | import org.pdgdiff.graph.GraphTraversal;
  4 | import org.pdgdiff.graph.PDG;
  5 | import org.pdgdiff.matching.NodeMapping;
  6 | import soot.toolkits.graph.pdg.PDGNode;
  7 | 
  8 | import java.util.*;
  9 | 
 10 | /**
 11 |  * UllmannMatcher class to perform graph matching using Ullmann's Algorithm.
 12 |  * This class contains methods to match two PDGs and return the node mappings between them.
 13 |  */
 14 | public class UllmannMatcher {
 15 |     private final NodeMapping nodeMapping;
 16 | 
 17 |     private final List<PDGNode> srcNodes;
 18 |     private final List<PDGNode> dstNodes;
 19 |     private final int n; // number of source nodes
 20 |     private final int m; // number of destination nodes
 21 |     // compatMatrix[i][j]: 1 = dstNodes[j] is still a candidate for srcNodes[i], 0 = ruled out, -1 = selected
 22 |     private int[][] compatMatrix;
 23 | 
 24 |     /**
 25 |      * Collects the nodes of both PDGs through {@code GraphTraversal.collectNodesBFS} and allocates the
 26 |      * compatibility matrix (one row per source node, one column per destination node).
 27 |      */
 28 |     public UllmannMatcher(PDG srcPdg, PDG dstPdg) {
 29 |         nodeMapping = new NodeMapping();
 30 |         srcNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(srcPdg));
 31 |         dstNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(dstPdg));
 32 |         n = srcNodes.size();
 33 |         m = dstNodes.size();
 34 |         compatMatrix = new int[n][m];
 35 |     }
 36 | 
 37 |     /**
 38 |      * Searches for an assignment of every source node to a distinct destination node.
 39 |      *
 40 |      * @return the node mapping when a complete assignment is found; null when the source graph has more nodes
 41 |      *         than the destination graph or the search fails
 42 |      */
 43 |     public NodeMapping match() {
 44 |         if (n > m) {
 45 |             return null;
 46 |         }
 47 | 
 48 |         initializeM();
 49 | 
 50 |         // start recursive search
 51 |         return matchRecursive(0) ? nodeMapping : null;
 52 |     }
 53 | 
 54 |     // Seeds the matrix: 1 where the two nodes are compatible, 0 otherwise.
 55 |     private void initializeM() {
 56 |         for (int row = 0; row < n; row++) {
 57 |             PDGNode src = srcNodes.get(row);
 58 |             for (int col = 0; col < m; col++) {
 59 |                 compatMatrix[row][col] = nodesAreCompatible(src, dstNodes.get(col)) ? 1 : 0;
 60 |             }
 61 |         }
 62 |     }
 63 | 
 64 |     // Backtracking step: tries every feasible candidate column for row 'depth' and recurses on the next row.
 65 |     private boolean matchRecursive(int depth) {
 66 |         if (depth == n) {
 67 |             // all nodes have been matched
 68 |             buildNodeMapping();
 69 |             return true;
 70 |         }
 71 | 
 72 |         for (int col = 0; col < m; col++) {
 73 |             if (compatMatrix[depth][col] != 1 || !isFeasible(depth, col)) {
 74 |                 continue;
 75 |             }
 76 | 
 77 |             int[][] snapshot = copyMatrix(compatMatrix);
 78 | 
 79 |             // the column is taken, so later rows can no longer use it
 80 |             for (int row = depth + 1; row < n; row++) {
 81 |                 compatMatrix[row][col] = 0;
 82 |             }
 83 |             // the current row keeps only its selected entry
 84 |             Arrays.fill(compatMatrix[depth], 0);
 85 |             compatMatrix[depth][col] = -1;
 86 | 
 87 |             if (matchRecursive(depth + 1)) {
 88 |                 return true;
 89 |             }
 90 |             // dead end: undo the changes made for this candidate
 91 |             compatMatrix = snapshot;
 92 |         }
 93 |         return false;
 94 |     }
 95 | 
 96 |     // Checks that pairing source i with destination j preserves adjacency towards every pair selected so far.
 97 |     private boolean isFeasible(int i, int j) {
 98 |         PDGNode srcNode = srcNodes.get(i);
 99 |         PDGNode dstNode = dstNodes.get(j);
100 | 
101 |         for (int prev = 0; prev < i; prev++) {
102 |             int chosen = selectedColumn(prev);
103 |             if (chosen == -1) {
104 |                 continue;
105 |             }
106 |             boolean adjInSrc = areAdjacent(srcNode, srcNodes.get(prev));
107 |             boolean adjInDst = areAdjacent(dstNode, dstNodes.get(chosen));
108 |             if (adjInSrc != adjInDst) {
109 |                 return false;
110 |             }
111 |         }
112 |         return true;
113 |     }
114 | 
115 |     // Returns the column marked -1 (selected) in the given row, or -1 when the row has no selection.
116 |     private int selectedColumn(int row) {
117 |         for (int col = 0; col < m; col++) {
118 |             if (compatMatrix[row][col] == -1) {
119 |                 return col;
120 |             }
121 |         }
122 |         return -1;
123 |     }
124 | 
125 |     // Two nodes count as adjacent when either lists the other among its dependents or back-dependents.
126 |     private boolean areAdjacent(PDGNode n1, PDGNode n2) {
127 |         return listsAsDependency(n1, n2) || listsAsDependency(n2, n1);
128 |     }
129 | 
130 |     private static boolean listsAsDependency(PDGNode from, PDGNode to) {
131 |         return from.getDependents().contains(to) || from.getBackDependets().contains(to);
132 |     }
133 | 
134 |     // Copies every selected (row, column) pair into the node mapping.
135 |     private void buildNodeMapping() {
136 |         for (int row = 0; row < n; row++) {
137 |             int col = selectedColumn(row);
138 |             if (col != -1) {
139 |                 nodeMapping.addMapping(srcNodes.get(row), dstNodes.get(col));
140 |             }
141 |         }
142 |     }
143 | 
144 |     private boolean nodesAreCompatible(PDGNode n1, PDGNode n2) {
145 |         // TODO: add more like VF2
146 |         // compare node types and attributes
147 |         boolean sameType = n1.getType().equals(n2.getType());
148 |         return sameType && n1.getAttrib().equals(n2.getAttrib());
149 |     }
150 | 
151 |     // Row-by-row copy of the n x m matrix.
152 |     private int[][] copyMatrix(int[][] original) {
153 |         int[][] copy = new int[n][];
154 |         for (int row = 0; row < n; row++) {
155 |             copy[row] = Arrays.copyOf(original[row], m);
156 |         }
157 |         return copy;
158 |     }
159 | }
160 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Graph Based Code differencing
 2 | 
 3 | _Objectives of this repository: Create a novel graph-based differencing approach, with the aim of improving code-differencing with respect to semantics, with some corresponding tool that software engineers can use._
 4 | 
 5 | This application is designed to take two Java classes, and produce a graph-based representation of the differences between them. This can then be visualised in a traditional text-based format, or as a graph.
 6 | It represents both classes as a [Program Dependence Graph](https://dl.acm.org/doi/10.1145/24039.24041) (generated through [Soot](https://github.com/soot-oss/soot)) and uses heuristics to perform graph isomorphism.
 7 | The application will suggest which methods might have originated from one another and will suggest edit scripts between methods in the source and destination file. The final output will be a singular `diff.json` which represents the aggregated per-method edit scripts.
 8 | 
 9 | Due to the fact that a PDG is often built from an intermediate representation for the sake of proper analysis, such as LLVM for a C/C++ program or Java Bytecode in this case, some syntactic differences won't be captured, and the algorithm will be limited to the granularity of the intermediate representation. For the purpose of a closer and more accurate
10 | differencing, compiler optimisations are disabled for target comparison files both at the java compiler level and the Soot level. In construction of Jimple, Soot's IR, some optimisations are not configurable and this is a described limitation of the approach.
11 | In some specific use cases, such as analysis efficiencies or trying to determine if programs are semantically identical, this tool should (🤞) be more useful than other differencing approaches.
12 | 
13 | If you find this interesting, please feel free to read the thesis (link wip) I wrote in conjunction with this tool!
14 | 
15 | This approach has been evaluated against a subset of data from [Gumtree's](https://github.com/GumTreeDiff/gumtree) evaluation [dataset](https://github.com/GumTreeDiff/datasets). This modified dataset can be found in the following [repository](https://github.com/syntax/datasets).
16 | 
17 | ### How does this work?
18 | 
19 | ![Overview](images/overview.png)
20 | 
21 | The complete process flow is described in the above visualisation. Data is read in from the source and destination files, and a PDG is generated for each. The PDGs are then compared using a graph isomorphism strategy, and the results are used to generate a pairing between methods. Each method pairing also has a node mapping, which is used to generate an edit script between two methods. A recovery method is applied to this to analyse operations using further heuristics, and these edit scripts are aggregated to create a final delta that summarises the changes between two files.
22 | Currently recommended matching strategies, which have proven to work quite well in most cases, include VF2 and GED.
23 | 
24 | ### How can I visualise the changes?
25 | 
26 | After running the program with the preferred matching engine settings, the diff can be visualised in different ways. Most commonly,
27 | one can run the _py-visualise_ Flask app to view the diff in its side-by-side, text-based form. Alternatively, remaining more loyal to the graph-based differencing approach, the delta can be viewed at the Jimple level as a singular _delta_
28 | graph. This can be used by exporting the dot file created in the delta-graph folder to a png, or similar.
29 | 
30 | ![Delta](images/refactoredgraph.png)
31 | **NB**: This delta has been edited slightly to ensure it's readable on this README and not too large. This delta graph depicts the changes that transform the method _detailedComputation_ to the method _complexCalculation_, both depicted at the Java level in the text-based visualisation below.
32 | 
33 | Alternatively, the delta can be visualised with a more traditional side-by-side view, as shown below. Note that because differencing occurs at the IR level post compiler-optimisation, some syntactic differences may not be captured. As an example, dead code in addNumbers is not considered. This view can be run by `cd`'ing to the `py-visualise` directory and running `python3 app.py` following an install of the required dependencies.
34 | 
35 | ![Side-by-side](images/text-based.jpg)
36 | 
37 | Insertions and Deletions are marked with a green and red background respectively. Update operations are marked with a variety of colours, so that changes across the source and destination methods and the node pairings of the algorithm can be easily identified. A list of all the edit operations required to describe the delta is listed below the visualisation.
38 | 
39 | ### List of dependencies
40 |  - Java 8 (an older version of Java is required to use Soot)
41 |  - Maven 3.6.x
42 |  - Soot 4.3.0
43 |  - Python 3.8.x or later
44 | 
45 | ### Getting Started
46 | To run,
47 | 
48 | Compile dependencies;
49 | ```bash
50 | mvn compile
51 | ```
52 | 
53 | Run the application;
54 | ```bash
55 | mvn exec:java -Dexec.mainClass="org.pdgdiff.Main"
56 | ```
57 | 
58 | Run the application with arguments for differencing;
59 | 
60 | This implementation of the approach (that is, using Soot) requires the following arguments:
61 | - The relative paths to the source and destination versions of the source file.
62 | - The relative paths to the directories containing the compiled source and destination versions.
63 | - The fully qualified name of the class in the source and destination files.
64 | ```bash
65 | mvn exec:java -Dexec.mainClass="org.pdgdiff.Main" -Dexec.args="<beforeSourcePath> <afterSourcePath> <beforeCompiledDir> <afterCompiledDir> <beforeClassName> <afterClassName>"
66 | ```
67 | 
68 | Package the application (necessary for evaluation purposes only);
69 | ```bash
70 | mvn clean package
71 | ```
72 | 
73 | To run the Flask frontend for text-based visualisation;
74 | ```bash
75 | cd py-visualise
76 | python3 app.py
77 | ```


--------------------------------------------------------------------------------
/py-visualise/app.py:
--------------------------------------------------------------------------------
  1 | from flask import Flask, render_template
  2 | import json
  3 | import os
  4 | from difflib import SequenceMatcher
  5 | import re
  6 | 
  7 | app = Flask(__name__)
  8 | 
  9 | TEST_CLASSES_PATH = "./testclasses"  # relative path: resolved against the process's working directory
 10 | OUT_PATH = "./out"  # relative path: directory from which diff.json is read
 11 | 
 12 | def read_file(filepath):  # return the entire contents of filepath as a single string
 13 |     with open(filepath, 'r') as file:
 14 |         return file.read()
 15 | 
 16 | def generate_color_pairs(n):  # return a list of n CSS-class pairs, cycling through base_colors in order
 17 |     base_colors = [  # NOTE(review): both entries of every pair are the same class name, so old and new sides share it; confirm intended
 18 |         ("highlight-change1-old", "highlight-change1-old"),
 19 |         ("highlight-change2-old", "highlight-change2-old"),
 20 |         ("highlight-change3-old", "highlight-change3-old"),
 21 |         ("highlight-change4-old", "highlight-change4-old"),
 22 |         ("highlight-change5-old", "highlight-change5-old"),
 23 |         ("highlight-change6-old", "highlight-change6-old"),
 24 |         ("highlight-change1-new", "highlight-change1-new"),
 25 |         ("highlight-change2-new", "highlight-change2-new"),
 26 |         ("highlight-change3-new", "highlight-change3-new"),
 27 |         ("highlight-change4-new", "highlight-change4-new"),
 28 |         ("highlight-change5-new", "highlight-change5-new"),
 29 |         ("highlight-change6-new", "highlight-change6-new"),
 30 |     ]
 31 |     
 32 |     color_pairs = []
 33 |     for i in range(n):
 34 |         color_pairs.append(base_colors[i % len(base_colors)])  # wrap around once the 12 base pairs are used up
 35 |     
 36 |     return color_pairs
 37 | 
 38 | def highlight_word_differences_with_colors(old_line, new_line, color_pair):  # return (old_html, new_html) with differing tokens wrapped in <span> tags
 39 |     # possibly investigate weird spacing behaviour on front-end output; think this is because of the way the split is done
 40 |     seperators = r'([.,(){};=+-/*])'  # NOTE(review): '+-/' inside the class is a range (+ , - . /), not three literals; confirm intended
 41 |     old_words = re.split(seperators, old_line)  # the capture group keeps the separators themselves as tokens
 42 |     new_words = re.split(seperators, new_line)
 43 |     
 44 |     old_words = [word for word in old_words if word.strip() != '']  # drop empty and whitespace-only tokens
 45 |     new_words = [word for word in new_words if word.strip() != '']
 46 |     
 47 |     old_highlight = []
 48 |     new_highlight = []
 49 | 
 50 |     matcher = SequenceMatcher(None, old_words, new_words)
 51 | 
 52 |     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
 53 |         if tag == 'equal':
 54 |             old_highlight.append(' '.join(old_words[i1:i2]))
 55 |             new_highlight.append(' '.join(new_words[j1:j2]))
 56 |         elif tag == 'replace':
 57 |             old_highlight.append(f"<span class='{color_pair[0]}'>{' '.join(old_words[i1:i2])}</span>")
 58 |             new_highlight.append(f"<span class='{color_pair[1]}'>{' '.join(new_words[j1:j2])}</span>")
 59 |         elif tag == 'delete':
 60 |             old_highlight.append(f"<span class='{color_pair[0]}'>{' '.join(old_words[i1:i2])}</span>")
 61 |         elif tag == 'insert':
 62 |             new_highlight.append(f"<span class='{color_pair[1]}'>{' '.join(new_words[j1:j2])}</span>")
 63 | 
 64 |     return ' '.join(old_highlight), ' '.join(new_highlight)  # tokens are re-joined with single spaces, so original spacing is not preserved
 65 | 
 66 | @app.route('/')
 67 | def diff_view():  # render index.html with both files highlighted according to the actions in diff.json
 68 |     class1_content = read_file(os.path.join(TEST_CLASSES_PATH, 'TestFileBefore.java')).splitlines()
 69 |     class2_content = read_file(os.path.join(TEST_CLASSES_PATH, 'TestFileAfter.java')).splitlines()
 70 | 
 71 |     with open(os.path.join(OUT_PATH, 'diff.json'), 'r') as diff_file:
 72 |         diff_data = json.load(diff_file)
 73 | 
 74 |     num_actions = len(diff_data["actions"])
 75 |     color_pairs = generate_color_pairs(num_actions)
 76 | 
 77 |     highlighted_class1 = class1_content[:]  # copies to mutate; the *_content lists keep the unmodified source lines
 78 |     highlighted_class2 = class2_content[:]
 79 | 
 80 |     highlighted_diffs = []
 81 | 
 82 |     for i, action in enumerate(diff_data["actions"]):
 83 |         if action["action"] == "Update":
 84 |             old_line_number = action["oldLine"] - 1  # -1 turns the reported line number into a 0-based list index
 85 |             new_line_number = action["newLine"] - 1
 86 | 
 87 |             if 0 <= old_line_number < len(highlighted_class1) and 0 <= new_line_number < len(highlighted_class2):  # silently skip out-of-range lines
 88 |                 highlighted_old, highlighted_new = highlight_word_differences_with_colors(
 89 |                     class1_content[old_line_number], class2_content[new_line_number], color_pairs[i % len(color_pairs)]
 90 |                 )
 91 |                 highlighted_class1[old_line_number] = highlighted_old
 92 |                 highlighted_class2[new_line_number] = highlighted_new
 93 | 
 94 |                 highlighted_diffs.append({
 95 |                     "oldLine": action["oldLine"],
 96 |                     "newLine": action["newLine"],
 97 |                     "oldCode": highlighted_old,
 98 |                     "newCode": highlighted_new
 99 |                 })
100 | 
101 |         elif action["action"] == "Insert":
102 |             new_line_number = action["line"] - 1
103 | 
104 |             if 0 <= new_line_number < len(highlighted_class2):
105 |                 highlighted_new = f"<span class='inserted-line'>+ {class2_content[new_line_number]}</span>"  # NOTE(review): source text goes into HTML unescaped here; confirm how index.html handles '<' and '&'
106 |                 highlighted_class2[new_line_number] = highlighted_new
107 | 
108 |                 highlighted_diffs.append({
109 |                     "oldLine": None,
110 |                     "newLine": action["line"],
111 |                     "oldCode": "",
112 |                     "newCode": highlighted_new
113 |                 })
114 | 
115 |         elif action["action"] == "Delete":
116 |             old_line_number = action["line"] - 1
117 | 
118 |             if 0 <= old_line_number < len(highlighted_class1):
119 |                 highlighted_old = f"<span class='deleted-line'>- {class1_content[old_line_number]}</span>"
120 |                 highlighted_class1[old_line_number] = highlighted_old
121 | 
122 |                 highlighted_diffs.append({
123 |                     "oldLine": action["line"],
124 |                     "newLine": None,
125 |                     "oldCode": highlighted_old,
126 |                     "newCode": ""
127 |                 })
128 | 
129 |     highlighted_class1_content = '\n'.join(highlighted_class1)
130 |     highlighted_class2_content = '\n'.join(highlighted_class2)
131 | 
132 |     return render_template('index.html', class1=highlighted_class1_content, class2=highlighted_class2_content, diffs=highlighted_diffs)
133 | 
134 | if __name__ == '__main__':
135 |     app.run(debug=True)  # NOTE(review): debug=True turns on Flask's debug mode; intended for local development only
136 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/graph/GraphGenerator.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.graph;
  2 | 
  3 | import soot.Body;
  4 | import soot.SootClass;
  5 | import soot.SootMethod;
  6 | import soot.Unit;
  7 | import soot.toolkits.graph.*;
  8 | import soot.toolkits.graph.pdg.MHGDominatorTree;
  9 | import soot.toolkits.graph.pdg.PDGNode;
 10 | import soot.toolkits.scalar.SimpleLocalDefs;
 11 | import soot.toolkits.scalar.SimpleLocalUses;
 12 | import soot.toolkits.scalar.UnitValueBoxPair;
 13 | 
 14 | import java.util.ArrayList;
 15 | import java.util.HashMap;
 16 | import java.util.List;
 17 | import java.util.Map;
 18 | 
 19 | /**
 20 |  * Generates a Program Dependence Graph (PDG) for a single method.
 21 |  */
 22 | public class GraphGenerator {
 23 | 
 24 |     // kinds of dependency edge that can be stored in the PDG
 25 |     public enum DependencyTypes {
 26 |         CONTROL_DEPENDENCY,
 27 |         DATA_DEPENDENCY
 28 |     }
 29 | 
 30 | 
 31 |     public static PDG constructPdg(SootMethod method) { // builds the PDG for one method: control edges from post-dominance frontiers, data edges from def-use pairs
 32 |         Body body = method.retrieveActiveBody();
 33 |         System.out.println("Generating PDG for method: " + method.getName());
 34 |         UnitGraph eug = new ExceptionalUnitGraph(body);
 35 | 
 36 |         // Soot's API for creating the post-dominator tree
 37 |         MHGDominatorTree<Unit> postdominatorTree = new MHGDominatorTree(new MHGPostDominatorsFinder(eug)); // NOTE(review): both constructors are invoked as raw types
 38 | 
 39 |         // dominance frontiers computed on the post-dominator tree; they drive the control-dependency edges added below
 40 |         DominanceFrontier<Unit> dominanceFrontier = new CytronDominanceFrontier<>(postdominatorTree);
 41 | 
 42 |         PDG pdg = new PDG();
 43 |         pdg.setCFG(eug);
 44 | 
 45 |         SimpleLocalDefs definitions = new SimpleLocalDefs(eug);
 46 |         SimpleLocalUses uses = new SimpleLocalUses(body, definitions);
 47 | 
 48 |         Map<Unit, PDGNode> unitToNodeMap = new HashMap<>();
 49 | 
 50 |         PDGNode startNode = null;
 51 | 
 52 |         // record each unit's position in the body's unit order; isBackEdge compares these positions
 53 |         List<Unit> allUnits = new ArrayList<>(body.getUnits());
 54 |         Map<Unit, Integer> unitOrder = new HashMap<>();
 55 |         int idx = 0;
 56 |         for (Unit u : allUnits) {
 57 |             unitOrder.put(u, idx++);
 58 |         }
 59 | 
 60 |         for (Unit unit : body.getUnits()) {
 61 | 
 62 |             PDGNode node = addOrGetNode(pdg, unit, unitToNodeMap);
 63 | 
 64 |             // add control dependencies based on the dominance frontier
 65 |             for (DominatorNode<Unit> dode : dominanceFrontier.getDominanceFrontierOf(postdominatorTree.getDode(unit))) {
 66 |                 Unit frontier = dode.getGode();
 67 |                 PDGNode frontierNode = addOrGetNode(pdg, frontier, unitToNodeMap);
 68 | 
 69 |                 if (!pdg.containsEdge(frontierNode, node, DependencyTypes.CONTROL_DEPENDENCY)) {
 70 |                     // TODO: this probably isn't exactly right, but some 'start node' must be set; taking the first unit often leads to disconnected graphs
 71 |                     if (startNode == null) {
 72 |                         startNode = frontierNode;
 73 |                         pdg.startNode = startNode;
 74 |                     }
 75 |                     pdg.addEdge(frontierNode, node, DependencyTypes.CONTROL_DEPENDENCY);
 76 |                     frontierNode.addDependent(node);
 77 | 
 78 |                     System.out.println("Control Dependency: " + frontierNode + " -> " + node);
 79 | 
 80 |                     // preliminary: if 'frontier' is earlier than or equal to 'unit' in unit order, treat it as a back edge
 81 |                     if (isBackEdge(frontier, unit, unitOrder)) {
 82 |                         node.addBackDependent(frontierNode);
 83 |                         System.out.println("  => (Back edge) " + node + " <- " + frontierNode);
 84 |                     }
 85 |                 }
 86 |             }
 87 | 
 88 |             // add data dependencies based on uses
 89 |             for (UnitValueBoxPair unitValueBoxPair : uses.getUsesOf(unit)) {
 90 |                 Unit useUnit = unitValueBoxPair.unit;
 91 |                 PDGNode useNode = addOrGetNode(pdg, useUnit, unitToNodeMap);
 92 | 
 93 |                 if (!pdg.containsEdge(node, useNode, DependencyTypes.DATA_DEPENDENCY)) {
 94 |                     // TODO: this probably isn't exactly right, but some 'start node' must be set; taking the first unit often leads to disconnected graphs
 95 |                     if (startNode == null) {
 96 |                         startNode = node;
 97 |                         pdg.startNode = startNode;
 98 |                     }
 99 |                     pdg.addEdge(node, useNode, DependencyTypes.DATA_DEPENDENCY);
100 |                     node.addDependent(useNode);
101 |                     System.out.println("Data Dependency: " + node + " -> " + useNode);
102 | 
103 |                     // preliminary: if 'useUnit' is earlier than or equal to 'unit' in unit order, treat it as a back edge
104 |                     if (isBackEdge(useUnit, unit, unitOrder)) {
105 |                         useNode.addBackDependent(node);
106 |                         System.out.println("  => (Data Back edge) " + useNode + " <- " + node);
107 |                     }
108 |                 }
109 |             }
110 |         }
111 |         return pdg;
112 |     }
113 | 
114 |     private static boolean isBackEdge(Unit frontier, Unit unit, Map<Unit, Integer> orderMap) {
115 |         // return true if 'frontier' is the same as or earlier than 'unit' in the ordering => likely a backward/loop edge. NOTE(review): a unit missing from orderMap yields null and the comparison would throw on unboxing
116 |         return orderMap.get(frontier) <= orderMap.get(unit);
117 |     }
118 | 
119 |     private static PDGNode addOrGetNode(PDG pdg, Unit unit, Map<Unit, PDGNode> unitToNodeMap) { // returns the node cached for 'unit', creating and registering it on first use
120 |         PDGNode node = unitToNodeMap.get(unit);
121 |         if (node == null) {
122 |             // create a new PDGNode for this Unit; every node is created with type CFGNODE
123 |             node = new PDGNode(unit, PDGNode.Type.CFGNODE);
124 |             unitToNodeMap.put(unit, node);
125 | 
126 |             // add the node to the PDG if it is not already there
127 |             if (!pdg.containsNode(node)) {
128 |                 pdg.addNode(node);
129 |             }
130 |         }
131 |         return node;
132 |     }
133 | }
134 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/graph/CycleDetection.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.graph;
  2 | 
  3 | import soot.Unit;
  4 | import soot.tagkit.LineNumberTag;
  5 | import soot.tagkit.Tag;
  6 | import soot.toolkits.graph.pdg.PDGNode;
  7 | 
  8 | import java.util.*;
  9 | 
 10 | // implements Tarjan's algorithm for detecting strongly connected components
 11 | public class CycleDetection {
 12 | 
 13 |     private static boolean debug = true; // logging is on by default; toggled through setLogging
 14 |     private static int index = 0; // NOTE(review): all Tarjan state below is static and reset in hasCycle, so concurrent calls would interfere
 15 |     // using IdentityHashMap because PDGNode doesn't implement equals and hashCode
 16 |     private static Map<PDGNode, Integer> indices = new IdentityHashMap<>();
 17 |     private static Map<PDGNode, Integer> lowLinks = new IdentityHashMap<>();
 18 |     private static Deque<PDGNode> stack = new ArrayDeque<>();
 19 |     private static Set<Set<PDGNode>> stronglyConnectedComponents = new HashSet<>();
 20 | 
 21 |     public static void setLogging(boolean enable) { // turns the [CycleDetection] console output on or off
 22 |         debug = enable;
 23 |     }
 24 | 
 25 |     public static boolean hasCycle(PDG pdg) { // returns true when the PDG contains an SCC with more than one node or a node with a self-loop
 26 |         if (debug) System.out.println("[CycleDetection] Detecting cycles using Tarjan's Algorithm");
 27 | 
 28 |         index = 0; // reset the shared static state left over from any previous run
 29 |         indices.clear();
 30 |         lowLinks.clear();
 31 |         stack.clear();
 32 |         stronglyConnectedComponents.clear();
 33 | 
 34 |         List<PDGNode> allNodes = GraphTraversal.collectNodesBFS(pdg);
 35 | 
 36 |         // run Tarjan's algorithm from every node that has not been indexed yet
 37 |         for (PDGNode node : allNodes) {
 38 |             if (!indices.containsKey(node)) {
 39 |                 strongConnect(node, pdg);
 40 |             }
 41 |         }
 42 | 
 43 |         // an SCC counts as a cycle if it has more than one node, or if one of its nodes has a self-loop
 44 |         boolean hasCycle = false;
 45 |         int maxCycleSize = 0;
 46 |         List<Set<PDGNode>> maxSizeSCCs = new ArrayList<>();
 47 | 
 48 |         for (Set<PDGNode> scc : stronglyConnectedComponents) {
 49 |             if (scc.size() > 1 || hasSelfLoop(scc, pdg)) {
 50 |                 hasCycle = true;
 51 | 
 52 |                 int sccSize = scc.size();
 53 |                 if (sccSize > maxCycleSize) { // new largest cycle: restart the list of largest SCCs
 54 |                     maxCycleSize = sccSize;
 55 |                     maxSizeSCCs.clear();
 56 |                     maxSizeSCCs.add(scc);
 57 |                 } else if (sccSize == maxCycleSize) {
 58 |                     maxSizeSCCs.add(scc);
 59 |                 }
 60 | 
 61 |                 if (debug) {
 62 |                     System.out.println("[CycleDetection] Cycle detected in SCC:");
 63 |                     for (PDGNode node : scc) {
 64 |                         int lineNumber = getLineNumberFromPDGNode(node);
 65 |                         if (lineNumber != -1) {
 66 |                             System.out.println("  Node: " + node + " at line " + lineNumber);
 67 |                         } else {
 68 |                             System.out.println("  Node: " + node + " (line number not available)");
 69 |                         }
 70 |                     }
 71 |                 }
 72 |             }
 73 |         }
 74 | 
 75 |         if (hasCycle && debug) { // maxCycleSize and maxSizeSCCs are used only for this debug report
 76 |             System.out.println("[CycleDetection] Largest cycle size: " + maxCycleSize);
 77 |             for (Set<PDGNode> scc : maxSizeSCCs) {
 78 |                 System.out.println("[CycleDetection] -> Largest cycle detected in this SCC:");
 79 |                 for (PDGNode node : scc) {
 80 |                     int lineNumber = getLineNumberFromPDGNode(node);
 81 |                     if (lineNumber != -1) {
 82 |                         System.out.println("  Node: " + node + " at line " + lineNumber);
 83 |                     } else {
 84 |                         System.out.println("  Node: " + node + " (line number not available)");
 85 |                     }
 86 |                 }
 87 |             }
 88 |         }
 89 | 
 90 |         if (!hasCycle && debug) {
 91 |             System.out.println("[CycleDetection] No cycles detected in graph");
 92 |         }
 93 | 
 94 |         return hasCycle;
 95 |     }
 96 | 
 97 |     // recursive step of Tarjan's algorithm: indexes 'node', visits its successors, and emits an SCC when 'node' is a root
 98 |     private static void strongConnect(PDGNode node, PDG pdg) {
 99 |         indices.put(node, index);
100 |         lowLinks.put(node, index);
101 |         index++;
102 |         stack.push(node);
103 | 
104 |         List<PDGNode> successors = pdg.getSuccsOf(node);
105 | 
106 |         for (PDGNode dependent : successors) {
107 |             if (!indices.containsKey(dependent)) { // unvisited successor: recurse, then inherit its low-link
108 |                 strongConnect(dependent, pdg);
109 |                 lowLinks.put(node, Math.min(lowLinks.get(node), lowLinks.get(dependent)));
110 |             } else if (stack.contains(dependent)) { // already visited and still on the stack; contains() is a linear scan of the deque
111 |                 lowLinks.put(node, Math.min(lowLinks.get(node), indices.get(dependent)));
112 |             }
113 |         }
114 | 
115 |         // if node is a root node (low-link equals its own index), pop the stack down to node and record the popped nodes as one SCC
116 |         if (lowLinks.get(node).equals(indices.get(node))) {
117 |             Set<PDGNode> scc = new HashSet<>();
118 |             PDGNode w;
119 |             do {
120 |                 w = stack.pop();
121 |                 scc.add(w);
122 |             } while (w != node);
123 |             stronglyConnectedComponents.add(scc);
124 |         }
125 |     }
126 | 
127 |     private static boolean hasSelfLoop(Set<PDGNode> scc, PDG pdg) { // true if any node in 'scc' appears among its own successors
128 |         for (PDGNode node : scc) {
129 |             for (PDGNode succ : pdg.getSuccsOf(node)) {
130 |                 if (node == succ) { // identity comparison is intended here
131 |                     return true;
132 |                 }
133 |             }
134 |         }
135 |         return false;
136 |     }
137 | 
138 |     private static int getLineNumberFromPDGNode(PDGNode node) { // returns the unit's LineNumberTag value, or -1 when it cannot be determined
139 |         if (node.getType() == PDGNode.Type.CFGNODE) {
140 |             Unit headUnit = (Unit) node.getNode();
141 |             if (headUnit != null) {
142 |                 Tag tag = headUnit.getTag("LineNumberTag");
143 |                 if (tag instanceof LineNumberTag) {
144 |                     LineNumberTag lineNumberTag = (LineNumberTag) tag;
145 |                     return lineNumberTag.getLineNumber();
146 |                 }
147 |             }
148 |         }
149 |         return -1; // not a CFGNODE, no unit attached, or no LineNumberTag on the unit
150 |     }
151 | }
152 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/export/EditScriptExporter.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.export;
  2 | 
  3 | import org.pdgdiff.edit.model.EditOperation;
  4 | import org.pdgdiff.graph.PDG;
  5 | import org.pdgdiff.io.JsonOperationSerializer;
  6 | import org.pdgdiff.io.OperationSerializer;
  7 | import org.pdgdiff.matching.GraphMapping;
  8 | import org.pdgdiff.matching.StrategySettings;
  9 | 
 10 | import java.io.BufferedWriter;
 11 | import java.io.FileWriter;
 12 | import java.io.IOException;
 13 | import java.io.Writer;
 14 | import java.nio.file.Files;
 15 | import java.nio.file.Paths;
 16 | import java.nio.file.StandardCopyOption;
 17 | import java.util.List;
 18 | 
 19 | import static org.pdgdiff.export.ExportUtils.generateHash;
 20 | 
 21 | public class EditScriptExporter {
 22 | 
 23 |     private static final int MAX_FILENAME_LENGTH = 255; // probably max, otherwise sometimes have issues with OS FS
 24 | 
 25 | 
 26 |     public static void exportEditScript(List<EditOperation> editScript, String method1Signature, String method2Signature, StrategySettings strategySettings) {
 27 |         // Sanitize method names for use in filenames: every character other than letters, digits, '.' and '-' becomes '_'
 28 |         String method1Safe = method1Signature.replaceAll("[^a-zA-Z0-9\\.\\-]", "_");
 29 |         String method2Safe = method2Signature.replaceAll("[^a-zA-Z0-9\\.\\-]", "_");
 30 | 
 31 |         String outputDir = "out/";
 32 |         String filename = outputDir + "editScript_" + method1Safe + "_to_" + method2Safe + ".json";
 33 | 
 34 |         // if the whole path string (including the "out/" prefix) exceeds the limit, fall back to hashed method names
 35 |         if (filename.length() > MAX_FILENAME_LENGTH) {
 36 |             String method1Abbrev = generateHash(method1Safe);
 37 |             System.out.println("Method name too big to save to file, hashed;" + method1Safe + " -> " + method1Abbrev);
 38 |             String method2Abbrev = generateHash(method2Safe);
 39 |             System.out.println("Method name too big to save to file, hashed;" + method2Safe + " -> " + method2Abbrev);
 40 |             filename = outputDir + "editScript_" + method1Abbrev + "_to_" + method2Abbrev + "_concat.json";
 41 |         }
 42 | 
 43 |         try (Writer writer = new FileWriter(filename)) { // NOTE(review): nothing here creates the out/ directory; presumably it already exists, confirm
 44 |             OperationSerializer serializer = new JsonOperationSerializer(editScript, strategySettings);
 45 |             serializer.writeTo(writer);
 46 |             System.out.println("Edit script exported to: " + filename);
 47 |         } catch (Exception e) { // failures are reported on stderr and not rethrown
 48 |             System.err.println("Failed to export edit script to " + filename);
 49 |             e.printStackTrace();
 50 |         }
 51 |     }
 52 | 
 53 | 
 54 |     public static void exportGraphMappings(GraphMapping graphMapping, List<PDG> pdgList1, List<PDG> pdgList2, String outputDir) {
 55 |         String filename = outputDir + "graphMappings.txt"; // plain concatenation: outputDir must already end with a path separator
 56 | 
 57 |         // for multi-class graph matchings, we append to the file
 58 |         try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename, true))) {
 59 |             writer.write("Graph Mappings (Before -> After):\n");
 60 | 
 61 |             graphMapping.getGraphMapping().forEach((srcPDG, dstPDG) -> {
 62 |                 try {
 63 |                     String srcMethodSignature = srcPDG.getCFG().getBody().getMethod().getSignature();
 64 |                     String dstMethodSignature = dstPDG.getCFG().getBody().getMethod().getSignature();
 65 |                     writer.write(srcMethodSignature + " -> " + dstMethodSignature + "\n");
 66 |                 } catch (IOException e) { // caught inside the lambda, so one failed write does not stop the remaining entries
 67 |                     System.err.println("Error writing mapping to file: " + e.getMessage());
 68 |                 }
 69 |             });
 70 | 
 71 |             writer.write("\nUnmatched Graphs in Source:\n");
 72 |             pdgList1.stream()
 73 |                     .filter(pdg -> !graphMapping.getGraphMapping().containsKey(pdg)) // source PDGs that are not a key of the mapping
 74 |                     .forEach(pdg -> {
 75 |                         try {
 76 |                             String methodSignature = pdg.getCFG().getBody().getMethod().getSignature();
 77 |                             writer.write(methodSignature + "\n");
 78 |                         } catch (IOException e) {
 79 |                             System.err.println("Error writing unmatched source graph to file: " + e.getMessage());
 80 |                         }
 81 |                     });
 82 | 
 83 |             writer.write("\nUnmatched Graphs in Destination:\n");
 84 |             pdgList2.stream()
 85 |                     .filter(pdg -> !graphMapping.getGraphMapping().containsValue(pdg)) // destination PDGs that are not a value of the mapping
 86 |                     .forEach(pdg -> {
 87 |                         try {
 88 |                             String methodSignature = pdg.getCFG().getBody().getMethod().getSignature();
 89 |                             writer.write(methodSignature + "\n");
 90 |                         } catch (IOException e) {
 91 |                             System.err.println("Error writing unmatched destination graph to file: " + e.getMessage());
 92 |                         }
 93 |                     });
 94 | 
 95 |             System.out.println("Graph mappings exported to: " + filename);
 96 |         } catch (IOException e) { // NOTE(review): the exception itself is not logged here, only the filename
 97 |             System.err.println("Failed to export graph mappings to " + filename);
 98 |         }
 99 |     }
100 | 
101 | 
102 | 
103 |     public static void writeAggregatedEditScript(List<EditOperation> aggregatedEditScripts, String filename, StrategySettings strategySettings) { // serialises the given operations as JSON into 'filename', overwriting it
104 |         try (Writer writer = new FileWriter(filename)) {
105 |             OperationSerializer serializer = new JsonOperationSerializer(aggregatedEditScripts, strategySettings);
106 |             serializer.writeTo(writer);
107 |             System.out.println("Edit script exported to: " + filename);
108 |         } catch (Exception e) { // failures are reported on stderr and not rethrown
109 |             System.err.println("Failed to export edit script to " + filename);
110 |             e.printStackTrace();
111 |         }
112 |     }
113 | 
114 |     public static void copyResultsToOutput(String beforeSourceDir, String afterSourceDir) { // NOTE(review): despite the 'Dir' names, both arguments are copied as single files
115 |         try {
116 |             Files.copy(Paths.get(beforeSourceDir), Paths.get("py-visualise/testclasses/TestFileBefore.java"), StandardCopyOption.REPLACE_EXISTING);
117 |             Files.copy(Paths.get(afterSourceDir), Paths.get("py-visualise/testclasses/TestFileAfter.java"), StandardCopyOption.REPLACE_EXISTING);
118 |             Files.copy(Paths.get("out/diff.json"), Paths.get("py-visualise/out/diff.json"), StandardCopyOption.REPLACE_EXISTING); // hard-coded path: expects out/diff.json to exist already
119 |             System.out.println(" --> results copied to python visualiser");
120 |         } catch (IOException e) { // the first failing copy aborts the remaining ones
121 |             System.err.println("An error occurred while copying the source files to the output folder: " + e.getMessage());
122 |             e.printStackTrace();
123 | 
124 |         }
125 |     }
126 | }
127 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/ged/GEDMatcher.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.matching.models.ged;
  2 | 
  3 | import org.pdgdiff.matching.NodeFeasibility;
  4 | import org.pdgdiff.graph.GraphTraversal;
  5 | import org.pdgdiff.graph.PDG;
  6 | import org.pdgdiff.matching.NodeMapping;
  7 | import soot.toolkits.graph.pdg.PDGNode;
  8 | 
  9 | import java.util.ArrayList;
 10 | import java.util.List;
 11 | import java.util.Map;
 12 | 
 13 | import static org.pdgdiff.matching.models.heuristic.JaroWinklerSimilarity.JaroWinklerSimilarity;
 14 | 
 15 | /**
 16 |  * Performs a Graph Edit Distance node alignment between two PDGs.
 17 |  * Returns a GEDResult containing the (distance, nodeMapping).
 18 |  *
 19 |  */
 20 | public class GEDMatcher {
 21 | 
 22 |     private final PDG srcPdg;
 23 |     private final PDG dstPdg;
 24 | 
 25 |     public GEDMatcher(PDG srcPdg, PDG dstPdg) {
 26 |         this.srcPdg = srcPdg;
 27 |         this.dstPdg = dstPdg;
 28 |     }
 29 | 
 30 |     // find edit distance and return node mappings
 31 |     public GEDResult match() {
 32 |         List<PDGNode> srcNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(srcPdg));
 33 |         List<PDGNode> dstNodes = new ArrayList<>(GraphTraversal.collectNodesBFS(dstPdg));
 34 | 
 35 |         int n1 = srcNodes.size();
 36 |         int n2 = dstNodes.size();
 37 | 
 38 |         // create square cost mat of n x n size, must be square for Hungarian algo
 39 |         // NOTE because its square there is going to be some dummy nodes (where its padded, pdg prob doesnt produce square mat)
 40 |         int n = Math.max(n1, n2);
 41 |         double[][] squareMatrix = new double[n][n];
 42 | 
 43 |         // deletion and insertion costs, todo tune these
 44 |         double insertionCostVal = 1.0;
 45 |         double deletionCostVal  = 1.0;
 46 | 
 47 |         // fill the "real" submatrix of the cost matrix (where i < n1 and j < n2) with substitution costs for each node pair
 48 |         for (int i = 0; i < n1; i++) {
 49 |             for (int j = 0; j < n2; j++) {
 50 |                 squareMatrix[i][j] = substitutionCost(srcNodes.get(i), dstNodes.get(j));
 51 |             }
 52 |         }
 53 | 
 54 |         // fill extra non-match rows/columns with insertion/deletion costs
 55 |         for (int i = 0; i < n; i++) {
 56 |             for (int j = 0; j < n; j++) {
 57 | 
 58 |                 // if within real submatrix, i.e. no dummies,  just covered it in loop above
 59 |                 if (i < n1 && j < n2) {
 60 |                     continue;
 61 |                 }
 62 | 
 63 |                 // i < n1 but j >= n2 => "dummy" node in PDG2 => old node i must be deleted
 64 |                 if (i < n1 && j >= n2) {
 65 |                     squareMatrix[i][j] = deletionCostVal;
 66 |                 }
 67 |                 // i >= n1 but j < n2 => "dummy" node in PDG1 => new node j must be inserted
 68 |                 else if (i >= n1 && j < n2) {
 69 |                     squareMatrix[i][j] = insertionCostVal;
 70 |                 }
 71 |                 // i >= n1 && j >= n2 => both dummy => set cost = 0 or could change this to some small cost
 72 |                 else if (i >= n1 && j >= n2) {
 73 |                     squareMatrix[i][j] = 0.0;
 74 |                 }
 75 |             }
 76 |         }
 77 | 
 78 |         // solving the assignment on the n x n square matrix
 79 |         int[] assignment = HungarianAlgorithm.minimizeAssignment(squareMatrix);
 80 | 
 81 |         NodeMapping nodeMapping = new NodeMapping();
 82 |         double totalCost = 0.0;
 83 | 
 84 | 
 85 |         // checking for real vs dummy nodes
 86 |         for (int i = 0; i < n; i++) {
 87 |             // 'assignment[i] = j' means row i is matched to column j. each i, j  in [0..n)
 88 |             int j = assignment[i];
 89 |             if (j < 0) {
 90 |                 continue;
 91 |             }
 92 |             double cost = squareMatrix[i][j];
 93 |             totalCost += cost;
 94 | 
 95 |             // If i < n1 and j < n2 its within range of 'real' submat=> real node match => substitution
 96 |             if (i < n1 && j < n2) {
 97 |                 nodeMapping.addMapping(srcNodes.get(i), dstNodes.get(j));
 98 |             }
 99 |             // todo: I believe inserts and deletes shouldn't be added to node mapping,
100 |             //  and that their absence will be handled as insertions/deletions in the final mapping
101 |             // unmatched nodes in Nodemapping will be handled as insertions/ deletions
102 |             // if i < n1 && j >= n2 => deletion (old node i matched to dummy)
103 |             // if i >= n1 && j < n2 => insertion (new node j matched to dummy)
104 |             // if both dummy => ignore
105 |         }
106 | 
107 |         // penalise matched nodes that have considerable semantic differences by inspecting edges
108 |         double edgePenalty = computeEdgeMismatchPenalty(nodeMapping);
109 |         totalCost += edgePenalty;
110 | 
111 |         return new GEDResult(totalCost, nodeMapping);
112 |     }
113 | 
114 |     /**
115 |      * returns substitution cost between two nodes.
116 |      * considers node label similarity and node category similarity.
117 |      */
118 |     private double substitutionCost(PDGNode n1, PDGNode n2) {
119 |         // base cost if categories differ
120 |         if (!NodeFeasibility.isSameNodeCategory(n1, n2)) {
121 |             return 1.0;  // or big penalty
122 |         }
123 | 
124 |         // compare the node "type" or attribute
125 |         double attributePenalty = n1.getAttrib().equals(n2.getAttrib()) ? 0.0 : 0.8;
126 | 
127 |         // get the textual content to compare.
128 |         String label1 = extractRelevantLabel(n1);
129 |         String label2 = extractRelevantLabel(n2);
130 | 
131 |         double sim = JaroWinklerSimilarity(label1, label2); // in [0..1], higher=better
132 |         double stringCost = 1.0 - sim; // bigger difference -> bigger cost
133 | 
134 |         double alpha = 0.1;  // weighting for syntactic differences, i.e. string difference
135 |         double beta  = 0.9;  // weighting for semantic difference, i.e. attribute difference
136 | 
137 |         return alpha * stringCost + beta * attributePenalty;
138 |     }
139 | 
140 |     private String extractRelevantLabel(PDGNode node) {
141 |         // remove beginning of  'Type: CFGNODE: <code begins here>'
142 |         return node.toString().substring(15);
143 |     }
144 | 
145 |     /**
146 |      * check for edges and inforce mismatch penalty
147 |      */
148 |     private double computeEdgeMismatchPenalty(NodeMapping mapping) {
149 |         double mismatchCost = 0.0;
150 |         double edgePenalty = 0.5;  // this is complete guess work.
151 | 
152 |         Map<PDGNode, PDGNode> forwardMap = mapping.getNodeMapping();
153 | 
154 |         // for each mapped edge (n1->m1) in old, see if (n2->m2) exists in new
155 |         for (PDGNode oldSrc : forwardMap.keySet()) {
156 |             PDGNode newSrc = forwardMap.get(oldSrc);
157 | 
158 |             for (PDGNode oldTgt : oldSrc.getDependents()) {
159 |                 PDGNode newTgt = forwardMap.get(oldTgt);
160 |                 if (newTgt != null) {
161 |                     // if the new edge does not exist, penalize
162 |                     if (!newSrc.getDependents().contains(newTgt)) {
163 |                         mismatchCost += edgePenalty;
164 |                     }
165 |                 }
166 |             }
167 |         }
168 |         // todo possibly add checks for edges that are in new pdg, but not in old pdg (vice versa)
169 |         //  can use mappings.getReverseNodeMapping()
170 | 
171 |         return mismatchCost;
172 |     }
173 | }
174 | 


--------------------------------------------------------------------------------
/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project version="4">
  3 |   <component name="Palette2">
  4 |     <group name="Swing">
  5 |       <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
  6 |         <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
  7 |       </item>
  8 |       <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
  9 |         <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
 10 |       </item>
 11 |       <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
 12 |         <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
 13 |       </item>
 14 |       <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
 15 |         <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
 16 |       </item>
 17 |       <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
 18 |         <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
 19 |         <initial-values>
 20 |           <property name="text" value="Button" />
 21 |         </initial-values>
 22 |       </item>
 23 |       <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
 24 |         <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
 25 |         <initial-values>
 26 |           <property name="text" value="RadioButton" />
 27 |         </initial-values>
 28 |       </item>
 29 |       <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
 30 |         <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
 31 |         <initial-values>
 32 |           <property name="text" value="CheckBox" />
 33 |         </initial-values>
 34 |       </item>
 35 |       <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
 36 |         <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
 37 |         <initial-values>
 38 |           <property name="text" value="Label" />
 39 |         </initial-values>
 40 |       </item>
 41 |       <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
 42 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
 43 |           <preferred-size width="150" height="-1" />
 44 |         </default-constraints>
 45 |       </item>
 46 |       <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
 47 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
 48 |           <preferred-size width="150" height="-1" />
 49 |         </default-constraints>
 50 |       </item>
 51 |       <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
 52 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
 53 |           <preferred-size width="150" height="-1" />
 54 |         </default-constraints>
 55 |       </item>
 56 |       <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
 57 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 58 |           <preferred-size width="150" height="50" />
 59 |         </default-constraints>
 60 |       </item>
 61 |       <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
 62 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 63 |           <preferred-size width="150" height="50" />
 64 |         </default-constraints>
 65 |       </item>
 66 |       <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
 67 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 68 |           <preferred-size width="150" height="50" />
 69 |         </default-constraints>
 70 |       </item>
 71 |       <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
 72 |         <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
 73 |       </item>
 74 |       <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
 75 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 76 |           <preferred-size width="150" height="50" />
 77 |         </default-constraints>
 78 |       </item>
 79 |       <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
 80 |         <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
 81 |           <preferred-size width="150" height="50" />
 82 |         </default-constraints>
 83 |       </item>
 84 |       <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
 85 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 86 |           <preferred-size width="150" height="50" />
 87 |         </default-constraints>
 88 |       </item>
 89 |       <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
 90 |         <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
 91 |           <preferred-size width="200" height="200" />
 92 |         </default-constraints>
 93 |       </item>
 94 |       <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
 95 |         <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
 96 |           <preferred-size width="200" height="200" />
 97 |         </default-constraints>
 98 |       </item>
 99 |       <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
100 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
101 |       </item>
102 |       <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
103 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
104 |       </item>
105 |       <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
106 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
107 |       </item>
108 |       <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
109 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
110 |       </item>
111 |       <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
112 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
113 |           <preferred-size width="-1" height="20" />
114 |         </default-constraints>
115 |       </item>
116 |       <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
117 |         <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
118 |       </item>
119 |       <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
120 |         <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
121 |       </item>
122 |     </group>
123 |   </component>
124 | </project>


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/DiffEngine.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.matching;
  2 | 
  3 | import org.pdgdiff.edit.ClassMetadataDiffGenerator;
  4 | import org.pdgdiff.edit.EditDistanceCalculator;
  5 | import org.pdgdiff.edit.EditScriptGenerator;
  6 | import org.pdgdiff.edit.RecoveryProcessor;
  7 | import org.pdgdiff.edit.model.EditOperation;
  8 | import org.pdgdiff.export.DiffGraphExporter;
  9 | import org.pdgdiff.graph.CycleDetection;
 10 | import org.pdgdiff.graph.GraphTraversal;
 11 | import org.pdgdiff.graph.PDG;
 12 | import soot.SootClass;
 13 | 
 14 | import soot.SootMethod;
 15 | 
 16 | import java.io.*;
 17 | import java.util.ArrayList;
 18 | import java.util.List;
 19 | import java.util.stream.Collectors;
 20 | 
 21 | import static org.pdgdiff.export.EditScriptExporter.*;
 22 | 
 23 | public class DiffEngine {
 24 | 
 25 |     private static final List<EditOperation> aggregatedEditScripts = new ArrayList<>();
 26 |     private static final boolean debug = false; // setting for development
 27 | 
 28 | 
 29 |     public static void difference(List<PDG> pdgList1, List<PDG> pdgList2,
 30 |                                   StrategySettings strategySettings, String srcSourceFilePath, String dstSourceFilePath) throws IOException {
 31 | 
 32 |         GraphMatcher matcher = GraphMatcherFactory.createMatcher(strategySettings.matchingStrategy, pdgList1, pdgList2);
 33 |         // for each graph print the size and if it has a cycle (debug mode)
 34 |         if (debug) pdgList1.forEach(pdg -> {
 35 |             System.out.println("------");
 36 |             System.out.println(pdg.getCFG().getBody().getMethod().getSignature());
 37 |             System.out.println("Node count" + GraphTraversal.getNodeCount(pdg));
 38 |             CycleDetection.hasCycle(pdg);
 39 |         });
 40 |         // perform the actual graph matching
 41 |         System.out.println("-> Beginning matching PDGs using strategy: " + strategySettings.matchingStrategy);
 42 |         GraphMapping graphMapping = matcher.matchPDGLists();
 43 | 
 44 |         // TODO: clean up debug print stmts
 45 |         System.out.println("--> Graph matching complete using strategy: " + strategySettings.matchingStrategy);
 46 | 
 47 |         // handle unmatched graphs, i.e. additions or deletions of methods to the versions
 48 |         List<PDG> unmatchedInList1 = pdgList1.stream()
 49 |                 .filter(pdg -> !graphMapping.getGraphMapping().containsKey(pdg))
 50 |                 .collect(Collectors.toList());
 51 | 
 52 |         List<PDG> unmatchedInList2 = pdgList2.stream()
 53 |                 .filter(pdg -> !graphMapping.getGraphMapping().containsValue(pdg))
 54 |                 .collect(Collectors.toList());
 55 | 
 56 |         // generate edit scripts for unmatched methods discovered in above statements
 57 |         generateEditScriptsForUnmatched(unmatchedInList1, unmatchedInList2, srcSourceFilePath, dstSourceFilePath, strategySettings);
 58 |         exportGraphMappings(graphMapping, pdgList1, pdgList2, "out/");
 59 | 
 60 |         DiffGraphExporter.exportDiffPDGs(
 61 |                 graphMapping,
 62 |                 pdgList1,
 63 |                 pdgList2,
 64 |                 "out/delta-graphs/"
 65 |         );
 66 | 
 67 |         graphMapping.getGraphMapping().forEach((srcPDG, dstPDG) -> {
 68 |             String method1 = srcPDG.getCFG().getBody().getMethod().getSignature();
 69 |             String method2 = dstPDG.getCFG().getBody().getMethod().getSignature();
 70 |             System.out.println("---\n> PDG from class 1: " + method1 + " is matched with PDG from class 2: " + method2);
 71 |             if (debug) {
 72 |                 System.out.println(GraphTraversal.getNodeCount(srcPDG));
 73 |                 CycleDetection.hasCycle(srcPDG);
 74 |                 System.out.println(GraphTraversal.getNodeCount(dstPDG));
 75 |                 CycleDetection.hasCycle(dstPDG);
 76 |             }
 77 |             NodeMapping nodeMapping = graphMapping.getNodeMapping(srcPDG);
 78 |             if (nodeMapping != null) {
 79 |                 System.out.println("--- Node Mapping:");
 80 |                 nodeMapping.printMappings();
 81 | 
 82 |                 try {
 83 |                     SootMethod srcObj = srcPDG.getCFG().getBody().getMethod();
 84 |                     SootMethod destObj = dstPDG.getCFG().getBody().getMethod();
 85 | 
 86 |                     List<EditOperation> editScript = EditScriptGenerator.generateEditScript(srcPDG, dstPDG, graphMapping,
 87 |                             srcSourceFilePath, dstSourceFilePath, srcObj, destObj);
 88 | 
 89 |                     List<EditOperation> recoveredEditScript = RecoveryProcessor.recoverMappings(editScript, strategySettings.recoveryStrategy);
 90 | 
 91 |                     int editDistance = EditDistanceCalculator.calculateEditDistance(recoveredEditScript);
 92 |                     System.out.println("--- Edit information ---");
 93 |                     System.out.println("-- Edit Distance: " + editDistance);
 94 | 
 95 |                     System.out.println("-- Edit Script:");
 96 |                     for (EditOperation op : recoveredEditScript) {
 97 |                         System.out.println(op);
 98 |                     }
 99 | 
100 |                     // serialise and export
101 |                     aggregatedEditScripts.addAll(recoveredEditScript);
102 |                     exportEditScript(recoveredEditScript, method1, method2, strategySettings);
103 |                 } catch (Exception e) {
104 |                     e.printStackTrace();
105 |                 }
106 |             }
107 |         });
108 | 
109 |         // build edit script for class mappings at this point
110 |         if (!pdgList1.isEmpty() && !pdgList2.isEmpty()) {
111 |             SootClass srcClass = pdgList1.get(0).getCFG().getBody().getMethod().getDeclaringClass();
112 |             SootClass dstClass = pdgList2.get(0).getCFG().getBody().getMethod().getDeclaringClass();
113 | 
114 |             // TODO: if one of these is empty, i need to mark it as an insertion or deletion of the entire class.
115 |             //  so need to do a INSERT all or DELETE all for class metadata, this is currently not handled and only
116 |             //  approximate.
117 |             List<EditOperation> metadataScript = ClassMetadataDiffGenerator.generateClassMetadataDiff(srcClass, dstClass, srcSourceFilePath, dstSourceFilePath);
118 |             aggregatedEditScripts.addAll(metadataScript);
119 |             exportEditScript(metadataScript, "metadata", "metadata", null);
120 |         }
121 | 
122 |         if (strategySettings.isAggregateRecovery()) {
123 |             List<EditOperation> recAggregatedEditScripts = RecoveryProcessor.recoverMappings(aggregatedEditScripts, strategySettings.recoveryStrategy);
124 |             writeAggregatedEditScript(recAggregatedEditScripts, "out/diff.json", strategySettings);
125 |         } else {
126 |             writeAggregatedEditScript(aggregatedEditScripts, "out/diff.json", strategySettings);
127 |         }
128 |     }
129 | 
130 |     private static void generateEditScriptsForUnmatched(List<PDG> unmatchedInList1, List<PDG> unmatchedInList2,
131 |                                                         String srcSourceFilePath, String dstSourceFilePath, StrategySettings strategySettings) {
132 |         unmatchedInList1.forEach(pdg -> {
133 |             try {
134 |                 SootMethod method = pdg.getCFG().getBody().getMethod();
135 |                 String methodSignature = pdg.getCFG().getBody().getMethod().getSignature();
136 |                 System.out.println("Unmatched method in List 1 (to be deleted): " + methodSignature);
137 | 
138 |                 List<EditOperation> editScript = EditScriptGenerator.generateDeleteScript(pdg, srcSourceFilePath, method);
139 |                 List<EditOperation> recoveredEditScript = RecoveryProcessor.recoverMappings(editScript, strategySettings.recoveryStrategy);
140 |                 aggregatedEditScripts.addAll(recoveredEditScript);
141 |                 exportEditScript(recoveredEditScript, methodSignature, "DELETION", strategySettings);
142 |             } catch (Exception e) {
143 |                 System.err.println("Failed to generate delete script for unmatched method in List 1");
144 |                 e.printStackTrace();
145 |             }
146 |         });
147 | 
148 |         unmatchedInList2.forEach(pdg -> {
149 |             try {
150 |                 SootMethod method = pdg.getCFG().getBody().getMethod();
151 |                 String methodSignature = pdg.getCFG().getBody().getMethod().getSignature();
152 |                 System.out.println("Unmatched method in List 2 (to be added): " + methodSignature);
153 | 
154 |                 List<EditOperation> editScript = EditScriptGenerator.generateAddScript(pdg, dstSourceFilePath, method);
155 |                 List<EditOperation> recoveredEditScript = RecoveryProcessor.recoverMappings(editScript, strategySettings.recoveryStrategy);
156 |                 aggregatedEditScripts.addAll(recoveredEditScript);
157 |                 exportEditScript(recoveredEditScript, "INSERTION", methodSignature, strategySettings);
158 |             } catch (Exception e) {
159 |                 System.err.println("Failed to generate add script for unmatched method in List 2");
160 |                 e.printStackTrace();
161 |             }
162 |         });
163 |     }
164 | 
165 | 
166 | }
167 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/ClassMetadataDiffGenerator.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.edit;
  2 | 
  3 | import org.pdgdiff.edit.model.*;
  4 | import org.pdgdiff.util.CodeAnalysisUtils;
  5 | import org.pdgdiff.util.SourceCodeMapper;
  6 | import soot.Modifier;
  7 | import soot.SootClass;
  8 | import soot.SootField;
  9 | import soot.util.Chain;
 10 | 
 11 | import java.io.IOException;
 12 | import java.util.*;
 13 | 
 14 | public class ClassMetadataDiffGenerator {
 15 | 
 16 |     public static List<EditOperation> generateClassMetadataDiff(
 17 |             SootClass srcClass,
 18 |             SootClass dstClass,
 19 |             String srcSourceFilePath,
 20 |             String dstSourceFilePath
 21 |     ) throws IOException {
 22 |         Set<EditOperation> editScriptSet = new HashSet<>();
 23 | 
 24 |         SourceCodeMapper srcCodeMapper = new SourceCodeMapper(srcSourceFilePath);
 25 |         SourceCodeMapper dstCodeMapper = new SourceCodeMapper(dstSourceFilePath);
 26 | 
 27 |         // cmp class metadata
 28 |         compareClassMetadata(srcClass, dstClass, srcCodeMapper, dstCodeMapper, editScriptSet);
 29 | 
 30 |         // cmp fields
 31 |         compareFields(srcClass, dstClass, srcCodeMapper, dstCodeMapper, editScriptSet);
 32 | 
 33 |         return new ArrayList<>(editScriptSet);
 34 |     }
 35 | 
 36 |     private static void compareClassMetadata(
 37 |             SootClass srcClass,
 38 |             SootClass dstClass,
 39 |             SourceCodeMapper srcCodeMapper,
 40 |             SourceCodeMapper dstCodeMapper,
 41 |             Set<EditOperation> editScriptSet
 42 |     ) {
 43 |         // compare class modifiers
 44 |         if (srcClass.getModifiers() != dstClass.getModifiers()) {
 45 |             int srcClassLineNumber = CodeAnalysisUtils.getClassLineNumber(srcClass, srcCodeMapper);
 46 |             int dstClassLineNumber = CodeAnalysisUtils.getClassLineNumber(dstClass, dstCodeMapper);
 47 | 
 48 |             String srcClassDeclaration = CodeAnalysisUtils.getClassDeclaration(srcClass, srcCodeMapper);
 49 |             String dstClassDeclaration = CodeAnalysisUtils.getClassDeclaration(dstClass, dstCodeMapper);
 50 | 
 51 |             EditOperation classUpdate = new Update(
 52 |                     null, // no node associated
 53 |                     srcClassLineNumber,
 54 |                     dstClassLineNumber,
 55 |                     srcClassDeclaration,
 56 |                     dstClassDeclaration,
 57 |                     new SyntaxDifference("ClassMetadataDiff: Class modifiers differ")
 58 |             );
 59 | 
 60 |             editScriptSet.add(classUpdate);
 61 |         }
 62 |     }
 63 | 
 64 | 
 65 |     // In an ideal world this would also compare uses of a field across the entire class body, which would make it
 66 |     // possible to account for rename refactorings in the code base quite cleverly; maybe something to look into.
 67 | 
 68 |     private static void compareFields(
 69 |             SootClass srcClass,
 70 |             SootClass dstClass,
 71 |             SourceCodeMapper srcCodeMapper,
 72 |             SourceCodeMapper dstCodeMapper,
 73 |             Set<EditOperation> editScriptSet
 74 |     ) {
 75 |         Chain<SootField> srcFields = srcClass.getFields();
 76 |         Chain<SootField> dstFields = dstClass.getFields();
 77 | 
 78 |         // Adds field-level edit operations to editScriptSet in three passes:
 79 |         //   1. fields paired by name that are not fieldsAreEqual: Move if the declaration text is unchanged, else Update
 80 |         //   2. remaining source fields: Update against the first similar unpaired destination field, else Delete
 81 |         //   3. destination fields that were never paired: Insert
 82 |         // Only the destination side needs a name index; source fields are only iterated, never looked up by name.
 83 |         Map<String, SootField> dstFieldMap = new HashMap<>();
 84 | 
 85 |         for (SootField field : dstFields) {
 86 |             dstFieldMap.put(field.getName(), field);
 87 |         }
 88 | 
 89 |         // names of the fields paired so far (shared names from pass 1, destination names from pass 2)
 90 |         Set<String> matchedFields = new HashSet<>();
 91 | 
 92 |         // firstly attempting to match by name
 93 |         for (SootField srcField : srcFields) {
 94 |             String fieldName = srcField.getName();
 95 |             SootField dstField = dstFieldMap.get(fieldName);
 96 | 
 97 |             if (dstField != null) {
 98 |                 matchedFields.add(fieldName);
 99 |                 if (!fieldsAreEqual(srcField, dstField)) {
100 |                     // update if field types or modifiers differ
101 |                     int oldLineNumber = CodeAnalysisUtils.getFieldLineNumber(srcField, srcCodeMapper);
102 |                     int newLineNumber = CodeAnalysisUtils.getFieldLineNumber(dstField, dstCodeMapper);
103 |                     String oldCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(srcField, srcCodeMapper);
104 |                     String newCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(dstField, dstCodeMapper);
105 |                     if (oldCodeSnippet.equals(newCodeSnippet)) {
106 |                         EditOperation fieldMove = new Move(
107 |                                 null,
108 |                                 oldLineNumber,
109 |                                 newLineNumber,
110 |                                 oldCodeSnippet
111 |                         );
112 |                         editScriptSet.add(fieldMove);
113 |                     } else {
114 |                         EditOperation fieldUpdate = new Update(
115 |                                 null,
116 |                                 oldLineNumber,
117 |                                 newLineNumber,
118 |                                 oldCodeSnippet,
119 |                                 newCodeSnippet,
120 |                                 new SyntaxDifference("ClassMetadataDiff: Field " + fieldName + " differs")
121 |                         );
122 |                         editScriptSet.add(fieldUpdate);
123 |                     }
124 |                 }
125 |             }
126 |         }
127 | 
128 |         // secondary matching by type / modifier
129 |         for (SootField srcField : srcFields) {
130 |             String fieldName = srcField.getName();
131 |             if (matchedFields.contains(fieldName)) continue;
132 | 
133 |             // look for a destination field with similar properties
134 |             SootField bestMatch = null;
135 |             for (SootField dstField : dstFields) {
136 |                 if (matchedFields.contains(dstField.getName())) continue;
137 | 
138 |                 if (fieldsAreSimilar(srcField, dstField)) {
139 |                     bestMatch = dstField;
140 |                     break;
141 |                 }
142 |             }
143 | 
144 |             if (bestMatch != null) {
145 |                 // field has a close match, so treat as an update
146 |                 matchedFields.add(bestMatch.getName());
147 |                 int oldLineNumber = CodeAnalysisUtils.getFieldLineNumber(srcField, srcCodeMapper);
148 |                 int newLineNumber = CodeAnalysisUtils.getFieldLineNumber(bestMatch, dstCodeMapper);
149 |                 String oldCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(srcField, srcCodeMapper);
150 |                 String newCodeSnippet = CodeAnalysisUtils.getFieldDeclaration(bestMatch, dstCodeMapper);
151 | 
152 |                 EditOperation fieldUpdate = new Update(
153 |                         null,
154 |                         oldLineNumber,
155 |                         newLineNumber,
156 |                         oldCodeSnippet,
157 |                         newCodeSnippet,
158 |                         new SyntaxDifference("ClassMetadataDiff: Field " + fieldName + " differs")
159 |                 );
160 |                 editScriptSet.add(fieldUpdate);
161 |             } else {
162 |                 // no similar field found, treat as a delete
163 |                 int lineNumber = CodeAnalysisUtils.getFieldLineNumber(srcField, srcCodeMapper);
164 |                 String codeSnippet = CodeAnalysisUtils.getFieldDeclaration(srcField, srcCodeMapper);
165 |                 editScriptSet.add(new Delete(null, lineNumber, codeSnippet));
166 |             }
167 |         }
168 | 
169 |         // cleanup with insertion operations
170 |         for (SootField dstField : dstFields) {
171 |             if (!matchedFields.contains(dstField.getName())) {
172 |                 int lineNumber = CodeAnalysisUtils.getFieldLineNumber(dstField, dstCodeMapper);
173 |                 String codeSnippet = CodeAnalysisUtils.getFieldDeclaration(dstField, dstCodeMapper);
174 |                 editScriptSet.add(new Insert(null, lineNumber, codeSnippet));
175 |             }
176 |         }
177 |     }
178 | 
179 | 
180 |     private static boolean fieldsAreSimilar(SootField field1, SootField field2) {
181 |         // Similar = same visibility (public / protected / private / package-private) and same declared type.
182 |         // cannot compare actual objects (getType()) because these are loaded in different Soot Scenes, and hence
183 |         // don't compare as expected with .equals(), so using the string repr of each!!!
184 |         // todo check isStatic, isFinal, etc. and consider name, annotations, initial values
185 |         // all three visibility bits must agree; OR-ing per-bit comparisons would accept any pair of fields
186 |         int visibilityMask = Modifier.PUBLIC | Modifier.PRIVATE | Modifier.PROTECTED;
187 |         boolean sameVisibility = (field1.getModifiers() & visibilityMask) == (field2.getModifiers() & visibilityMask);
188 |         return sameVisibility && field1.getType().toString().equals(field2.getType().toString());
189 |     }
190 | 
191 |     private static boolean fieldsAreEqual(SootField field1, SootField field2) {
192 |         // Two fields are equal when both their declared type and their complete modifier bit set match.
193 |         // cmp field types via their string repr: the source and destination classes are loaded in different
194 |         // Soot Scenes, so Type objects describing the same type cannot be relied on to be equal() to each other
195 |         // (comparing them with equals() would report unchanged fields as different).
196 |         if (!field1.getType().toString().equals(field2.getType().toString())) {
197 |             return false;
198 |         }
199 |         // cmp modifiers (exact bit set, so static/final/etc. changes count as a difference)
200 |         // TODO: cmp annotations or initial values if necessary
201 |         return field1.getModifiers() == field2.getModifiers();
202 |     }
203 | }
204 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/util/CodeAnalysisUtils.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.util;
  2 | 
  3 | import soot.*;
  4 | import soot.tagkit.LineNumberTag;
  5 | 
  6 | import java.util.ArrayList;
  7 | import java.util.List;
  8 | import java.util.regex.Matcher;
  9 | import java.util.regex.Pattern;
 10 | 
 11 | 
 12 | /**
 13 |  * This class aims to assist with parsing when Soot struggles.
 14 |  * Many of these functions supplement Soot when it cannot report a source position itself; they scan the source
 15 |  * file line by line, so they are O(n) in the number of lines. As further work this could probably be optimised.
 16 |  */
 17 | public class CodeAnalysisUtils {
 18 | 
 19 |     /**
 20 |      * Returns the 1-based source line on which {@code sootClass} is declared, or -1 if it cannot be found.
 21 |      * Soot's own start line is used when it is positive; otherwise the source is scanned for the first line
 22 |      * that looks like a class/interface/enum declaration of the class's short name and ends with '{'.
 23 |      */
 24 |     public static int getClassLineNumber(SootClass sootClass, SourceCodeMapper codeMapper) {
 25 |         int lineNumber = sootClass.getJavaSourceStartLineNumber();
 26 |         if (lineNumber > 0) {
 27 |             return lineNumber;
 28 |         }
 29 | 
 30 |         // if line number is not directly available, search for it
 31 |         String className = sootClass.getShortName();
 32 |         String classPattern = String.format(".*\\b(class|interface|enum)\\b\\s+\\b%s\\b.*\\{", Pattern.quote(className));
 33 |         return findFirstMatchingLine(Pattern.compile(classPattern), codeMapper);
 34 |     }
 35 | 
 36 |     /** Returns the trimmed source line that declares {@code sootClass}, or "" if that line cannot be found. */
 37 |     public static String getClassDeclaration(SootClass sootClass, SourceCodeMapper codeMapper) {
 38 |         int lineNumber = getClassLineNumber(sootClass, codeMapper);
 39 |         if (lineNumber > 0) {
 40 |             return codeMapper.getCodeLine(lineNumber).trim();
 41 |         }
 42 |         return "";
 43 |     }
 44 | 
 45 |     /**
 46 |      * Returns the 1-based source line on which {@code field} is declared, or -1 if it cannot be found.
 47 |      * Soot's own start line is used when it is positive; otherwise the source is scanned for the first line
 48 |      * of the form "[modifiers] Type[<...>] name ... ;" built from the field's simple type name and its name.
 49 |      */
 50 |     public static int getFieldLineNumber(SootField field, SourceCodeMapper codeMapper) {
 51 |         int lineNumber = field.getJavaSourceStartLineNumber();
 52 |         if (lineNumber > 0) {
 53 |             return lineNumber;
 54 |         }
 55 | 
 56 |         String fieldName = field.getName();
 57 |         String fieldType = field.getType().toString();
 58 | 
 59 |         // parse simple type name without full package declaration (e.g. String instead of java.lang.String)
 60 |         String simpleFieldType = fieldType.substring(fieldType.lastIndexOf('.') + 1);
 61 |         // regex pattern, possibility of missed case here
 62 |         String fieldPattern = String.format(
 63 |                 ".*\\b(?:public|protected|private|static|final|transient|volatile|abstract|synchronized|native|strictfp|\\s)*\\b%s\\s*(?:<[^>]+>)?\\s+%s\\b.*;",
 64 |                 Pattern.quote(simpleFieldType),
 65 |                 Pattern.quote(fieldName)
 66 |         );
 67 |         return findFirstMatchingLine(Pattern.compile(fieldPattern), codeMapper);
 68 |     }
 69 | 
 70 |     /** Returns the trimmed source line that declares {@code field}, or "" if that line cannot be found. */
 71 |     public static String getFieldDeclaration(SootField field, SourceCodeMapper codeMapper) {
 72 |         int lineNumber = getFieldLineNumber(field, codeMapper);
 73 |         if (lineNumber > 0) {
 74 |             return codeMapper.getCodeLine(lineNumber).trim();
 75 |         }
 76 |         return "";
 77 |     }
 78 | 
 79 |     /** Returns the first 1-based line whose trimmed text fully matches {@code pattern}, or -1 if none does. */
 80 |     private static int findFirstMatchingLine(Pattern pattern, SourceCodeMapper codeMapper) {
 81 |         int totalLines = codeMapper.getTotalLines();
 82 |         for (int i = 1; i <= totalLines; i++) {
 83 |             String line = codeMapper.getCodeLine(i).trim();
 84 |             if (pattern.matcher(line).matches()) {
 85 |                 return i;
 86 |             }
 87 |         }
 88 |         return -1;
 89 |     }
 90 | 
 91 |     /**
 92 |      * Returns {startLine, endLine} (1-based, inclusive) of the signature of {@code method}, or {-1, -1} when
 93 |      * Soot reports no positive start line for it. startLine is the nearest line at or above Soot's start
 94 |      * line that contains the method name followed by '(' (Soot's line itself if no such line exists);
 95 |      * endLine is the first line from startLine downwards that contains '{' (the last line of the file if
 96 |      * no such line exists).
 97 |      */
 98 |     public static int[] getMethodLineRange(SootMethod method, SourceCodeMapper srcCodeMapper) {
 99 |         int initialLine = method.getJavaSourceStartLineNumber();
100 |         if (initialLine <= 0) {
101 |             return new int[]{-1, -1};
102 |         }
103 | 
104 |         String methodName = method.getName();
105 |         String methodPattern = String.format(".*\\b%s\\b\\s*\\(.*", Pattern.quote(methodName));
106 |         Pattern signatureStartPattern = Pattern.compile(methodPattern);
107 | 
108 |         int totalLines = srcCodeMapper.getTotalLines();
109 |         int startLine = initialLine;
110 |         int endLine = initialLine;
111 | 
112 |         // walk upwards, skipping blank lines, until a line matches the signature pattern
113 |         for (int i = initialLine; i > 0; i--) {
114 |             String line = srcCodeMapper.getCodeLine(i).trim();
115 |             if (line.isEmpty()) continue;
116 | 
117 |             Matcher m = signatureStartPattern.matcher(line);
118 |             if (m.matches()) {
119 |                 startLine = i;
120 |                 break;
121 |             }
122 |         }
123 | 
124 |         // walk downwards to the opening brace; every visited line extends the range
125 |         for (int i = startLine; i <= totalLines; i++) {
126 |             endLine = i;
127 |             if (srcCodeMapper.getCodeLine(i).contains("{")) {
128 |                 break;
129 |             }
130 |         }
131 | 
132 |         return new int[]{startLine, endLine};
133 |     }
134 | 
135 |     /**
136 |      * Extracts the parameter declarations of {@code method} from its source signature.
137 |      * Returns the trimmed text of each parameter in declaration order; {@code paramLinesOut} is cleared and
138 |      * then filled in parallel with the 1-based line on which each parameter text was found (the first line
139 |      * of the signature when the text is not found on any single line). Both lists stay empty when the
140 |      * signature cannot be located or has no parameters.
141 |      */
142 |     public static List<String> getParamTokensAndLines(
143 |             SootMethod method,
144 |             SourceCodeMapper mapper,
145 |             List<Integer> paramLinesOut
146 |     ) {
147 |         paramLinesOut.clear();
148 |         List<String> paramTokens = new ArrayList<>();
149 |         int[] range = getMethodLineRange(method, mapper);
150 |         if (range[0] < 0 || range[1] < 0) {
151 |             return paramTokens;
152 |         }
153 | 
154 |         int startLine = range[0];
155 |         // clamp once so the joined text and the per-line lookup below always cover the same lines
156 |         int endLine = Math.min(range[1], mapper.getTotalLines());
157 | 
158 |         // collect the lines of the signature block, individually and joined into one string
159 |         List<String> lines = new ArrayList<>();
160 |         StringBuilder sb = new StringBuilder();
161 |         for (int ln = startLine; ln <= endLine; ln++) {
162 |             String codeLine = mapper.getCodeLine(ln);
163 |             lines.add(codeLine);
164 |             sb.append(codeLine).append("\n");
165 |         }
166 |         String signatureText = sb.toString();
167 | 
168 |         int openParenIndex  = signatureText.indexOf('(');
169 |         int closeParenIndex = signatureText.lastIndexOf(')');
170 |         if (openParenIndex < 0 || closeParenIndex < 0 || closeParenIndex < openParenIndex) {
171 |             return paramTokens; // no parameters
172 |         }
173 | 
174 |         String paramBlock = signatureText.substring(openParenIndex + 1, closeParenIndex).trim();
175 |         if (paramBlock.isEmpty()) {
176 |             return paramTokens;
177 |         }
178 | 
179 |         for (String raw : splitTopLevelCommas(paramBlock)) {
180 |             String trimmed = raw.trim();
181 |             if (trimmed.isEmpty()) {
182 |                 continue;
183 |             }
184 |             // which line contains the param substr is assigned to be line num of that param
185 |             int bestLine = startLine; // fallback
186 |             for (int offset = 0; offset < lines.size(); offset++) {
187 |                 if (lines.get(offset).contains(trimmed)) {
188 |                     bestLine = startLine + offset;
189 |                     break;
190 |                 }
191 |             }
192 |             paramTokens.add(trimmed);
193 |             paramLinesOut.add(bestLine);
194 |         }
195 |         return paramTokens;
196 |     }
197 | 
198 |     /**
199 |      * Splits {@code text} on commas that are not nested inside generic angle brackets, so that a parameter
200 |      * such as "Map<String, Integer> m" stays one piece. Pieces are returned untrimmed and may be empty.
201 |      */
202 |     private static List<String> splitTopLevelCommas(String text) {
203 |         List<String> parts = new ArrayList<>();
204 |         int depth = 0;
205 |         int partStart = 0;
206 |         for (int i = 0; i < text.length(); i++) {
207 |             char c = text.charAt(i);
208 |             if (c == '<') {
209 |                 depth++;
210 |             } else if (c == '>' && depth > 0) {
211 |                 depth--;
212 |             } else if (c == ',' && depth == 0) {
213 |                 parts.add(text.substring(partStart, i));
214 |                 partStart = i + 1;
215 |             }
216 |         }
217 |         parts.add(text.substring(partStart));
218 |         return parts;
219 |     }
220 | 
221 |     /**
222 |      * Collects the annotations written on the lines directly above the signature of {@code method}.
223 |      * Starting on the line above the signature, lines are consumed upwards for as long as their trimmed text
224 |      * starts with '@'; several annotations on one line are split at whitespace followed by '@'.
225 |      * {@code annoLinesOut} is cleared and then filled in parallel with the 1-based line of each returned
226 |      * annotation. Both lists stay empty when the method's line range cannot be determined.
227 |      */
228 |     public static List<String> getMethodAnnotationsWithLines(
229 |             SootMethod method,
230 |             SourceCodeMapper codeMapper,
231 |             List<Integer> annoLinesOut
232 |     ) {
233 |         annoLinesOut.clear();
234 |         List<String> annoTokens = new ArrayList<>();
235 |         int[] range = getMethodLineRange(method, codeMapper);
236 |         if (range[0] <= 0 || range[1] <= 0) {
237 |             return annoTokens;
238 |         }
239 | 
240 |         int startLine = range[0];
241 |         // climb upward until finding lines not starting with '@' i.e. non annotations
242 |         int lineNum = startLine - 1;
243 |         while (lineNum > 0) {
244 |             String line = codeMapper.getCodeLine(lineNum).trim();
245 |             if (line.startsWith("@")) {
246 |                 String[] rawAnnos = line.split("\\s+@");
247 |                 for (int i = 0; i < rawAnnos.length; i++) {
248 |                     // the split consumes the '@' of every annotation except the first, so put it back
249 |                     String annoRaw = (i == 0) ? rawAnnos[i] : "@" + rawAnnos[i];
250 |                     annoRaw = annoRaw.trim();
251 |                     if (!annoRaw.isEmpty()) {
252 |                         annoTokens.add(annoRaw);
253 |                         annoLinesOut.add(lineNum);
254 |                     }
255 |                 }
256 |                 lineNum--;
257 |             } else {
258 |                 break;
259 |             }
260 |         }
261 |         return annoTokens;
262 |     }
263 | 
264 |     /**
265 |      * Returns the 1-based numbers of the annotation lines directly above the signature of {@code method},
266 |      * nearest line first; empty when the method's start line cannot be determined.
267 |      */
268 |     public static List<Integer> getAnnotationsLineNumbers(SootMethod method, SourceCodeMapper codeMapper) {
269 |         List<Integer> annotationLines = new ArrayList<>();
270 |         int[] range = getMethodLineRange(method, codeMapper);
271 |         if (range[0] <= 0) {
272 |             return annotationLines;
273 |         }
274 |         int startLine = range[0];
275 | 
276 |         // crawl upwards until reaching an empty line or a line that does not start with an @ i.e. non annotations
277 |         int lineNum = startLine - 1;
278 |         while (lineNum > 0 && codeMapper.getCodeLine(lineNum).trim().startsWith("@")) {
279 |             annotationLines.add(lineNum);
280 |             lineNum--;
281 |         }
282 |         return annotationLines;
283 |     }
284 | 
285 |     /** Returns the line number stored in the unit's "LineNumberTag", or -1 if the unit is null or has no such tag. */
286 |     public static int getLineNumber(Unit unit) {
287 |         if (unit == null) {
288 |             return -1;
289 |         }
290 |         LineNumberTag tag = (LineNumberTag) unit.getTag("LineNumberTag");
291 |         if (tag != null) {
292 |             return tag.getLineNumber();
293 |         }
294 |         return -1;
295 |     }
296 | }
297 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/matching/models/vf2/VF2State.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.matching.models.vf2;
  2 | 
  3 | import org.pdgdiff.matching.NodeFeasibility;
  4 | import org.pdgdiff.graph.GraphTraversal;
  5 | import org.pdgdiff.graph.PDG;
  6 | import soot.toolkits.graph.pdg.PDGNode;
  7 | 
  8 | import java.util.*;
  9 | 
 10 | /**
 11 |  * VF2State class to store the state of the VF2 algorithm. This class contains methods to store the current state
 12 |  * of the VF2 algorithm and perform operations on the state.
 13 |  */
 14 | class VF2State {
 15 |     private final PDG srcPdg;
 16 |     private final PDG dstPdg;
 17 |     private final Map<PDGNode, PDGNode> mapping;  // The current partial mapping
 18 | 
 19 |     private final Set<PDGNode> T1;  // Nodes in PDG1 that are in the mapping or adjacent to mapped nodes
 20 |     private final Set<PDGNode> T2;  // Same for PDG2
 21 | 
 22 |     private final Set<PDGNode> unmappedSrcNodes;  // Unmapped nodes in PDG1 (the source pdg)
 23 |     private final Set<PDGNode> unmappedDstNodes;  // Unmapped nodes in PDG2 (the dest pdg)
 24 | 
 25 |     public VF2State(PDG srcPdg, PDG dstPdg) {
 26 |         this.srcPdg = srcPdg;
 27 |         this.dstPdg = dstPdg;
 28 |         this.mapping = new LinkedHashMap<>();
 29 | 
 30 |         this.unmappedSrcNodes = new LinkedHashSet<>(GraphTraversal.collectNodesBFS(srcPdg));
 31 |         this.unmappedDstNodes = new LinkedHashSet<>(GraphTraversal.collectNodesBFS(dstPdg));
 32 | 
 33 |         this.T1 = new LinkedHashSet<>();
 34 |         this.T2 = new LinkedHashSet<>();
 35 |     }
 36 | 
 37 |     public boolean isComplete() {
 38 |         // once one of the graphs is fully matched (hence this is subgraph isomorphism)
 39 | //TODO: consider allowing this:
 40 | //        return mapping.size() >= Math.min(GraphTraversal.getNodeCount(srcPdg) * 0.5 , GraphTraversal.getNodeCount(dstPdg) * 0.5);
 41 |         return mapping.size() >= Math.min(GraphTraversal.getNodeCount(srcPdg), GraphTraversal.getNodeCount(dstPdg));
 42 |     }
 43 | 
 44 |     public Map<PDGNode, PDGNode> getMapping() {
 45 |         return mapping;
 46 |     }
 47 | 
 48 |     public List<CandidatePair> generateCandidates() {
 49 |         // TODO: If non determinism prevails, consider implementing a sort on these candidates
 50 |         // TODO: probably need to sort by id e.g. CFGNODE 1 sorta thing. should hopefully work,
 51 |         // If not implementing this here, possibly need to implement it in the matchRecursvie function.
 52 |         List<CandidatePair> candidates = new ArrayList<>();
 53 | 
 54 |         if (!T1.isEmpty() && !T2.isEmpty()) {
 55 |             // Pick nodes from T1 and T2
 56 |             PDGNode n1 = selectNode(T1);
 57 |             for (PDGNode n2 : T2) {
 58 |                 if (nodesAreCompatible(n1, n2)) {
 59 |                     candidates.add(new CandidatePair(n1, n2));
 60 |                 }
 61 |             }
 62 |         } else {
 63 |             // If T1 and T2 are empty, pick any unmapped nodes
 64 |             PDGNode n1 = selectNode(unmappedSrcNodes);
 65 |             for (PDGNode n2 : unmappedDstNodes) {
 66 |                 if (nodesAreCompatible(n1, n2)) {
 67 |                     candidates.add(new CandidatePair(n1, n2));
 68 |                 }
 69 |             }
 70 |         }
 71 | 
 72 |         return candidates;
 73 |     }
 74 | 
 75 |     public boolean isFeasible(CandidatePair pair) {
 76 |         // Implement feasibility checks:
 77 |         // - Syntactic feasibility: node attributes match
 78 |         // - Semantic feasibility: the mapping is consistent with the graph structure
 79 |         // TODO arguably there is no point in doing checkSyntacticFeasibility here,
 80 |         //  as this is already tested when generating the candidates.
 81 |         return checkSyntacticFeasibility(pair) && checkSemanticFeasibility(pair);
 82 |     }
 83 | 
 84 |     public void addPair(CandidatePair pair) {
 85 |         mapping.put(pair.n1, pair.n2);
 86 |         unmappedSrcNodes.remove(pair.n1);
 87 |         unmappedDstNodes.remove(pair.n2);
 88 | 
 89 |         // Update T1 and T2
 90 |         updateTerminalSets(pair.n1, pair.n2);
 91 |     }
 92 | 
 93 |     public void removePair(CandidatePair pair) {
 94 |         mapping.remove(pair.n1);
 95 |         unmappedSrcNodes.add(pair.n1);
 96 |         unmappedDstNodes.add(pair.n2);
 97 | 
 98 |         // Recalculate T1 and T2
 99 |         recalculateTerminalSets();
100 |     }
101 | 
102 |     // Helper methods...
103 | 
104 |     private boolean nodesAreCompatible(PDGNode n1, PDGNode n2) {
105 |         // check if the nodes are of the same semantic category (Stmt, Decl, etc.), todo should move this into semantic check section.
106 |         if (!NodeFeasibility.isSameNodeCategory(n1, n2)) {
107 |             return false;
108 |         }
109 |         // checks from teh following attributes; NORMAL, ENTRY, CONDHEADER, LOOPHEADER
110 |         if (!n1.getAttrib().equals(n2.getAttrib())) {
111 |             return false;
112 |         }
113 | 
114 |         return true;
115 |     }
116 | 
117 | 
118 |     private boolean checkSyntacticFeasibility(CandidatePair pair) {
119 |         // Ensure that the nodes can be mapped based on their attributes
120 |         return nodesAreCompatible(pair.n1, pair.n2);
121 |     }
122 | 
123 |     private boolean checkSemanticFeasibility(CandidatePair pair) {
124 |         // cmp successors in PDG1 vs mapped successors in PDG2
125 |         for (PDGNode succInSrcPdg : srcPdg.getSuccsOf(pair.n1)) {
126 |             PDGNode succMappedInDstPdg = this.getMapping().get(succInSrcPdg);
127 |             if (succMappedInDstPdg != null) {
128 |                 boolean dataEdge1 = srcPdg.hasDataEdge(pair.n1, succInSrcPdg);
129 |                 boolean dataEdge2 = dstPdg.hasDataEdge(pair.n2, succMappedInDstPdg);
130 |                 if (dataEdge1 != dataEdge2) {
131 |                     return false;
132 |                 }
133 | 
134 |                 boolean ctrlEdge1 = srcPdg.hasControlEdge(pair.n1, succInSrcPdg);
135 |                 boolean ctrlEdge2 = dstPdg.hasControlEdge(pair.n2, succMappedInDstPdg);
136 |                 if (ctrlEdge1 != ctrlEdge2) {
137 |                     return false;
138 |                 }
139 |             }
140 |         }
141 | 
142 |         // cmp predecessors in PDG1 vs. mapped predecessors in PDG2
143 |         for (PDGNode predInSrcPdg : srcPdg.getPredsOf(pair.n1)) {
144 |             PDGNode predMappedInDstPdg = this.getMapping().get(predInSrcPdg);
145 |             if (predMappedInDstPdg != null) {
146 |                 boolean dataEdge1 = srcPdg.hasDataEdge(predInSrcPdg, pair.n1);
147 |                 boolean dataEdge2 = dstPdg.hasDataEdge(predMappedInDstPdg, pair.n2);
148 |                 if (dataEdge1 != dataEdge2) {
149 |                     return false;
150 |                 }
151 | 
152 |                 boolean ctrlEdge1 = srcPdg.hasControlEdge(predInSrcPdg, pair.n1);
153 |                 boolean ctrlEdge2 = dstPdg.hasControlEdge(predMappedInDstPdg, pair.n2);
154 |                 if (ctrlEdge1 != ctrlEdge2) {
155 |                     return false;
156 |                 }
157 |             }
158 |         }
159 | 
160 |         // cross-check every existing mapping pair so that edges from (pair.n1->mappedN1) in PDG1 match edges from (pair.n2->mappedN2) in PDG2.
161 |         for (Map.Entry<PDGNode, PDGNode> entry : this.getMapping().entrySet()) {
162 |             PDGNode alreadyMappedN1 = entry.getKey();
163 |             PDGNode alreadyMappedN2 = entry.getValue();
164 | 
165 |             // Forward edges:
166 |             // if PDG1 has data/control edge from (pair.n1 -> alreadyMappedN1), then PDG2 must have the same edge type from (pair.n2 -> alreadyMappedN2)
167 |             boolean dataEdge1 = srcPdg.hasDataEdge(pair.n1, alreadyMappedN1);
168 |             boolean dataEdge2 = dstPdg.hasDataEdge(pair.n2, alreadyMappedN2);
169 |             if (dataEdge1 != dataEdge2) {
170 |                 return false;
171 |             }
172 |             boolean ctrlEdge1 = srcPdg.hasControlEdge(pair.n1, alreadyMappedN1);
173 |             boolean ctrlEdge2 = dstPdg.hasControlEdge(pair.n2, alreadyMappedN2);
174 |             if (ctrlEdge1 != ctrlEdge2) {
175 |                 return false;
176 |             }
177 | 
178 |             // Reverse edges:
179 |             // if PDG1 has data/control edge from (alreadyMappedN1 -> pair.n1), then PDG2 must have the same edge type from (alreadyMappedN2 -> pair.n2).
180 |             dataEdge1 = srcPdg.hasDataEdge(alreadyMappedN1, pair.n1);
181 |             dataEdge2 = dstPdg.hasDataEdge(alreadyMappedN2, pair.n2);
182 |             if (dataEdge1 != dataEdge2) {
183 |                 return false;
184 |             }
185 |             ctrlEdge1 = srcPdg.hasControlEdge(alreadyMappedN1, pair.n1);
186 |             ctrlEdge2 = dstPdg.hasControlEdge(alreadyMappedN2, pair.n2);
187 |             if (ctrlEdge1 != ctrlEdge2) {
188 |                 return false;
189 |             }
190 |         }
191 | 
192 |         return true;
193 | 
194 |     }
195 | 
196 |     private void updateTerminalSets(PDGNode n1, PDGNode n2) {
197 |         // Add neighbours of n1 to T1 if they are not mapped
198 |         for (PDGNode neighbour : n1.getDependents()) {
199 |             if (!mapping.containsKey(neighbour)) {
200 |                 T1.add(neighbour);
201 |             }
202 |         }
203 |         for (PDGNode neighbour : n1.getBackDependets()) {
204 |             if (!mapping.containsKey(neighbour)) {
205 |                 T1.add(neighbour);
206 |             }
207 |         }
208 | 
209 |         // Same for n2
210 |         for (PDGNode neighbour : n2.getDependents()) {
211 |             if (!mapping.containsValue(neighbour)) {
212 |                 T2.add(neighbour);
213 |             }
214 |         }
215 |         for (PDGNode neighbour : n2.getBackDependets()) {
216 |             if (!mapping.containsValue(neighbour)) {
217 |                 T2.add(neighbour);
218 |             }
219 |         }
220 | 
221 |         // Remove n1 and n2 from T1 and T2
222 |         T1.remove(n1);
223 |         T2.remove(n2);
224 |     }
225 | 
226 |     private void recalculateTerminalSets() {
227 |         T1.clear();
228 |         T2.clear();
229 |         for (PDGNode mappedNode1 : mapping.keySet()) {
230 |             for (PDGNode neighbour : mappedNode1.getDependents()) {
231 |                 if (!mapping.containsKey(neighbour)) {
232 |                     T1.add(neighbour);
233 |                 }
234 |             }
235 |             for (PDGNode neighbour : mappedNode1.getBackDependets()) {
236 |                 if (!mapping.containsKey(neighbour)) {
237 |                     T1.add(neighbour);
238 |                 }
239 |             }
240 |         }
241 |         for (PDGNode mappedNode2 : mapping.values()) {
242 |             for (PDGNode neighbour : mappedNode2.getDependents()) {
243 |                 if (!mapping.containsValue(neighbour)) {
244 |                     T2.add(neighbour);
245 |                 }
246 |             }
247 |             for (PDGNode neighbour : mappedNode2.getBackDependets()) {
248 |                 if (!mapping.containsValue(neighbour)) {
249 |                     T2.add(neighbour);
250 |                 }
251 |             }
252 |         }
253 |     }
254 | 
255 |     private PDGNode selectNode(Set<PDGNode> nodeSet) {
256 |         // TODO: implement a more sophisticated node selection strategy here
257 |         // ATM return any node from the set
258 |         return nodeSet.iterator().next();
259 |     }
260 | }
261 | 


--------------------------------------------------------------------------------
/benchmark/evaluation-scripts/analysis_line_num_granularity.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import ast
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | import seaborn as sns
  6 | 
  7 | df = pd.read_csv("diff_results_gumtree_indiv_line_nums.csv")
  8 | # CSV column names holding the line-number lists for the source side of each diff
  9 | src_del = 'Deleted Lines (Src) (SootOK)'
 10 | src_upd = 'Updated Lines (Src) (SootOk)'  # NOTE(review): 'SootOk' vs 'SootOK' above - presumably mirrors the CSV header; confirm
 11 | src_move = 'Moved Lines (Src) (SootOk)'
 12 | # ... and for the destination side
 13 | dest_ins = 'Inserted Lines (Dst) (SootOK)'
 14 | dest_upd = 'Updated Lines (Dst) (SootOk)'
 15 | dest_move = 'Moved Lines (Dst) (SootOk)'
 16 | 
 17 | # helper function to safely convert a string representation of a list into an actual list
 18 | def parse_list(cell):
 19 |     if pd.isna(cell) or cell == "":
 20 |         return []
 21 |     try:
 22 |         return ast.literal_eval(cell) if isinstance(cell, str) else cell
 23 |     except Exception:
 24 |         return []
 25 | 
 26 | for col in [src_del, src_upd, src_move, dest_ins, dest_upd, dest_move]:
 27 |     df[col] = df[col].apply(parse_list)  # replace each cell's string representation with the parsed list
 28 | 
 29 | def aggregate_sootok(row):
 30 |     # for gumtree, consider moved lines as well, for pdg we dont.
 31 |     if row["Approach"] == "GumTree":
 32 |         src = row[src_del] + row[src_upd] + row[src_move]
 33 |         dest = row[dest_ins] + row[dest_upd] + row[dest_move]
 34 |     else:
 35 |         src = row[src_del] + row[src_upd]
 36 |         dest = row[dest_ins] + row[dest_upd]
 37 |     return pd.Series({
 38 |         'Aggregated_Src_SootOk': sorted(set(src)),
 39 |         'Aggregated_Dest_SootOk': sorted(set(dest))
 40 |     })
 41 | 
 42 | df[['Aggregated_Src_SootOk', 'Aggregated_Dest_SootOk']] = df.apply(aggregate_sootok, axis=1)
 43 | 
 44 | results = []
 45 | 
 46 | for (file, commit), group in df.groupby(["Changed File", "Commit ID"]):
 47 |     baseline = group[group["Approach"] == "GumTree"]
 48 |     if baseline.empty:
 49 |         continue
 50 |     baseline_row = baseline.iloc[0]
 51 | 
 52 |     baseline_lines = set(baseline_row["Aggregated_Src_SootOk"]) | set(baseline_row["Aggregated_Dest_SootOk"])
 53 |     baseline_src = set(baseline_row["Aggregated_Src_SootOk"])
 54 |     baseline_dest = set(baseline_row["Aggregated_Dest_SootOk"])
 55 |     
 56 |     for idx, row in group.iterrows():
 57 |         if row["Approach"] == "GumTree":
 58 |             continue  # skip the baseline itself
 59 |         
 60 |         # union of lines for the current approach
 61 |         approach_lines = set(row["Aggregated_Src_SootOk"]) | set(row["Aggregated_Dest_SootOk"])
 62 |         approach_src = set(row["Aggregated_Src_SootOk"])
 63 |         approach_dest = set(row["Aggregated_Dest_SootOk"])
 64 | 
 65 |         misses_src = baseline_src - approach_src
 66 |         misses_dest = baseline_dest - approach_dest
 67 |         hallucinations_src = approach_src - baseline_src
 68 |         hallucinations_dest = approach_dest - baseline_dest
 69 | 
 70 |         # lines that GumTree reports but the approach does not: MISS
 71 |         misses = baseline_lines - approach_lines
 72 |         # lines that the approach reports but are not in GumTree: HALLUCINATIONS
 73 |         hallucinations = approach_lines - baseline_lines
 74 | 
 75 |         results.append({
 76 |             "Changed File": file,
 77 |             "Commit ID": commit,
 78 |             "Approach": row["Approach"],
 79 |             "GumTree_Count": len(baseline_lines),
 80 |             "Approach_Count": len(approach_lines),
 81 |             "Misses": len(misses),
 82 |             "Hallucinations": len(hallucinations),
 83 |             "Misses_Src": len(misses_src),
 84 |             "Misses_Dest": len(misses_dest),
 85 |             "Hallucinations_Src": len(hallucinations_src),
 86 |             "Hallucinations_Dest": len(hallucinations_dest),
 87 |         })
 88 | 
 89 | diff_df = pd.DataFrame(results)
 90 | 
 91 | hybrid_rows = []
 92 | for (file, commit), group in diff_df.groupby(["Changed File", "Commit ID"]):
 93 |     pdg_vf2 = group[group["Approach"] == "PDGdiff-VF2"] # vf2
 94 |     pdg_ged = group[group["Approach"] == "PDGdiff-GED"] # ged
 95 |     if not pdg_vf2.empty and not pdg_ged.empty:
 96 |         vf2_err = pdg_vf2.iloc[0]["Misses"] + pdg_vf2.iloc[0]["Hallucinations"]
 97 |         ged_err = pdg_ged.iloc[0]["Misses"] + pdg_ged.iloc[0]["Hallucinations"]
 98 |         chosen_row = pdg_vf2.iloc[0] if vf2_err <= ged_err else pdg_ged.iloc[0]
 99 |         chosen_row = chosen_row.copy()
100 |         chosen_row["Approach"] = "PDG-Hybrid"
101 |         hybrid_rows.append(chosen_row)
102 | 
103 | hybrid_df = pd.DataFrame(hybrid_rows)
104 | diff_df = pd.concat([diff_df, hybrid_df], ignore_index=True)
105 | 
# Aggregate summary statistics (mean and sum of both error measures) per approach.
summary = diff_df.groupby("Approach").agg({
    metric: ["mean", "sum"] for metric in ("Misses", "Hallucinations")
})
print("\nSummary statistics by approach:")
print(summary)

print("\nOverall Misses/Hallucinations describe():")
print(diff_df[["Misses", "Hallucinations"]].describe())

# Sanity check only: set differences can never have a negative size.
negative_mask = (diff_df["Misses"] < 0) | (diff_df["Hallucinations"] < 0)
print("\nRows with negative Misses or Hallucinations (should be empty):")
print(diff_df[negative_mask])


# Detailed per-approach report, in order of first appearance in diff_df.
for approach in diff_df["Approach"].unique():
    subset = diff_df[diff_df["Approach"] == approach]
    miss_col = subset["Misses"]
    halluc_col = subset["Hallucinations"]

    count = subset.shape[0]
    mean_miss, median_miss = miss_col.mean(), miss_col.median()
    pct80_miss, pct90_miss = miss_col.quantile(0.8), miss_col.quantile(0.9)
    mean_halluc, median_halluc = halluc_col.mean(), halluc_col.median()
    pct80_halluc, pct90_halluc = halluc_col.quantile(0.8), halluc_col.quantile(0.9)

    # Pearson correlation with GumTree counts.
    # TODO: arguably this is a bit primitive as we only take counts
    pearson_corr = subset["Approach_Count"].corr(subset["GumTree_Count"])

    print(f"-- {approach} --")
    print(f"Count (rows)               : {count}")
    print(f"Mean Abs Error (Misses)    : {mean_miss:.2f}")
    print(f"Median Abs Error (Misses)  : {median_miss:.2f}")
    print(f"80th pct Abs Error (Misses): {pct80_miss:.2f}")
    print(f"90th pct Abs Error (Misses): {pct90_miss:.2f}")
    print(f"Mean Abs Error (Halluc)    : {mean_halluc:.2f}")
    print(f"Median Hallucinations      : {median_halluc:.2f}")
    print(f"80th pct Hallucinations    : {pct80_halluc:.2f}")
    print(f"90th pct Hallucinations    : {pct90_halluc:.2f}")

    # corr() yields NaN when it cannot be computed
    if pd.notna(pearson_corr):
        print(f"Pearson correlation with GumTree: {pearson_corr:.3f}")
    else:
        print("Pearson correlation with GumTree: N/A (not enough variation or data points)")
    print("")
151 | 
152 | 
import os

# Alphabetical list of approaches; also used by the percentile-curve plots below.
approaches = sorted(diff_df["Approach"].unique())

# Every figure in this script is written under plots/; create the directory up
# front so savefig does not fail on a fresh checkout.
os.makedirs("plots", exist_ok=True)

# Side-by-side violin plots: distribution of Misses (left) and Hallucinations
# (right) per approach. seaborn takes the long-form frame directly, so no
# per-approach lists need to be prepared.
plt.figure(figsize=(12, 6))

for position, metric in enumerate(("Misses", "Hallucinations"), start=1):
    plt.subplot(1, 2, position)
    sns.violinplot(data=diff_df, x="Approach", y=metric, inner="quartile", hue="Approach", palette="coolwarm", cut=0)
    plt.title(f"{metric} Distribution by Approach")
    plt.xticks(rotation=45)

plt.tight_layout()
plt.savefig("plots/violin.png", dpi=600, bbox_inches='tight')
# plt.show()
174 | 
# Percentile curves: for each error measure, plot its 0th..100th percentile value
# per approach, one figure per measure.
percentiles = np.arange(0, 101)
tick_step = 5

for metric, out_path in (
    ("Misses", "plots/misses.png"),
    ("Hallucinations", "plots/hallucinations.png"),
):
    plt.figure(figsize=(12, 6))
    for approach in approaches:
        data = diff_df[diff_df["Approach"] == approach][metric]
        perc_values = np.percentile(data, percentiles)
        sns.lineplot(x=percentiles, y=perc_values, label=approach)
    plt.xlabel("Percentile")
    plt.ylabel(metric)
    plt.title(f"Percentile Curve for {metric} by Approach")
    plt.legend()
    # y ticks every tick_step lines, up to the largest observed value
    ax = plt.gca()
    y_max = diff_df[metric].max()
    ax.set_yticks(np.arange(0, y_max + tick_step, tick_step))
    plt.grid(True)
    plt.savefig(out_path, dpi=600, bbox_inches='tight')
207 | 
# Mean of the side-specific error measures per approach.
side_columns = ["Misses_Src", "Misses_Dest", "Hallucinations_Src", "Hallucinations_Dest"]
summary_src_dest = (
    diff_df.groupby("Approach")
    .agg({column: "mean" for column in side_columns})
    .reset_index()
)

print("\nAverage Values by Approach (Source vs Destination):")
print(summary_src_dest)

approaches = summary_src_dest["Approach"]
x = np.arange(len(approaches))
width = 0.35

# One grouped bar chart per measure: source bar on the left, destination bar on
# the right of each approach's tick.
for metric, out_path in (
    ("Misses", "plots/avg_misses.png"),
    ("Hallucinations", "plots/avg_hallucinations.png"),
):
    fig, ax = plt.subplots(figsize=(10,6))
    ax.bar(x - width/2, summary_src_dest[f"{metric}_Src"], width, label=f"Source {metric}")
    ax.bar(x + width/2, summary_src_dest[f"{metric}_Dest"], width, label=f"Destination {metric}")

    ax.set_ylabel(f"Average {metric} (lines)")
    ax.set_title(f"Average {metric} by Approach: Source vs Destination")
    ax.set_xticks(x)
    ax.set_xticklabels(approaches)
    ax.legend()
    plt.tight_layout()
    plt.savefig(out_path, dpi=600, bbox_inches='tight')
245 | 
all_op_cols = [src_del, dest_ins, src_upd, dest_upd, src_move, dest_move]
all_op_labels = ["Deleted (Src)", "Inserted (Dst)", "Updated (Src)", "Updated (Dst)", "Moved (Src)", "Moved (Dst)"]

# For the PDG-based approaches, moves are excluded: their move columns count as 0.
move_labels = ("Moved (Src)", "Moved (Dst)")
pdg_approaches = ("PDGdiff-GED", "PDGdiff-VF2")


def count_lines(series):
    """Total number of lines in a column whose cells are lists; non-list cells count as 0."""
    lengths = series.apply(lambda cell: len(cell) if isinstance(cell, list) else 0)
    return lengths.sum()


op_stats = []
for approach, group in df.groupby("Approach"):
    skip_moves = approach in pdg_approaches

    op_sum = {}
    for col, label in zip(all_op_cols, all_op_labels):
        if skip_moves and label in move_labels:
            op_sum[label] = 0
        else:
            op_sum[label] = count_lines(group[col])

    total_lines = sum(op_sum.values())
    # share of each operation type; an approach with no lines at all gets 0 everywhere
    percentages = {
        label: (op_sum[label] / total_lines) * 100 if total_lines > 0 else 0
        for label in all_op_labels
    }

    record = {"Approach": approach, "Total_Lines": total_lines}
    record.update(percentages)
    op_stats.append(record)

op_stats_df = pd.DataFrame(op_stats).set_index("Approach").sort_index()

print("\n--- Operation Type Percentages by Approach (SootOK columns, excluding moves for GED/VF2) ---")
print(op_stats_df)

# Stacked bar chart: one bar per approach, one segment per operation type.
plt.figure(figsize=(10,6))
approaches = op_stats_df.index.tolist()
x = np.arange(len(approaches))
bottom = np.zeros(len(approaches))

for label in all_op_labels:
    perc = op_stats_df[label].values
    plt.bar(x, perc, bottom=bottom, label=label)
    # the next segment starts where this one ended
    bottom = bottom + perc

plt.xticks(x, approaches, rotation=45, ha='right')
plt.ylabel("Percentage (%)")
plt.title("Operation Types as Percentage of Total Changed Lines (excluding lines beyond the scope of Soot)")
plt.legend()
plt.tight_layout()
plt.savefig("plots/operation_types_percentage_stacked.png", dpi=600, bbox_inches='tight')
# plt.show()
299 | 
300 | 
301 | 
302 | 
# Same breakdown as above, but with move operations left out for every approach.
operation_cols = [src_del, dest_ins, src_upd, dest_upd]
operation_labels = ["Deleted (Src)", "Inserted (Dst)", "Updated (Src)", "Updated (Dst)"]


def count_lines(series):
    """Total number of lines in a column whose cells are lists; non-list cells count as 0."""
    lengths = series.apply(lambda cell: len(cell) if isinstance(cell, list) else 0)
    return lengths.sum()


def _operation_counts(group):
    """Line count of each non-move operation type for one approach's rows."""
    counts = {}
    for label, col in zip(operation_labels, operation_cols):
        counts[label] = count_lines(group[col])
    return pd.Series(counts)


# total count of each operation type per approach, then each type's share in percent
op_summary = df.groupby("Approach").apply(_operation_counts)

op_totals = op_summary.sum(axis=1)
op_percentages = op_summary.div(op_totals, axis=0) * 100

# Stacked bar chart: one bar per approach, one segment per operation type.
fig, ax = plt.subplots(figsize=(10, 6))
approaches = op_percentages.index.tolist()
x = np.arange(len(approaches))
bottom = np.zeros(len(approaches))

for label in operation_labels:
    percentages = op_percentages[label].values
    ax.bar(x, percentages, bottom=bottom, label=label)
    # the next segment starts where this one ended
    bottom = bottom + percentages

ax.set_xlabel("Approach")
ax.set_ylabel("Percentage (%)")
ax.set_title("Operation Types as Percentage of Total Changed Lines (Excluding Moves)")
ax.set_xticks(x)
ax.set_xticklabels(approaches, rotation=45)
ax.legend(title="Operation Type")

plt.tight_layout()
plt.savefig("plots/percentage_operations_no_moves_all.png", dpi=600, bbox_inches='tight')
plt.show()


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/SignatureDiffGenerator.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.edit;
  2 | 
  3 | import org.pdgdiff.edit.model.*;
  4 | import org.pdgdiff.matching.models.heuristic.JaroWinklerSimilarity;
  5 | import org.pdgdiff.util.CodeAnalysisUtils;
  6 | import org.pdgdiff.util.SourceCodeMapper;
  7 | import soot.Modifier;
  8 | import soot.SootClass;
  9 | import soot.SootMethod;
 10 | 
 11 | import java.io.IOException;
 12 | import java.util.*;
 13 | 
 14 | public class SignatureDiffGenerator {
 15 | 
 16 |     public static class ParsedSignature {
 17 |         Set<String> modifiers;
 18 |         String returnType;
 19 |         String methodName;
 20 |         List<String> paramTokens;
 21 |         List<String> annotations;
 22 |         List <String> thrownExceptions;
 23 | 
 24 |         ParsedSignature(Set<String> modifiers, String returnType, String methodName, List<String> paramTokens, List<String> annotations, List<String> thrownExceptions) {
 25 |             this.modifiers = modifiers;
 26 |             this.returnType = returnType;
 27 |             this.methodName = methodName;
 28 |             this.paramTokens = paramTokens;
 29 |             this.annotations = annotations;
 30 |             this.thrownExceptions = thrownExceptions;
 31 |         }
 32 |     }
 33 | 
 34 |     public static ParsedSignature parseMethodSignature(SootMethod method, SourceCodeMapper mapper) throws IOException {
 35 |         // convert integer modifiers to a set of strings: e.g. {"public", "static"}
 36 |         Set<String> modifierSet = new HashSet<>();
 37 |         int mods = method.getModifiers();
 38 |         String modsString = Modifier.toString(mods); // e.g. "public static final"
 39 |         if (!modsString.isEmpty()) {
 40 |             // split on whitespace to get indiv tokens
 41 |             modifierSet.addAll(Arrays.asList(modsString.split("\\s+")));
 42 |         }
 43 | 
 44 |         String retType = method.getReturnType() != null ? method.getReturnType().toString() : "";
 45 |         String name = method.getName();
 46 | 
 47 |         List<SootClass> exceptionClasses = method.getExceptions();
 48 |         List<String> thrownExceptions = new ArrayList<>();
 49 |         for (SootClass exception : exceptionClasses) {
 50 |             thrownExceptions.add(exception.getName());
 51 |         }
 52 | 
 53 |         // to be populated later, no soot native way to get all the info required afaik
 54 |         List<Integer> paramLines = new ArrayList<>();
 55 |         List<String> paramTokens = CodeAnalysisUtils.getParamTokensAndLines(method, mapper, paramLines);
 56 | 
 57 |         // Annotation tokens (e.g. "@Override") + line nums for reporting
 58 |         List<Integer> annoLines = new ArrayList<>();
 59 |         List<String> annotations = CodeAnalysisUtils.getMethodAnnotationsWithLines(method, mapper, annoLines);
 60 | 
 61 |         return new ParsedSignature(modifierSet, retType, name, paramTokens, annotations, thrownExceptions);
 62 |     }
 63 | 
 64 | 
 65 |     static List<EditOperation> compareSignatures(
 66 |             ParsedSignature oldSig, ParsedSignature newSig,
 67 |             SootMethod oldMethod, SootMethod newMethod,
 68 |             SourceCodeMapper oldMapper, SourceCodeMapper newMapper
 69 |     ) {
 70 |         List<EditOperation> ops = new ArrayList<>();
 71 | 
 72 |         // these are approx'd and could actually return slightly off numbers if hard to parse.
 73 |         int[] oldRange = CodeAnalysisUtils.getMethodLineRange(oldMethod, oldMapper);
 74 |         int[] newRange = CodeAnalysisUtils.getMethodLineRange(newMethod, newMapper);
 75 | 
 76 |         int oldLine = (oldRange[0] > 0) ? oldRange[0] : -1;
 77 |         int newLine = (newRange[0] > 0) ? newRange[0] : -1;
 78 | 
 79 |         // cmp modifiers todo test this, not sure how useful this is
 80 |         Set<String> removedModifiers = new HashSet<>(oldSig.modifiers);
 81 |         removedModifiers.removeAll(newSig.modifiers);
 82 | 
 83 |         Set<String> addedModifiers = new HashSet<>(newSig.modifiers);
 84 |         addedModifiers.removeAll(oldSig.modifiers);
 85 | 
 86 |         for (String mod : removedModifiers) {
 87 |             ops.add(new Delete(
 88 |                     null, oldLine,
 89 |                     "Removed modifier: " + mod
 90 |             ));
 91 |         }
 92 |         for (String mod : addedModifiers) {
 93 |             ops.add(new Insert(
 94 |                     null, newLine,
 95 |                     "Added modifier: " + mod
 96 |             ));
 97 |         }
 98 | 
 99 |         // cmp return type
100 |         if (!oldSig.returnType.equals(newSig.returnType)) {
101 |             SyntaxDifference diff = new SyntaxDifference(
102 |                     "Return type changed from " + oldSig.returnType + " to " + newSig.returnType
103 |             );
104 |             ops.add(
105 |                     new Update(null, oldLine, newLine,
106 |                             oldSig.returnType, newSig.returnType, diff)
107 |             );
108 |         }
109 | 
110 |         // cmp method name
111 |         if (!oldSig.methodName.equals(newSig.methodName)) {
112 |             SyntaxDifference diff = new SyntaxDifference(
113 |                     "Method name changed from " + oldSig.methodName + " to " + newSig.methodName
114 |             );
115 |             ops.add(
116 |                     new Update(null, oldLine, newLine,
117 |                             oldSig.methodName, newSig.methodName, diff)
118 |             );
119 |         }
120 | 
121 |         List<Integer> oldParamLines = new ArrayList<>();
122 |         List<String> oldParamTokens = CodeAnalysisUtils.getParamTokensAndLines(oldMethod, oldMapper, oldParamLines);
123 | 
124 |         List<Integer> newParamLines = new ArrayList<>();
125 |         List<String> newParamTokens = CodeAnalysisUtils.getParamTokensAndLines(newMethod, newMapper, newParamLines);;
126 | 
127 | 
128 |         ops.addAll(compareStringListsDP(oldParamTokens, newParamTokens,
129 |                 oldParamLines, newParamLines,
130 |                 "Parameter changed"));
131 | //
132 | //        if (oldParamLines.size() == 1 && newParamLines.size() == 1) {
133 | //            // TODO  : avoid accidently marking a inserted param as a insert to the entire line, if the param changed adn multiple params exist on the same li
134 | //                      This is debatable, if i mark just one side as an insert it will be more equatable with gumtree. However, I do think its less useful as a tool. hard to know.
135 | //            if (!oldSig.paramTypes.equals(newSig.paramTypes)) {
136 | //                SyntaxDifference diff = new SyntaxDifference("Parameter list changed");
137 | //                ops.add(
138 | //                        new Update(null, oldParamLines.get(0), newParamLines.get(0),
139 | //                                oldMapper.getCodeLine(oldParamLines.get(0)),newMapper.getCodeLine(newParamLines.get(0)), diff)
140 | //                );
141 | //            }
142 | //        } else {
143 | //            // handle multi line parameters;
144 | //            ops.addAll(
145 | //                    compareStringListsDP(oldSig.paramTypes, newSig.paramTypes, oldParamLines, newParamLines)
146 | //            );
147 | //        }
148 | 
149 | 
150 | //        List<Integer> oldAnnotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(oldMethod, oldMapper);
151 | //        List<Integer> newAnnotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(newMethod, newMapper);
152 | //
153 | //        // NB this is not accounting for field annotations. todo fix
154 | //        // overwrite annotations using line numbers, unfortunately soot does not provide a way to get annotations
155 | //
156 | //        oldSig.annotations = new ArrayList<>();
157 | //        newSig.annotations = new ArrayList<>();
158 | //        for (int i = 0; i < oldAnnotationLines.size(); i++) {
159 | //            oldSig.annotations.add(oldMapper.getCodeLine(oldAnnotationLines.get(i)));
160 | //        }
161 | //        for (int i = 0; i < newAnnotationLines.size(); i++) {
162 | //            newSig.annotations.add(newMapper.getCodeLine(newAnnotationLines.get(i)));
163 | //        }
164 | //
165 | //
166 | //        if (oldSig.annotations.size() == 1 && newSig.annotations.size() == 1) {
167 | //            if (!Objects.equals(oldSig.annotations.get(0), newSig.annotations.get(0))) {
168 | //                SyntaxDifference diff = new SyntaxDifference("Annotation changed");
169 | //                ops.add(
170 | //                        new Update(null, oldAnnotationLines.get(0), newAnnotationLines.get(0),
171 | //                                oldSig.annotations.get(0), newSig.annotations.get(0), diff)
172 | //                );
173 | //            }
174 | //        } else {
175 | //            ops.addAll(
176 | //                    compareStringListsDP(oldSig.annotations, newSig.annotations, oldAnnotationLines, newAnnotationLines)
177 | //            );
178 | //        }
179 | 
180 |         List<Integer> oldAnnoLines = new ArrayList<>();
181 |         List<String> oldAnnoTokens = CodeAnalysisUtils.getMethodAnnotationsWithLines(oldMethod, oldMapper, oldAnnoLines);
182 | 
183 | 
184 |         List<Integer> newAnnoLines = new ArrayList<>();
185 |         List<String> newAnnoTokens = CodeAnalysisUtils.getMethodAnnotationsWithLines(newMethod, newMapper, newAnnoLines);
186 | 
187 |         ops.addAll(compareStringListsDP(oldAnnoTokens, newAnnoTokens,
188 |                 oldAnnoLines, newAnnoLines,
189 |                 "Annotation changed"));
190 | 
191 | 
192 |         List<String> oldExceptions = oldSig.thrownExceptions;
193 |         List<String> newExceptions = newSig.thrownExceptions;
194 | 
195 |         // following are being classified as deletes in order to remain more consitent with gumtree, but perhaps
196 |         // they should be updates (esp based on how other bits of this impl are treating these sorta changes)
197 | 
198 |         Set<String> removedExceptions = new HashSet<>(oldExceptions);
199 |         removedExceptions.removeAll(newExceptions);
200 |         // todo: again should this be deletes or updates...
201 |         for (String ex : removedExceptions) {
202 |             ops.add(new Delete(null, oldLine, "Removed exception from func sig: " + ex));
203 |         }
204 | 
205 |         Set<String> addedExceptions = new HashSet<>(newExceptions);
206 |         addedExceptions.removeAll(oldExceptions);
207 |         // todo: again should this be inserts or updates...
208 |         for (String ex : addedExceptions) {
209 |             ops.add(new Insert(null, newLine, "Added exception from func sig: " + ex));
210 |         }
211 | 
212 |         return ops;
213 |     }
214 | 
215 |     // left to right dynamic programming approach to try and match up parameters (or annos), basically a edit distance optimiation
216 |     // nb soot gives parameter types, not names
217 | 
218 | 
219 |     // generic DP function used for params and for annotations
220 |     private static List<EditOperation> compareStringListsDP(
221 |             List<String> oldEntries,  // old parameter types or old annotation lines
222 |             List<String> newEntries,  // new parameter types or new annotation lines
223 |             List<Integer> oldEntriesLines,  // old parameter line numbers or old annotation line numbers
224 |             List<Integer> newEntriesLines,   // new parameter line numbers or new annotation line numbers
225 |             String label
226 |     ) {
227 |         List<EditOperation> ops = new ArrayList<>();
228 |         int m = oldEntries.size();
229 |         int n = newEntries.size();
230 | 
231 |         double[][] dp = new double[m + 1][n + 1];
232 |         String[][] opsTable = new String[m + 1][n + 1];
233 | 
234 |         // init DP table
235 |         for (int i = 0; i <= m; i++) {
236 |             dp[i][0] = i;
237 |             opsTable[i][0] = "DELETE";
238 |         }
239 |         for (int j = 0; j <= n; j++) {
240 |             dp[0][j] = j;
241 |             opsTable[0][j] = "INSERT";
242 |         }
243 |         opsTable[0][0] = "NO_CHANGE";
244 | 
245 |         // fill DP
246 |         for (int i = 1; i <= m; i++) {
247 |             for (int j = 1; j <= n; j++) {
248 |                 String oldStr = oldEntries.get(i - 1);
249 |                 String newStr = newEntries.get(j - 1);
250 | 
251 |                 if (oldStr.equals(newStr)) {
252 |                     dp[i][j] = dp[i - 1][j - 1];
253 |                     opsTable[i][j] = "NO_CHANGE";
254 |                 } else {
255 |                     double deleteCost = dp[i - 1][j] + 1;
256 |                     double insertCost = dp[i][j - 1] + 1;
257 | 
258 |                     double similarity = JaroWinklerSimilarity.jaroSimilarity(oldStr, newStr);
259 |                     double updateCost = dp[i - 1][j - 1] + (1.0 - similarity);
260 | 
261 |                     if (deleteCost <= insertCost && deleteCost <= updateCost) {
262 |                         dp[i][j] = deleteCost;
263 |                         opsTable[i][j] = "DELETE";
264 |                     } else if (insertCost <= deleteCost && insertCost <= updateCost) {
265 |                         dp[i][j] = insertCost;
266 |                         opsTable[i][j] = "INSERT";
267 |                     } else {
268 |                         dp[i][j] = updateCost;
269 |                         opsTable[i][j] = "UPDATE";
270 |                     }
271 |                 }
272 |             }
273 |         }
274 | 
275 |         // backtrack
276 |         int i = m, j = n;
277 |         while (i > 0 || j > 0) {
278 |             String operation = opsTable[i][j];
279 |             if ("NO_CHANGE".equals(operation)) {
280 |                 i--;
281 |                 j--;
282 |             } else if ("DELETE".equals(operation)) {
283 |                 int oldLineNum = oldEntriesLines.get(i - 1);
284 |                 String entry = oldEntries.get(i - 1);
285 |                 ops.add(new Delete(null, oldLineNum, entry));
286 |                 i--;
287 |             } else if ("INSERT".equals(operation)) {
288 |                 int newLineNum = newEntriesLines.get(j - 1);
289 |                 String entry = newEntries.get(j - 1);
290 |                 ops.add(new Insert(null, newLineNum, entry));
291 |                 j--;
292 |             } else if ("UPDATE".equals(operation)) {
293 |                 int oldLineNum = oldEntriesLines.get(i - 1);
294 |                 int newLineNum = newEntriesLines.get(j - 1);
295 |                 String oldEntry = oldEntries.get(i - 1);
296 |                 String newEntry = newEntries.get(j - 1);
297 | 
298 |                 SyntaxDifference diff = new SyntaxDifference(
299 |                         label + " from \"" + oldEntry + "\" to \"" + newEntry + "\""
300 |                 );
301 |                 ops.add(new Update(null, oldLineNum, newLineNum, oldEntry, newEntry, diff));
302 |                 i--;
303 |                 j--;
304 |             }
305 |         }
306 | 
307 |         Collections.reverse(ops);
308 |         return ops;
309 |     }
310 | }
311 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/export/DiffGraphExporter.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.export;
  2 | 
  3 | import org.pdgdiff.graph.GraphGenerator;
  4 | import org.pdgdiff.graph.PDG;
  5 | import org.pdgdiff.matching.GraphMapping;
  6 | import org.pdgdiff.matching.NodeMapping;
  7 | import soot.Unit;
  8 | import soot.tagkit.LineNumberTag;
  9 | import soot.toolkits.graph.pdg.PDGNode;
 10 | 
 11 | import java.io.File;
 12 | import java.io.FileWriter;
 13 | import java.io.IOException;
 14 | import java.io.PrintWriter;
 15 | import java.util.*;
 16 | import java.util.stream.Collectors;
 17 | 
 18 | public class DiffGraphExporter {
 19 | 
 20 |     /**
 21 |      * This generates a singular 'delta' dot file, i.e. a way of representing the changes that have happeend on one graph and
 22 |      * taken it to another graph
 23 |      */
 24 |     public static void exportDiffPDGs(
 25 |             GraphMapping graphMapping,
 26 |             List<PDG> pdgListSrc,
 27 |             List<PDG> pdgListDst,
 28 |             String outputDir
 29 |     ) {
 30 |         File outDir = new File(outputDir);
 31 |         if (!outDir.exists()) {
 32 |             outDir.mkdirs();
 33 |         }
 34 | 
 35 |         // one pdg diff's dot file for each matched pair
 36 |         Map<PDG, PDG> matchedPairs = graphMapping.getGraphMapping();
 37 |         for (Map.Entry<PDG, PDG> entry : matchedPairs.entrySet()) {
 38 |             PDG srcPDG = entry.getKey();
 39 |             PDG dstPDG = entry.getValue();
 40 |             NodeMapping nodeMapping = graphMapping.getNodeMapping(srcPDG);
 41 | 
 42 |             String srcMethod = (srcPDG.getCFG() != null)
 43 |                     ? srcPDG.getCFG().getBody().getMethod().getName()
 44 |                     : "UnknownSrcMethod";
 45 |             String dstMethod = (dstPDG.getCFG() != null)
 46 |                     ? dstPDG.getCFG().getBody().getMethod().getName()
 47 |                     : "UnknownDstMethod";
 48 | 
 49 |             String dotFileName = "diff_" + srcMethod + "_TO_" + dstMethod + ".dot";
 50 |             File dotFile = new File(outDir, dotFileName);
 51 | 
 52 |             exportSingleDiffPDG(srcPDG, dstPDG, nodeMapping, dotFile);
 53 |         }
 54 | 
 55 |         // identify unmatched PDGs in source vs. destination
 56 |         List<PDG> unmatchedInSrc = pdgListSrc.stream()
 57 |                 .filter(pdg -> !matchedPairs.containsKey(pdg))
 58 |                 .collect(Collectors.toList());
 59 |         List<PDG> unmatchedInDst = pdgListDst.stream()
 60 |                 .filter(pdg -> !matchedPairs.containsValue(pdg))
 61 |                 .collect(Collectors.toList());
 62 | 
 63 | 
 64 |         // NB: if no match, i.e. a graph is inserted or deleted, we can't show a diff and no delta will be made.
 65 |     }
 66 | 
 67 |     /**
 68 |      * exprts a single .dot file showing the diff between one src PDG and one dst PDG
 69 |      *
 70 |      * This aims to follow similar logic to Editscriptgeneration
 71 |      */
 72 |     private static void exportSingleDiffPDG(
 73 |             PDG srcPDG,
 74 |             PDG dstPDG,
 75 |             NodeMapping nodeMapping,
 76 |             File outputDotFile
 77 |     ) {
 78 |         try (PrintWriter writer = new PrintWriter(new FileWriter(outputDotFile))) {
 79 |             writer.println("digraph PDG_DIFF {");
 80 |             writer.println("  rankdir=TB;");
 81 |             writer.println("  node [shape=box, style=filled, fontname=Arial];");
 82 |             writer.println("  edge [fontname=Arial];");
 83 | 
 84 |             Map<PDGNode, PDGNode> srcToDst = nodeMapping.getNodeMapping();
 85 |             Map<PDGNode, PDGNode> dstToSrc = nodeMapping.getReverseNodeMapping();
 86 | 
 87 |             Set<PDGNode> srcNodes = new HashSet<>();
 88 |             srcPDG.iterator().forEachRemaining(srcNodes::add);
 89 |             Set<PDGNode> dstNodes = new HashSet<>();
 90 |             dstPDG.iterator().forEachRemaining(dstNodes::add);
 91 | 
 92 |             // map to store node details (label and color) keyed by their dot id
 93 |             Map<String, NodeData> nodeDataMap = new HashMap<>();
 94 | 
 95 |             // process nodes from source PDG (matched or deleted nodes)
 96 |             for (PDGNode srcNode : srcNodes) {
 97 |                 PDGNode dstNode = srcToDst.get(srcNode);
 98 |                 String nodeId = getNodeId(srcNode, true);
 99 | 
100 |                 if (dstNode == null) {
101 |                     // node deleted in dst
102 |                     String label = removePrefix(srcNode.toString());
103 |                     String color = "#FFCCCC"; // red for deletion
104 |                     nodeDataMap.put(nodeId, new NodeData(createNodeLabel(label, srcNode), color));
105 |                 } else {
106 |                     // matched (poss either unchanged, moved, or updated)
107 |                     String label, color;
108 |                     if (Objects.equals(removePrefix(srcNode.toString()), removePrefix(dstNode.toString()))) {
109 |                         label = removePrefix(srcNode.toString());
110 |                         color = "lightgrey"; // unchanged
111 |                     } else {
112 |                         label = String.format("%s!NEWLINE!----!NEWLINE!%s",
113 |                                 removePrefix(srcNode.toString()),
114 |                                 removePrefix(dstNode.toString()));
115 |                         color = "#FFCC99"; // orange for update
116 |                     }
117 |                     nodeDataMap.put(nodeId, new NodeData(createNodeLabel(label, srcNode, dstNode), color));
118 |                 }
119 |             }
120 | 
121 |             // processing nodes added in destination
122 |             for (PDGNode dstNode : dstNodes) {
123 |                 if (!dstToSrc.containsKey(dstNode)) {
124 |                     String nodeId = getNodeId(dstNode, false);
125 |                     String label = removePrefix(dstNode.toString());
126 |                     String color = "#CCFFCC"; // green for added
127 |                     nodeDataMap.put(nodeId, new NodeData(createNodeLabel(label, dstNode), color));
128 |                 }
129 |             }
130 | 
131 |             // process edges and record dependency labels
132 |             Map<EdgeKey, Set<String>> edgeMap = new HashMap<>();
133 |             Set<String> connectedNodeIds = new HashSet<>();
134 | 
135 |             // process edges from the src PDG
136 |             for (PDGNode srcNode : srcNodes) {
137 |                 for (PDGNode succ : srcPDG.getSuccsOf(srcNode)) {
138 |                     String srcId = getMergedNodeId(srcNode, true, srcToDst);
139 |                     String tgtId = getMergedNodeId(succ, true, srcToDst);
140 |                     EdgeKey key = new EdgeKey(srcId, tgtId);
141 | 
142 |                     // get dependency types for the edge in srcPDG.
143 |                     List<GraphGenerator.DependencyTypes> depTypes = srcPDG.getEdgeLabels(srcNode, succ);
144 |                     String depLabel = depTypes.stream()
145 |                             .map(DiffGraphExporter::mapDependencyType)
146 |                             .collect(Collectors.joining(","));
147 |                     edgeMap.computeIfAbsent(key, k -> new HashSet<>()).add("src:" + depLabel);
148 | 
149 |                     connectedNodeIds.add(srcId);
150 |                     connectedNodeIds.add(tgtId);
151 |                 }
152 |             }
153 | 
154 |             // process edges from the dest PDG
155 |             for (PDGNode dstNode : dstNodes) {
156 |                 for (PDGNode succ : dstPDG.getSuccsOf(dstNode)) {
157 |                     String srcId = getMergedNodeId(dstNode, false, dstToSrc);
158 |                     String tgtId = getMergedNodeId(succ, false, dstToSrc);
159 |                     EdgeKey key = new EdgeKey(srcId, tgtId);
160 | 
161 |                     List<GraphGenerator.DependencyTypes> depTypes = dstPDG.getEdgeLabels(dstNode, succ);
162 |                     String depLabel = depTypes.stream()
163 |                             .map(DiffGraphExporter::mapDependencyType)
164 |                             .collect(Collectors.joining(","));
165 |                     edgeMap.computeIfAbsent(key, k -> new HashSet<>()).add("dst:" + depLabel);
166 | 
167 |                     connectedNodeIds.add(srcId);
168 |                     connectedNodeIds.add(tgtId);
169 |                 }
170 |             }
171 | 
172 |             for (String nodeId : connectedNodeIds) {
173 |                 NodeData data = nodeDataMap.get(nodeId);
174 |                 if (data != null) {
175 |                     writer.printf("  %s [label=%s, fillcolor=\"%s\"];%n",
176 |                             nodeId, data.label, data.color);
177 |                 }
178 |             }
179 | 
180 |             // write edges with colour and label
181 |             for (Map.Entry<EdgeKey, Set<String>> entry : edgeMap.entrySet()) {
182 |                 EdgeKey key = entry.getKey();
183 |                 Set<String> sources = entry.getValue();
184 |                 String color;
185 |                 if (sources.stream().anyMatch(s -> s.startsWith("src:"))
186 |                         && sources.stream().anyMatch(s -> s.startsWith("dst:"))) {
187 |                     color = "black";
188 |                 } else if (sources.stream().anyMatch(s -> s.startsWith("src:"))) {
189 |                     color = "red";
190 |                 } else {
191 |                     color = "green";
192 |                 }
193 |                 String edgeLabel = sources.stream()
194 |                         .map(s -> s.substring(4))
195 |                         .distinct()
196 |                         .collect(Collectors.joining("/"));
197 |                 writer.printf("  %s -> %s [color=%s, label=\"%s\"];%n",
198 |                         key.srcId, key.tgtId, color, edgeLabel);
199 |             }
200 | 
201 |             writer.println("}");
202 |             System.out.println("Created PDG diff: " + outputDotFile.getAbsolutePath());
203 |         } catch (IOException e) {
204 |             e.printStackTrace();
205 |         }
206 |     }
207 | 
208 |     // this is overloaded, depending on update or single-line number operation
209 |     private static String createNodeLabel(String originalLabel, PDGNode node) {
210 |         return createNodeLabel(originalLabel, node, null);
211 |     }
212 | 
213 | 
214 |     private static String createNodeLabel(String originalLabel, PDGNode node1, PDGNode node2) {
215 |         int lineNum = getNodeLineNumber(node1);
216 |         int lineNum2 = -1;
217 |         if (node2 != null) {
218 |             lineNum2 = getNodeLineNumber(node2);
219 |         }
220 |         String safeLabel = escape(originalLabel);
221 | 
222 |         StringBuilder sb = new StringBuilder();
223 |         sb.append("<");
224 |         sb.append("<b>").append(safeLabel).append("</b>");
225 |         if (lineNum != -1 && lineNum2 == -1) {
226 |             sb.append("<br/>")
227 |                     .append("<font point-size=\"10\" color=\"gray\">")
228 |                     .append("Line: ").append(lineNum)
229 |                     .append("</font>");
230 |         } else if(lineNum != -1) {
231 |             sb.append("<br/>")
232 |                     .append("<font point-size=\"10\" color=\"gray\">")
233 |                     .append("Line: ").append(lineNum)
234 |                     .append(" -&gt; ")
235 |                     .append("Line: ").append(lineNum2)
236 |                     .append("</font>");
237 |         }
238 | 
239 |         sb.append(">");
240 |         return sb.toString();
241 |     }
242 | 
243 |     // helper classes and methods
244 | 
245 |     private static int getNodeLineNumber(PDGNode node) {
246 |         if (node.getType() == PDGNode.Type.CFGNODE) {
247 |             Object underlying = node.getNode();
248 |             if (underlying instanceof Unit) {
249 |                 Unit unit = (Unit) underlying;
250 |                 LineNumberTag tag = (LineNumberTag) unit.getTag("LineNumberTag");
251 |                 if (tag != null) {
252 |                     return tag.getLineNumber();
253 |                 }
254 |             }
255 |         }
256 |         return -1;
257 |     }
258 | 
259 |     private static class NodeData {
260 |         String label;
261 |         String color;
262 |         NodeData(String label, String color) {
263 |             this.label = label;
264 |             this.color = color;
265 |         }
266 |     }
267 | 
268 |     private static String mapDependencyType(GraphGenerator.DependencyTypes depType) {
269 |         if (depType == GraphGenerator.DependencyTypes.CONTROL_DEPENDENCY) {
270 |             return "CTRL_DEP";
271 |         } else if (depType == GraphGenerator.DependencyTypes.DATA_DEPENDENCY) {
272 |             return "DATA_DEP";
273 |         } else {
274 |             return "UNKNOWN";
275 |         }
276 |     }
277 | 
278 |     // generates a node ID
279 |     private static String getNodeId(PDGNode node, boolean isSrc) {
280 |         String prefix = isSrc ? "SRC_" : "DST_";
281 |         return prefix + System.identityHashCode(node);
282 |     }
283 | 
284 |     // generates a node ID for merged nodes
285 |     private static String getMergedNodeId(PDGNode node, boolean isSourceNode, Map<PDGNode, PDGNode> mapping) {
286 |         PDGNode mappedNode = mapping.get(node);
287 |         if (mappedNode != null) {
288 |             if (isSourceNode) {
289 |                 return getNodeId(node, true);
290 |             } else {
291 |                 return getNodeId(mappedNode, true);
292 |             }
293 |         } else {
294 |             return getNodeId(node, isSourceNode);
295 |         }
296 |     }
297 | 
298 |     // removes the prefix from a node label
299 |     private static String removePrefix(String label) {
300 |         String prefix = "Type: CFGNODE: ";
301 |         return label.startsWith(prefix) ? label.substring(prefix.length()) : label;
302 |     }
303 | 
304 |     // for dot formatting
305 |     private static String escape(String text) {
306 |         return text.replace("<", "&lt;")
307 |                 .replace(">", "&gt;")
308 |                 .replace("\"", "\\\"")
309 |                 .replace("!NEWLINE!", "<br/>");
310 | 
311 |     }
312 | 
313 |     private static class EdgeKey {
314 |         final String srcId;
315 |         final String tgtId;
316 | 
317 |         EdgeKey(String srcId, String tgtId) {
318 |             this.srcId = srcId;
319 |             this.tgtId = tgtId;
320 |         }
321 | 
322 |         // for comp
323 |         @Override
324 |         public boolean equals(Object o) {
325 |             if (this == o) return true;
326 |             if (o == null || getClass() != o.getClass()) return false;
327 |             EdgeKey edgeKey = (EdgeKey) o;
328 |             return Objects.equals(srcId, edgeKey.srcId) && Objects.equals(tgtId, edgeKey.tgtId);
329 |         }
330 | 
331 |         @Override
332 |         public int hashCode() {
333 |             return Objects.hash(srcId, tgtId);
334 |         }
335 |     }
336 | }
337 | 


--------------------------------------------------------------------------------
/src/main/java/org/pdgdiff/edit/EditScriptGenerator.java:
--------------------------------------------------------------------------------
  1 | package org.pdgdiff.edit;
  2 | 
  3 | import org.pdgdiff.edit.model.*;
  4 | import org.pdgdiff.graph.PDG;
  5 | import org.pdgdiff.matching.GraphMapping;
  6 | import org.pdgdiff.matching.NodeMapping;
  7 | import org.pdgdiff.util.CodeAnalysisUtils;
  8 | import org.pdgdiff.util.SourceCodeMapper;
  9 | import soot.SootMethod;
 10 | import soot.Unit;
 11 | import soot.toolkits.graph.pdg.PDGNode;
 12 | 
 13 | import java.io.IOException;
 14 | import java.util.*;
 15 | import java.util.stream.Collectors;
 16 | 
 17 | import org.pdgdiff.edit.SignatureDiffGenerator.ParsedSignature;
 18 | 
 19 | import static org.pdgdiff.edit.SignatureDiffGenerator.compareSignatures;
 20 | import static org.pdgdiff.edit.SignatureDiffGenerator.parseMethodSignature;
 21 | import static org.pdgdiff.graph.GraphTraversal.collectNodesBFS;
 22 | 
 23 | /**
 24 |  * Generates edit scripts based on PDG node mappings.
 25 |  */
 26 | public class EditScriptGenerator {
 27 | 
 28 |     public static List<EditOperation> generateEditScript(
 29 |             PDG srcPDG,
 30 |             PDG dstPDG,
 31 |             GraphMapping graphMapping,
 32 |             String srcSourceFilePath,
 33 |             String dstSourceFilePath,
 34 |             SootMethod srcMethod,
 35 |             SootMethod destMethod
 36 |     ) throws IOException {
 37 |         // using a set to prevent duplicates (order does not matter for now).
 38 |         Set<EditOperation> editScriptSet = new HashSet<>();
 39 | 
 40 |         SourceCodeMapper srcCodeMapper = new SourceCodeMapper(srcSourceFilePath);
 41 |         SourceCodeMapper dstCodeMapper = new SourceCodeMapper(dstSourceFilePath);
 42 | 
 43 |         NodeMapping nodeMapping = graphMapping.getNodeMapping(srcPDG);
 44 | 
 45 |         Map<PDGNode, PDGNode> mappings = nodeMapping.getNodeMapping();
 46 |         Set<PDGNode> srcNodesMapped = mappings.keySet();
 47 |         Set<PDGNode> dstNodesMapped = new HashSet<>(mappings.values());
 48 | 
 49 |         Set<PDGNode> visitedNodes = new HashSet<>();
 50 | 
 51 |         // process mapped nodes for updates or moves
 52 |         for (PDGNode srcNode : srcNodesMapped) {
 53 |             PDGNode dstNode = mappings.get(srcNode);
 54 | 
 55 |             if (!visitedNodes.contains(srcNode)) {
 56 |                 ComparisonResult compResult = nodesAreEqual(srcNode, dstNode, visitedNodes, srcCodeMapper, dstCodeMapper, nodeMapping);
 57 | 
 58 |                 if (!compResult.isEqual) {
 59 |                     if (compResult.isMove) {
 60 |                         int oldLineNumber = getNodeLineNumber(srcNode);
 61 |                         int newLineNumber = getNodeLineNumber(dstNode);
 62 |                         String codeSnippet = srcCodeMapper.getCodeLine(oldLineNumber);
 63 |                         editScriptSet.add(new Move(srcNode, oldLineNumber, newLineNumber, codeSnippet));
 64 |                     } else if (!compResult.syntaxDifferences.isEmpty()) {
 65 |                         for (SyntaxDifference syntaxDiff : compResult.syntaxDifferences) {
 66 |                             int oldLineNumber = syntaxDiff.getOldLineNumber();
 67 |                             int newLineNumber = syntaxDiff.getNewLineNumber();
 68 |                             String oldCodeSnippet = syntaxDiff.getOldCodeSnippet();
 69 |                             String newCodeSnippet = syntaxDiff.getNewCodeSnippet();
 70 |                             if (oldCodeSnippet.equals(newCodeSnippet)) {
 71 |                                 Move move  = new Move(srcNode, oldLineNumber, newLineNumber, oldCodeSnippet);
 72 |                                 editScriptSet.add(move);
 73 |                             } else {
 74 |                                 Update update = new Update(srcNode, oldLineNumber, newLineNumber, oldCodeSnippet, newCodeSnippet, syntaxDiff);
 75 |                                 editScriptSet.add(update);
 76 |                             }
 77 |                         }
 78 |                     }
 79 |                 }
 80 |             }
 81 |         }
 82 | 
 83 |         // handle deletions
 84 |         for (PDGNode srcNode : srcPDG) {
 85 |             if (!srcNodesMapped.contains(srcNode) && !visitedNodes.contains(srcNode)) {
 86 |                 int lineNumber = getNodeLineNumber(srcNode);
 87 |                 String codeSnippet = srcCodeMapper.getCodeLine(lineNumber);
 88 |                 editScriptSet.add(new Delete(srcNode, lineNumber, codeSnippet));
 89 |             }
 90 |         }
 91 | 
 92 |         // handle insertions
 93 |         for (PDGNode dstNode : dstPDG) {
 94 |             if (!dstNodesMapped.contains(dstNode) && !visitedNodes.contains(dstNode)) {
 95 |                 int lineNumber = getNodeLineNumber(dstNode);
 96 |                 String codeSnippet = dstCodeMapper.getCodeLine(lineNumber);
 97 |                 editScriptSet.add(new Insert(dstNode, lineNumber, codeSnippet));
 98 |             }
 99 |         }
100 | 
101 |         // structural signature diff, happens in every case to account for annotations changing even if signature itself doesnt.
102 |         ParsedSignature oldSig = parseMethodSignature(srcMethod, srcCodeMapper);
103 |         ParsedSignature newSig = parseMethodSignature(destMethod, dstCodeMapper);
104 | 
105 |         // misleading naming here , should probably rename to something including annotations
106 |         List<EditOperation> signatureDiffs =
107 |                 compareSignatures(oldSig, newSig, srcMethod, destMethod, srcCodeMapper, dstCodeMapper);
108 | 
109 |         editScriptSet.addAll(signatureDiffs);
110 | 
111 |         return new ArrayList<>(editScriptSet);
112 |     }
113 | 
114 | 
115 |     public static List<EditOperation> generateAddScript(PDG pdg, String sourceFilePath, SootMethod method) throws IOException {
116 |         SourceCodeMapper codeMapper = new SourceCodeMapper(sourceFilePath);
117 |         List<EditOperation> editOperations = new ArrayList<>();
118 | 
119 |         // insert the method signature lines (approx.), handling for annoataions
120 |         int[] methodRange = CodeAnalysisUtils.getMethodLineRange(method, codeMapper);
121 |         List<Integer> annotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(method, codeMapper);
122 |         if (!annotationLines.isEmpty() && Collections.min(annotationLines) < methodRange[0]) {
123 |             methodRange[0] = Collections.min(annotationLines);
124 |         }
125 |         if (methodRange[0] > 0 && methodRange[1] >= methodRange[0]) {
126 |             for (int i = methodRange[0]; i <= methodRange[1]; i++) {
127 |                 String signatureLine = codeMapper.getCodeLine(i);
128 |                 editOperations.add(new Insert(null, i, signatureLine));
129 |             }
130 |         }
131 | 
132 |         editOperations.addAll(
133 |                 collectNodesBFS(pdg).stream()
134 |                         .map(node -> {
135 |                             int lineNumber = getNodeLineNumber(node);
136 |                             String codeSnippet = codeMapper.getCodeLine(lineNumber);
137 |                             return new Insert(node, lineNumber, codeSnippet);
138 |                         })
139 |                         .collect(Collectors.toList())
140 |         );
141 | 
142 | 
143 |         // attempt to insert a trailing closing paren
144 |         int maxLine = editOperations.stream()
145 |                 .mapToInt(op -> {
146 |                     PDGNode node = op.getNode();
147 |                     return node == null ? -1 : getNodeLineNumber(node);
148 |                 })
149 |                 .max()
150 |                 .orElse(-1);
151 |         int nextLine = maxLine + 1;
152 |         if (nextLine <= codeMapper.getTotalLines()) {
153 |             String content = codeMapper.getCodeLine(nextLine).trim();
154 |             if (content.contains("}")) {
155 |                 editOperations.add(new Insert(null, nextLine, content));
156 |             }
157 |         }
158 | 
159 |         return editOperations;
160 |     }
161 | 
162 |     public static List<EditOperation> generateDeleteScript(PDG pdg, String sourceFilePath, SootMethod method) throws IOException {
163 |         SourceCodeMapper codeMapper = new SourceCodeMapper(sourceFilePath);
164 |         List<EditOperation> editOperations = new ArrayList<>();
165 | 
166 |         // delete the method signature lines (approx.)
167 |         int[] methodRange = CodeAnalysisUtils.getMethodLineRange(method, codeMapper);
168 |         List<Integer> annotationLines = CodeAnalysisUtils.getAnnotationsLineNumbers(method, codeMapper);
169 |         if (!annotationLines.isEmpty() && Collections.min(annotationLines) < methodRange[0]) {
170 |             methodRange[0] = Collections.min(annotationLines);
171 |         }
172 |         if (methodRange[0] > 0 && methodRange[1] >= methodRange[0]) {
173 |             for (int i = methodRange[0]; i <= methodRange[1]; i++) {
174 |                 String signatureLine = codeMapper.getCodeLine(i);
175 |                 editOperations.add(new Delete(null, i, signatureLine));
176 |             }
177 |         }
178 | 
179 |         editOperations.addAll(
180 |                 collectNodesBFS(pdg).stream()
181 |                         .map(node -> {
182 |                             int lineNumber = getNodeLineNumber(node);
183 |                             String codeSnippet = codeMapper.getCodeLine(lineNumber);
184 |                             return new Delete(node, lineNumber, codeSnippet);
185 |                         })
186 |                         .collect(Collectors.toList())
187 |         );
188 | 
189 | 
190 |         // attempt to delete a trailing closing paren
191 |         int maxLine = editOperations.stream()
192 |                 .mapToInt(op -> {
193 |                     PDGNode node = op.getNode();
194 |                     return node == null ? -1 : getNodeLineNumber(node);
195 |                 })
196 |                 .max()
197 |                 .orElse(-1);
198 |         int nextLine = maxLine + 1;
199 |         if (nextLine <= codeMapper.getTotalLines()) {
200 |             String content = codeMapper.getCodeLine(nextLine).trim();
201 |             if (content.contains("}")) {
202 |                 editOperations.add(new Delete(null, nextLine, content));
203 |             }
204 |         }
205 | 
206 |         return editOperations;
207 |     }
208 | 
209 | 
210 | 
211 |     private static class ComparisonResult {
212 |         public boolean isEqual;
213 |         public boolean isMove;
214 |         public Set<SyntaxDifference> syntaxDifferences;
215 | 
216 |         public ComparisonResult(boolean isEqual) {
217 |             this.isEqual = isEqual;
218 |             this.isMove = false;
219 |             this.syntaxDifferences = new HashSet<>();
220 |         }
221 | 
222 |         public ComparisonResult(boolean isEqual, boolean isMove, Set<SyntaxDifference> syntaxDifferences) {
223 |             this.isEqual = isEqual;
224 |             this.isMove = isMove;
225 |             this.syntaxDifferences = syntaxDifferences;
226 |         }
227 |     }
228 | 
229 | 
230 |     public static int getNodeLineNumber(PDGNode node) {
231 |         if (node.getType() == PDGNode.Type.CFGNODE) {
232 |             Unit headUnit = (Unit) node.getNode();
233 |             return getLineNumber(headUnit);
234 |         }
235 |         return -1;
236 |     }
237 | 
238 |     private static ComparisonResult nodesAreEqual(PDGNode n1, PDGNode n2, Set<PDGNode> visitedNodes,
239 |                                                   SourceCodeMapper srcCodeMapper, SourceCodeMapper dstCodeMapper,
240 |                                                   NodeMapping nodeMapping) {
241 |         if (visitedNodes.contains(n1)) {
242 |             return new ComparisonResult(true);
243 |         }
244 |         visitedNodes.add(n1);
245 |         visitedNodes.add(n2);
246 | 
247 |         if (!n1.getType().equals(n2.getType())) {
248 |             return new ComparisonResult(false);
249 |         }
250 | 
251 |         if (n1.getType() == PDGNode.Type.CFGNODE) {
252 |             return compareCFGNodes(n1, n2, srcCodeMapper, dstCodeMapper);
253 |         }
254 | 
255 |         return new ComparisonResult(true);
256 |     }
257 | 
258 |     private static ComparisonResult compareCFGNodes(PDGNode n1, PDGNode n2,
259 |                                                     SourceCodeMapper srcCodeMapper, SourceCodeMapper dstCodeMapper) {
260 |         Unit unit1 = (Unit) n1.getNode();
261 |         Unit unit2 = (Unit) n2.getNode();
262 | 
263 |         List<Unit> units1 = Collections.singletonList(unit1);
264 |         List<Unit> units2 = Collections.singletonList(unit2);
265 | 
266 |         Set<SyntaxDifference> differences = compareUnitLists(units1, units2, srcCodeMapper, dstCodeMapper);
267 | 
268 |         if (!differences.isEmpty()) {
269 |             return new ComparisonResult(false, false, differences);
270 |         } else {
271 |             // check for move operations based on line numbers
272 |             int lineNumber1 = getNodeLineNumber(n1);
273 |             int lineNumber2 = getNodeLineNumber(n2);
274 |             if (lineNumber1 != lineNumber2 && lineNumber1 != -1 && lineNumber2 != -1) {
275 |                 return new ComparisonResult(false, true, differences);
276 |             }
277 |         }
278 | 
279 |         return new ComparisonResult(true);
280 |     }
281 | 
282 |     private static Set<SyntaxDifference> compareUnitLists(List<Unit> units1, List<Unit> units2,
283 |                                                           SourceCodeMapper srcCodeMapper, SourceCodeMapper dstCodeMapper) {
284 |         Set<SyntaxDifference> differences = new HashSet<>();
285 | 
286 |         int i = 0, j = 0;
287 |         while (i < units1.size() && j < units2.size()) {
288 |             Unit unit1 = units1.get(i);
289 |             Unit unit2 = units2.get(j);
290 | 
291 |             if (unitsAreEqual(unit1, unit2)) {
292 |                 i++;
293 |                 j++;
294 |             } else {
295 |                 SyntaxDifference diff = new SyntaxDifference(unit1, unit2, srcCodeMapper, dstCodeMapper);
296 |                 differences.add(diff);
297 |                 i++;
298 |                 j++;
299 |             }
300 |         }
301 | 
302 |         // handle remaining units in units1 (deletions)
303 |         while (i < units1.size()) {
304 |             SyntaxDifference diff = new SyntaxDifference(units1.get(i), null, srcCodeMapper, dstCodeMapper);
305 |             differences.add(diff);
306 |             i++;
307 |         }
308 | 
309 |         // handle remaining units in units2 (insertions)
310 |         while (j < units2.size()) {
311 |             SyntaxDifference diff = new SyntaxDifference(null, units2.get(j), srcCodeMapper, dstCodeMapper);
312 |             differences.add(diff);
313 |             j++;
314 |         }
315 | 
316 |         return differences;
317 |     }
318 | 
319 |     private static boolean unitsAreEqual(Unit unit1, Unit unit2) {
320 |         if (unit1 == null || unit2 == null) {
321 |             return false;
322 |         }
323 |         // compares the actual body representation of the units
324 |         return unit1.toString().equals(unit2.toString());
325 |     }
326 | 
327 |     private static int getLineNumber(Unit unit) {
328 |         return CodeAnalysisUtils.getLineNumber(unit);
329 |     }
330 | }
331 | 


--------------------------------------------------------------------------------