├── settings.gradle ├── .gitignore ├── LICENSE ├── src ├── main │ └── java │ │ ├── at │ │ └── unisalzburg │ │ │ └── dbresearch │ │ │ └── apted │ │ │ ├── parser │ │ │ ├── InputParser.java │ │ │ └── BracketStringInputParser.java │ │ │ ├── node │ │ │ ├── StringNodeData.java │ │ │ ├── Node.java │ │ │ └── NodeIndexer.java │ │ │ ├── costmodel │ │ │ ├── CostModel.java │ │ │ ├── StringUnitCostModel.java │ │ │ └── PerEditOperationStringNodeDataCostModel.java │ │ │ ├── util │ │ │ ├── CommandLine.java │ │ │ └── FormatUtilities.java │ │ │ └── distance │ │ │ └── AllPossibleMappingsTED.java │ │ ├── parser │ │ ├── InputParser.java │ │ └── BracketStringInputParser.java │ │ ├── node │ │ ├── StringNodeData.java │ │ ├── Node.java │ │ └── NodeIndexer.java │ │ ├── costmodel │ │ ├── CostModel.java │ │ ├── StringUnitCostModel.java │ │ └── PerEditOperationStringNodeDataCostModel.java │ │ └── distance │ │ └── AllPossibleMappingsTED.java └── test │ ├── java │ ├── PerEditOperationCorrectnessTest.java │ └── CorrectnessTest.java │ └── resources │ ├── mini.json │ └── correctness_test_cases.json └── README.md /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'apted' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | 14 | # Gradle stuff 15 | build/ 16 | .gradle/ 17 | /.nb-gradle/ 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Mateusz Pawlik, Nikolaus Augsten 4 | 5 | Permission is hereby granted, 
free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/parser/InputParser.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.parser; 25 | 26 | import at.unisalzburg.dbresearch.apted.node.Node; 27 | 28 | /** 29 | * This interface specifies methods (currently only one) that must be 30 | * implemented for a custom input parser. 31 | * 32 | * @param the type of node data. 33 | */ 34 | public interface InputParser { 35 | 36 | /** 37 | * Converts the input tree passed as string (e.g., bracket notation, XML) 38 | * into the tree structure. 39 | * 40 | * @param s input tree as string. 41 | * @return tree structure.
42 | */ 43 | public Node fromString(String s); 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/parser/InputParser.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package parser; 25 | 26 | import node.Node; 27 | 28 | /** 29 | * This interface specifies methods (currently only one) that must be 30 | * implemented for a custom input parser. 31 | * 32 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.parser.InputParser} 33 | * 34 | * @param the type of node data. 
35 | */ 36 | @Deprecated public interface InputParser { 37 | 38 | /** 39 | * Converts the input tree passed as string (e.g., bracket notation, XML) 40 | * into the tree structure. 41 | * 42 | * @param s input tree as string. 43 | * @return tree structure. 44 | */ 45 | public Node fromString(String s); 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/node/StringNodeData.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.node; 25 | 26 | /** 27 | * Represents a node label that consists of a single string value. Such label 28 | * belongs to a node.
29 | * 30 | * @see Node 31 | */ 32 | public class StringNodeData { 33 | 34 | /** 35 | * The label of a node. 36 | */ 37 | private String label; 38 | 39 | /** 40 | * Constructs node data with a specified label. 41 | * 42 | * @param label string label of a node. 43 | */ 44 | public StringNodeData(String label) { 45 | this.label = label; 46 | } 47 | 48 | /** 49 | * Returns the label of a node. 50 | * 51 | * @return node label. 52 | */ 53 | public String getLabel() { 54 | return label; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/node/StringNodeData.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | package node; 25 | 26 | /** 27 | * Represents a node label that consists of a single string value. Such label 28 | * belongs to a node. 29 | * 30 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.node.StringNodeData} 31 | * 32 | * @see Node 33 | */ 34 | @Deprecated public class StringNodeData { 35 | 36 | /** 37 | * The label of a node. 38 | */ 39 | private String label; 40 | 41 | /** 42 | * Constructs node data with a specified label. 43 | * 44 | * @param label string label of a node. 45 | */ 46 | public StringNodeData(String label) { 47 | this.label = label; 48 | } 49 | 50 | /** 51 | * Returns the label of a node. 52 | * 53 | * @return node label. 54 | */ 55 | public String getLabel() { 56 | return label; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/costmodel/CostModel.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.costmodel; 25 | 26 | import at.unisalzburg.dbresearch.apted.node.Node; 27 | 28 | /** 29 | * This interface specifies the methods to implement for a custom cost model. 30 | * The methods represent the costs of edit operations (delete, insert, rename). 31 | * 32 | *

If the cost function is a metric, the tree edit distance is a metric too. 33 | * 34 | *

However, the cost function does not have to be a metric - the costs of 35 | * deletion, insertion and rename can be arbitrary. 36 | * 37 | *

IMPORTANT: Mind the float type use for costs. 38 | * 39 | * @param type of node data on which the cost model is defined. 40 | */ 41 | public interface CostModel { 42 | 43 | /** 44 | * Calculates the cost of deleting a node. 45 | * 46 | * @param n the node considered to be deleted. 47 | * @return the cost of deleting node n. 48 | */ 49 | public float del(Node n); 50 | 51 | /** 52 | * Calculates the cost of inserting a node. 53 | * 54 | * @param n the node considered to be inserted. 55 | * @return the cost of inserting node n. 56 | */ 57 | public float ins(Node n); 58 | 59 | /** 60 | * Calculates the cost of renaming (mapping) two nodes. 61 | * 62 | * @param n1 the source node of rename. 63 | * @param n2 the destination node of rename. 64 | * @return the cost of renaming (mapping) node n1 to n2. 65 | */ 66 | public float ren(Node n1, Node n2); 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/costmodel/CostModel.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package costmodel; 25 | 26 | import node.Node; 27 | 28 | /** 29 | * This interface specifies the methods to implement for a custom cost model. 30 | * The methods represent the costs of edit operations (delete, insert, rename). 31 | * 32 | *

If the cost function is a metric, the tree edit distance is a metric too. 33 | * 34 | *

However, the cost function does not have to be a metric - the costs of 35 | * deletion, insertion and rename can be arbitrary. 36 | * 37 | *

IMPORTANT: Mind the float type use for costs. 38 | * 39 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.costmodel.CostModel} 40 | * 41 | * @param type of node data on which the cost model is defined. 42 | */ 43 | @Deprecated public interface CostModel { 44 | 45 | /** 46 | * Calculates the cost of deleting a node. 47 | * 48 | * @param n the node considered to be deleted. 49 | * @return the cost of deleting node n. 50 | */ 51 | public float del(Node n); 52 | 53 | /** 54 | * Calculates the cost of inserting a node. 55 | * 56 | * @param n the node considered to be inserted. 57 | * @return the cost of inserting node n. 58 | */ 59 | public float ins(Node n); 60 | 61 | /** 62 | * Calculates the cost of renaming (mapping) two nodes. 63 | * 64 | * @param n1 the source node of rename. 65 | * @param n2 the destination node of rename. 66 | * @return the cost of renaming (mapping) node n1 to n2. 67 | */ 68 | public float ren(Node n1, Node n2); 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/parser/BracketStringInputParser.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik, Nikolaus Augsten 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.parser; 25 | 26 | import java.util.List; 27 | import at.unisalzburg.dbresearch.apted.util.FormatUtilities; 28 | import at.unisalzburg.dbresearch.apted.node.Node; 29 | import at.unisalzburg.dbresearch.apted.node.StringNodeData; 30 | 31 | // [TODO] Make this parser independent from FormatUtilities - move here relevant elements. 32 | 33 | /** 34 | * Parser for the input trees in the bracket notation with a single string-value 35 | * label of type {@link StringNodeData}. 36 | * 37 | *

Bracket notation encodes the trees with nested parentheses, for example, 38 | * in tree {A{B{X}{Y}{F}}{C}} the root node has label A and two children with 39 | * labels B and C. Node with label B has three children with labels X, Y, F. 40 | * 41 | * @see Node 42 | * @see StringNodeData 43 | */ 44 | public class BracketStringInputParser implements InputParser { 45 | 46 | /** 47 | * Parses the input tree as a string and converts it to our tree 48 | * representation using the {@link Node} class. 49 | * 50 | * @param s input tree as string in bracket notation. 51 | * @return tree representation of the bracket notation input. 52 | * @see Node 53 | */ 54 | public Node fromString(String s) { 55 | s = s.substring(s.indexOf("{"), s.lastIndexOf("}") + 1); 56 | Node node = new Node(new StringNodeData(FormatUtilities.getRoot(s))); 57 | List c = FormatUtilities.getChildren(s); 58 | for(int i = 0; i < c.size(); i++) 59 | node.addChild(fromString(c.get(i))); 60 | return node; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/costmodel/StringUnitCostModel.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package costmodel; 25 | 26 | import costmodel.CostModel; 27 | import node.Node; 28 | import node.StringNodeData; 29 | 30 | /** 31 | * This is a unit-cost model defined on string labels. 32 | * 33 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.costmodel.StringUnitCostModel} 34 | * 35 | * @see CostModel 36 | * @see StringNodeData 37 | */ 38 | // TODO: Use a label dictionary to encode string labels with integers for 39 | // faster rename cost computation. 40 | @Deprecated public class StringUnitCostModel implements CostModel { 41 | 42 | /** 43 | * Calculates the cost of deleting a node. 44 | * 45 | * @param n a node considered to be deleted. 46 | * @return {@code 1} - a fixed cost of deleting a node. 47 | */ 48 | public float del(Node n) { 49 | return 1.0f; 50 | } 51 | 52 | /** 53 | * Calculates the cost of inserting a node. 54 | * 55 | * @param n a node considered to be inserted. 56 | * @return {@code 1} - a fixed cost of inserting a node. 57 | */ 58 | public float ins(Node n) { 59 | return 1.0f; 60 | } 61 | 62 | /** 63 | * Calculates the cost of renaming the label of the source node to the label 64 | * of the destination node. 65 | * 66 | * @param n1 a source node for rename. 67 | * @param n2 a destination node for rename. 68 | * @return {@code 0} if labels of renamed nodes are equal, and {@code 1} otherwise.
69 | */ 70 | public float ren(Node n1, Node n2) { 71 | return (n1.getNodeData().getLabel().equals(n2.getNodeData().getLabel())) ? 0.0f : 1.0f; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/costmodel/StringUnitCostModel.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.costmodel; 25 | 26 | import at.unisalzburg.dbresearch.apted.costmodel.CostModel; 27 | import at.unisalzburg.dbresearch.apted.node.Node; 28 | import at.unisalzburg.dbresearch.apted.node.StringNodeData; 29 | 30 | /** 31 | * This is a unit-cost model defined on string labels.
32 | * 33 | * @see CostModel 34 | * @see StringNodeData 35 | */ 36 | // TODO: Use a label dictionary to encode string labels with integers for 37 | // faster rename cost computation. 38 | public class StringUnitCostModel implements CostModel { 39 | 40 | /** 41 | * Calculates the cost of deleting a node. 42 | * 43 | * @param n a node considered to be deleted. 44 | * @return {@code 1} - a fixed cost of deleting a node. 45 | */ 46 | public float del(Node n) { 47 | return 1.0f; 48 | } 49 | 50 | /** 51 | * Calculates the cost of inserting a node. 52 | * 53 | * @param n a node considered to be inserted. 54 | * @return {@code 1} - a fixed cost of inserting a node. 55 | */ 56 | public float ins(Node n) { 57 | return 1.0f; 58 | } 59 | 60 | /** 61 | * Calculates the cost of renaming the label of the source node to the label 62 | * of the destination node. 63 | * 64 | * @param n1 a source node for rename. 65 | * @param n2 a destination node for rename. 66 | * @return {@code 0} if labels of renamed nodes are equal, and {@code 1} otherwise. 67 | */ 68 | public float ren(Node n1, Node n2) { 69 | return (n1.getNodeData().getLabel().equals(n2.getNodeData().getLabel())) ?
0.0f : 1.0f; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/parser/BracketStringInputParser.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik, Nikolaus Augsten 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package parser; 25 | 26 | import java.util.List; 27 | import at.unisalzburg.dbresearch.apted.util.FormatUtilities; 28 | import node.Node; 29 | import node.StringNodeData; 30 | 31 | // [TODO] Make this parser independent from FormatUtilities - move here relevant elements. 32 | 33 | /** 34 | * Parser for the input trees in the bracket notation with a single string-value 35 | * label of type {@link StringNodeData}. 36 | * 37 | *

Bracket notation encodes the trees with nested parentheses, for example, 38 | * in tree {A{B{X}{Y}{F}}{C}} the root node has label A and two children with 39 | * labels B and C. Node with label B has three children with labels X, Y, F. 40 | * 41 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.parser.BracketStringInputParser} 42 | * 43 | * @see Node 44 | * @see StringNodeData 45 | */ 46 | @Deprecated public class BracketStringInputParser implements InputParser { 47 | 48 | /** 49 | * Parses the input tree as a string and converts it to our tree 50 | * representation using the {@link Node} class. 51 | * 52 | * @param s input tree as string in bracket notation. 53 | * @return tree representation of the bracket notation input. 54 | * @see Node 55 | */ 56 | public Node fromString(String s) { 57 | s = s.substring(s.indexOf("{"), s.lastIndexOf("}") + 1); 58 | Node node = new Node(new StringNodeData(FormatUtilities.getRoot(s))); 59 | List c = FormatUtilities.getChildren(s); 60 | for(int i = 0; i < c.size(); i++) 61 | node.addChild(fromString(c.get(i))); 62 | return node; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/costmodel/PerEditOperationStringNodeDataCostModel.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this 
permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.costmodel; 25 | 26 | import at.unisalzburg.dbresearch.apted.costmodel.CostModel; 27 | import at.unisalzburg.dbresearch.apted.node.Node; 28 | import at.unisalzburg.dbresearch.apted.node.StringNodeData; 29 | 30 | /** 31 | * This is a cost model defined on {@link StringNodeData} with a fixed cost 32 | * per edit operation. 33 | */ 34 | public class PerEditOperationStringNodeDataCostModel implements CostModel { 35 | 36 | /** 37 | * Stores the cost of deleting a node. 38 | */ 39 | private float delCost; 40 | 41 | /** 42 | * Stores the cost of inserting a node. 43 | */ 44 | private float insCost; 45 | 46 | /** 47 | * Stores the cost of mapping two nodes (renaming their labels). 48 | */ 49 | private float renCost; 50 | 51 | /** 52 | * Initialises the cost model with the passed edit operation costs. 53 | * 54 | * @param delCost deletion cost. 55 | * @param insCost insertion cost. 56 | * @param renCost rename cost. 57 | */ 58 | public PerEditOperationStringNodeDataCostModel(float delCost, float insCost, float renCost) { 59 | this.delCost = delCost; 60 | this.insCost = insCost; 61 | this.renCost = renCost; 62 | } 63 | 64 | /** 65 | * Calculates the cost of deleting a node. 66 | * 67 | * @param n the node considered to be deleted. 68 | * @return the cost of deleting node n.
69 | */ 70 | public float del(Node n) { 71 | return delCost; 72 | } 73 | 74 | /** 75 | * Calculates the cost of inserting a node. 76 | * 77 | * @param n the node considered to be inserted. 78 | * @return the cost of inserting node n. 79 | */ 80 | public float ins(Node n) { 81 | return insCost; 82 | } 83 | 84 | /** 85 | * Calculates the cost of renaming the string labels of two nodes. 86 | * 87 | * @param n1 the source node of rename. 88 | * @param n2 the destination node of rename. 89 | * @return the cost of renaming node n1 to n2. 90 | */ 91 | public float ren(Node n1, Node n2) { 92 | return (n1.getNodeData().getLabel().equals(n2.getNodeData().getLabel())) ? 0.0f : renCost; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/costmodel/PerEditOperationStringNodeDataCostModel.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package costmodel; 25 | 26 | import costmodel.CostModel; 27 | import node.Node; 28 | import node.StringNodeData; 29 | 30 | /** 31 | * This is a cost model defined on {@link node.StringNodeData} with a fixed cost 32 | * per edit operation. 33 | * 34 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.costmodel.PerEditOperationStringNodeDataCostModel} 35 | */ 36 | @Deprecated public class PerEditOperationStringNodeDataCostModel implements CostModel { 37 | 38 | /** 39 | * Stores the cost of deleting a node. 40 | */ 41 | private float delCost; 42 | 43 | /** 44 | * Stores the cost of inserting a node. 45 | */ 46 | private float insCost; 47 | 48 | /** 49 | * Stores the cost of mapping two nodes (renaming their labels). 50 | */ 51 | private float renCost; 52 | 53 | /** 54 | * Initialises the cost model with the passed edit operation costs. 55 | * 56 | * @param delCost deletion cost. 57 | * @param insCost insertion cost. 58 | * @param renCost rename cost. 59 | */ 60 | public PerEditOperationStringNodeDataCostModel(float delCost, float insCost, float renCost) { 61 | this.delCost = delCost; 62 | this.insCost = insCost; 63 | this.renCost = renCost; 64 | } 65 | 66 | /** 67 | * Calculates the cost of deleting a node. 68 | * 69 | * @param n the node considered to be deleted. 70 | * @return the cost of deleting node n. 71 | */ 72 | public float del(Node n) { 73 | return delCost; 74 | } 75 | 76 | /** 77 | * Calculates the cost of inserting a node. 78 | * 79 | * @param n the node considered to be inserted. 80 | * @return the cost of inserting node n. 
81 | */ 82 | public float ins(Node n) { 83 | return insCost; 84 | } 85 | 86 | /** 87 | * Calculates the cost of renaming the string labels of two nodes. 88 | * 89 | * @param n1 the source node of rename. 90 | * @param n2 the destination node of rename. 91 | * @return the cost of renaming node n1 to n2. 92 | */ 93 | public float ren(Node n1, Node n2) { 94 | return (n1.getNodeData().getLabel().equals(n2.getNodeData().getLabel())) ? 0.0f : renCost; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/node/Node.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.node; 25 | 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | 29 | /** 30 | * This is a recursive representation of an ordered tree. Each node stores a 31 | * list of pointers to its children. The order of children is significant and 32 | * must be observed while implementing a custom input parser. 33 | * 34 | * @param <D> the type of node data (node label). 35 | */ 36 | public class Node { 37 | 38 | /** 39 | * Information associated to and stored at each node. This can be anything 40 | * and depends on the application, for example, string label, key-value pair, 41 | * list of values, etc. 42 | */ 43 | private D nodeData; 44 | 45 | /** 46 | * Array of pointers to this node's children. The order of children is 47 | * significant due to the definition of ordered trees. 48 | */ 49 | private List> children; 50 | 51 | /** 52 | * Constructs a new node with the passed node data and an empty list of 53 | * children. 54 | * 55 | * @param nodeData instance of node data (node label). 56 | */ 57 | public Node(D nodeData) { 58 | this.children = new ArrayList<>(); 59 | setNodeData(nodeData); 60 | } 61 | 62 | /** 63 | * Counts the number of nodes in a tree rooted at this node. 64 | * 65 | *

This method runs in linear time in the tree size. 66 | * 67 | * @return number of nodes in the tree rooted at this node. 68 | */ 69 | public int getNodeCount() { 70 | int sum = 1; 71 | for(Node child : getChildren()) { 72 | sum += child.getNodeCount(); 73 | } 74 | return sum; 75 | } 76 | 77 | /** 78 | * Adds a new child at the end of children list. The added child will be 79 | * the last child of this node. 80 | * 81 | * @param c child node to add. 82 | */ 83 | public void addChild(Node c) { 84 | this.children.add(c); 85 | } 86 | 87 | /** 88 | * Returns a string representation of the tree in bracket notation. 89 | * 90 | *

IMPORTANT: Works only for nodes storing {@link node.StringNodeData} 91 | * due to using {@link node.StringNodeData#getLabel()}. 92 | * 93 | * @return tree in bracket notation. 94 | */ 95 | public String toString() { 96 | String res = (new StringBuilder("{")).append(((StringNodeData)getNodeData()).getLabel()).toString(); 97 | for(Node child : getChildren()) { 98 | res = (new StringBuilder(String.valueOf(res))).append(child.toString()).toString(); 99 | } 100 | res = (new StringBuilder(String.valueOf(res))).append("}").toString(); 101 | return res; 102 | } 103 | 104 | /** 105 | * Returns node data. Used especially for calculating rename cost. 106 | * 107 | * @return node data (label of a node). 108 | */ 109 | public D getNodeData() { 110 | return nodeData; 111 | } 112 | 113 | /** 114 | * Sets the node data of this node. 115 | * 116 | * @param nodeData instance of node data (node label). 117 | */ 118 | public void setNodeData(D nodeData) { 119 | this.nodeData = nodeData; 120 | } 121 | 122 | /** 123 | * Returns the list with all node's children. 124 | * 125 | * @return children of the node. 
126 | */ 127 | public List> getChildren() { 128 | return children; 129 | } 130 | 131 | } 132 | -------------------------------------------------------------------------------- /src/main/java/node/Node.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package node; 25 | 26 | import java.util.Vector; 27 | 28 | /** 29 | * This is a recursive representation of an ordered tree. Each node stores a 30 | * vector of pointers to its children. The order of children is significant and 31 | * must be observed while implementing a custom input parser. 32 | * 33 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.node.Node} 34 | * 35 | * @param the type of node data (node label). 
36 | */ 37 | @Deprecated public class Node { 38 | 39 | /** 40 | * Information associated to and stored at each node. This can be anything 41 | * and depends on the application, for example, string label, key-value pair, 42 | * vector of values, etc. 43 | */ 44 | private D nodeData; 45 | 46 | /** 47 | * Array of pointers to this node's children. The order of children is 48 | * significant due to the definition of ordered trees. 49 | */ 50 | private Vector> children; 51 | 52 | /** 53 | * Constructs a new node with the passed node data and an empty vector of 54 | * children. 55 | * 56 | * @param nodeData instance of node data (node label). 57 | */ 58 | public Node(D nodeData) { 59 | this.children = new Vector>(); 60 | setNodeData(nodeData); 61 | } 62 | 63 | /** 64 | * Counts the number of nodes in a tree rooted at this node. 65 | * 66 | *

This method runs in linear time in the tree size. 67 | * 68 | * @return number of nodes in the tree rooted at this node. 69 | */ 70 | public int getNodeCount() { 71 | int sum = 1; 72 | for(Node child : getChildren()) { 73 | sum += child.getNodeCount(); 74 | } 75 | return sum; 76 | } 77 | 78 | /** 79 | * Adds a new child at the end of children vector. The added child will be 80 | * the last child of this node. 81 | * 82 | * @param c child node to add. 83 | */ 84 | public void addChild(Node c) { 85 | this.children.add(c); 86 | } 87 | 88 | /** 89 | * Returns a string representation of the tree in bracket notation. 90 | * 91 | *

IMPORTANT: Works only for nodes storing {@link node.StringNodeData} 92 | * due to using {@link node.StringNodeData#getLabel()}. 93 | * 94 | * @return tree in bracket notation. 95 | */ 96 | public String toString() { 97 | String res = (new StringBuilder("{")).append(((StringNodeData)getNodeData()).getLabel()).toString(); 98 | for(Node child : getChildren()) { 99 | res = (new StringBuilder(String.valueOf(res))).append(child.toString()).toString(); 100 | } 101 | res = (new StringBuilder(String.valueOf(res))).append("}").toString(); 102 | return res; 103 | } 104 | 105 | /** 106 | * Returns node data. Used especially for calculating rename cost. 107 | * 108 | * @return node data (label of a node). 109 | */ 110 | public D getNodeData() { 111 | return nodeData; 112 | } 113 | 114 | /** 115 | * Sets the node data of this node. 116 | * 117 | * @param nodeData instance of node data (node label). 118 | */ 119 | public void setNodeData(D nodeData) { 120 | this.nodeData = nodeData; 121 | } 122 | 123 | /** 124 | * Returns the vector with all node's children. 125 | * 126 | * @return children of the node. 127 | */ 128 | public Vector> getChildren() { 129 | return children; 130 | } 131 | 132 | } 133 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # APTED algorithm for the Tree Edit Distance 2 | 3 | ## Information 4 | 5 | This is an implementation of the APTED algorithm, the state-of-the-art 6 | solution for computing the tree edit distance [1,2], which supersedes the RTED 7 | algorithm [3]. 8 | 9 | You can find more information on our Tree Edit Distance website 10 | http://tree-edit-distance.dbresearch.uni-salzburg.at/ 11 | 12 | ### Deprecated API 13 | 14 | As has been pointed out to us, our API had incorrect packaging, which caused some trouble 15 | (especially the `util` package). 16 | We've fixed the packaging. 
For the sake of current users, we've also kept the 17 | old one, which we've annotated as deprecated in both the source code and the javadoc. 18 | We're planning on removing it from the repository at some point. 19 | 20 | ## Citing APTED 21 | 22 | If you want to refer to APTED in a publication, please cite [1] and [2]. 23 | 24 | ## Licence 25 | 26 | The source code is published under the **MIT licence** found in the root 27 | directory of the project and in the header of each source file. 28 | 29 | ## Input 30 | 31 | Currently, we support only the so-called bracket notation for the input trees, 32 | for example, encoding `{A{B{X}{Y}{F}}{C}}` corresponds to the following tree: 33 | ``` 34 | A 35 | / \ 36 | B C 37 | /|\ 38 | X Y F 39 | ``` 40 | 41 | ## Output 42 | 43 | Our tool computes two outputs: 44 | - tree edit **distance** value - the minimum cost of transforming the source 45 | tree into the destination tree. 46 | - tree edit **mapping** - a mapping between nodes that corresponds to the 47 | tree edit distance value. Nodes that are not mapped are deleted (source tree) 48 | or inserted (destination tree). 49 | 50 | ## Customising 51 | 52 | If the nodes of your trees have labels different from simple strings and you 53 | need a more sophisticated cost model than unit cost, you can customise that. 54 | There are three elements that you have to consider. 55 | See [Javadoc](#javadoc-documentation) documentation for further details. 56 | 57 | ### Parsing the input 58 | 59 | Our current parser `BracketStringInputParser` takes the bracket-encoded input 60 | tree as a string and transforms it to tree structure composed of `Node` objects. 61 | If you'd like to use other encoding, you have to write a custom class that 62 | implements `InputParser` interface. 63 | 64 | ### Node data 65 | 66 | The parser creates nodes and stores the corresponding information in 67 | `Node.nodeData`. We use `StringNodeData` to store simple string labels. 
If 68 | you need anything else, you have to implement your own class. It can be 69 | anything, we don't provide any interface. 70 | 71 | ### Cost model 72 | 73 | The cost model decides on the costs of edit operations for every node 74 | (insertion and deletion) and every node pair (rename). We've implemented a 75 | simple `StringUnitCostModel` that returns `1` for deleting and inserting any 76 | node. The rename cost depends on label (`StringNodeData`) equality. 77 | 78 | Write a class that implements `CostModel` interface if you need a more 79 | sophisticated cost model. See `PerEditOperationStringNodeDataCostModel` which 80 | allows different costs for each edit operation. 81 | 82 | ### Using customised APTED 83 | 84 | When you have all the bricks ready (`MyInputParser`, `MyNodeData`, `MyCostModel`), 85 | execute APTED as follows for `sourceTree` and `destinationTree`: 86 | ```Java 87 | // Parse the input and transform to Node objects storing node information in MyNodeData. 88 | MyInputParser parser = new MyInputParser(); 89 | Node<MyNodeData> t1 = parser.fromString(sourceTree); 90 | Node<MyNodeData> t2 = parser.fromString(destinationTree); 91 | // Initialise APTED. 92 | APTED<MyCostModel, MyNodeData> apted = new APTED<>(new MyCostModel()); 93 | // Execute APTED. 94 | float result = apted.computeEditDistance(t1, t2); 95 | ``` 96 | 97 | ## Execution manual 98 | 99 | Execute `java -jar apted.jar -h` for manual and help. 100 | 101 | ## Building APTED 102 | 103 | You can clone the code, compile, and build the JAR file the regular command-line 104 | way. 105 | 106 | We use [Gradle](https://gradle.org/) for convenience. 107 | - [install Gradle](https://gradle.org/install) 108 | - run `gradle test` for unit tests (currently correctness tests) 109 | - run `gradle build` to find the `apted.jar` file in `build/libs/` 110 | 111 | ### Gradle wrapper 112 | 113 | We intentionally do not put automatically generated Gradle wrapper files in the 114 | repository. We don't like that. 
However, if it helps, we've added a wrapper task section to the `build.gradle` file. 115 | 116 | ## Javadoc documentation 117 | 118 | Run `gradle javadoc` to generate documentation. Then, open in your browser 119 | `build/docs/javadoc/index.html`. 120 | 121 | The current and future documentation should cover all classes and their members, 122 | including private. The internals of the algorithms and methods are documented 123 | within the source code. If anything is missing or unclear, please send us 124 | feedback. 125 | 126 | ## References 127 | 128 | 1. M. Pawlik and N. Augsten. *Tree edit distance: Robust and memory- 129 | efficient*. Information Systems 56. 2016. 130 | 131 | 2. M. Pawlik and N. Augsten. *Efficient Computation of the Tree Edit 132 | Distance*. ACM Transactions on Database Systems (TODS) 40(1). 2015. 133 | 134 | 3. M. Pawlik and N. Augsten. *RTED: A Robust Algorithm for the Tree Edit 135 | Distance*. PVLDB 5(4). 2011. 136 | -------------------------------------------------------------------------------- /src/test/java/PerEditOperationCorrectnessTest.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | import java.util.Collection; 25 | import java.util.Arrays; 26 | import java.util.LinkedList; 27 | import java.io.BufferedReader; 28 | import java.io.FileReader; 29 | import java.io.IOException; 30 | import org.junit.Before; 31 | import org.junit.Test; 32 | import org.junit.runner.RunWith; 33 | import org.junit.runners.Parameterized; 34 | import org.junit.runners.Parameterized.Parameters; 35 | import com.google.gson.Gson; 36 | import static org.junit.Assert.assertEquals; 37 | import at.unisalzburg.dbresearch.apted.distance.APTED; 38 | import at.unisalzburg.dbresearch.apted.distance.AllPossibleMappingsTED; 39 | import at.unisalzburg.dbresearch.apted.parser.BracketStringInputParser; 40 | import at.unisalzburg.dbresearch.apted.node.Node; 41 | import at.unisalzburg.dbresearch.apted.node.StringNodeData; 42 | import at.unisalzburg.dbresearch.apted.costmodel.PerEditOperationStringNodeDataCostModel; 43 | 44 | /** 45 | * Correctness unit tests of distance computation for node labels with a single 46 | * string value and per-edit-operation cost model. 47 | * 48 | * @see node.StringNodeData 49 | * @see costmodel.PerEditOperationStringNodeDataCostModel 50 | */ 51 | @RunWith(Parameterized.class) 52 | public class PerEditOperationCorrectnessTest { 53 | 54 | /** 55 | * Test case object holding parameters of a single test case. 56 | * 57 | *

Could be also deserialized here but without much benefit. 58 | */ 59 | private TestCase testCase; 60 | 61 | /** 62 | * This class represents a single test case from the JSON file. JSON keys 63 | * are mapped to fields of this class. 64 | */ 65 | // [TODO] Verify if this is the best placement for this class. 66 | private static class TestCase { 67 | 68 | /** 69 | * Test identifier to quickly find failed test case in JSON file. 70 | */ 71 | private int testID; 72 | 73 | /** 74 | * Source tree as string. 75 | */ 76 | private String t1; 77 | 78 | /** 79 | * Destination tree as string. 80 | */ 81 | private String t2; 82 | 83 | /** 84 | * Correct distance value between source and destination trees. 85 | */ 86 | private int d; 87 | 88 | /** 89 | * Used in printing the test case details on failure with '(name = "{0}")'. 90 | * 91 | * @return test case details. 92 | * @see CorrectnessTest#data() 93 | */ 94 | public String toString() { 95 | return "testID:" + testID + ",t1:" + t1 + ",t2:" + t2 + ",d:" + d; 96 | } 97 | 98 | /** 99 | * Returns identifier of this test case. 100 | * 101 | * @return test case identifier. 102 | */ 103 | public int getTestID() { 104 | return testID; 105 | } 106 | 107 | /** 108 | * Returns source tree of this test case. 109 | * 110 | * @return source tree. 111 | */ 112 | public String getT1() { 113 | return t1; 114 | } 115 | 116 | /** 117 | * Returns destination tree of this test case. 118 | * 119 | * @return destination tree. 120 | */ 121 | public String getT2() { 122 | return t2; 123 | } 124 | 125 | /** 126 | * Returns correct distance value between source and destination trees 127 | * of this test case. 128 | * 129 | * @return correct distance. 130 | */ 131 | public int getD() { 132 | return d; 133 | } 134 | 135 | } 136 | 137 | /** 138 | * Constructs a single test for a single test case. Used for parameterised 139 | * tests. 140 | * 141 | * @param testCase single test case. 
142 | */ 143 | public PerEditOperationCorrectnessTest(TestCase testCase) { 144 | this.testCase = testCase; 145 | } 146 | 147 | /** 148 | * Returns a list of test cases read from external JSON file. 149 | * 150 | *

Uses google.gson for reading JSON document. 151 | * 152 | *

In case of a failure, the parameter values from {@link TestCase} object 153 | * are printed '(name = "{0}")'. 154 | * 155 | * @return list of all test cases read from JSON file. 156 | * @throws IOException in case of failure of reading the JSON file. 157 | */ 158 | @Parameters(name = "{0}") 159 | public static Collection data() throws IOException { 160 | BufferedReader br = new BufferedReader(new FileReader(CorrectnessTest.class.getResource("/mini.json").getPath())); 161 | Gson gson = new Gson(); 162 | TestCase[] testCases = new Gson().fromJson(br, TestCase[].class); 163 | return Arrays.asList(testCases); 164 | } 165 | 166 | /** 167 | * Compute TED for a single test case and compare to the correct value. Uses 168 | * node labels with a single string value and per-edit-operation cost model. 169 | * 170 | *

The correct value is calculated using AllPossibleMappingsTED algorithm. 171 | *

The costs of edit operations are set to some example values different 172 | * than in the unit cost model. 173 | * 174 | * @see node.StringNodeData 175 | * @see costmodel.PerEditOperationStringNodeDataCostModel 176 | * @see distance.AllPossibleMappingsTED 177 | */ 178 | @Test 179 | public void distancePerEditOperationStringNodeDataCostModel() { 180 | // Parse the input. 181 | BracketStringInputParser parser = new BracketStringInputParser(); 182 | Node t1 = parser.fromString(testCase.getT1()); 183 | Node t2 = parser.fromString(testCase.getT2()); 184 | // Initialise algorithms. 185 | APTED apted = new APTED<>(new PerEditOperationStringNodeDataCostModel(0.4f, 0.4f, 0.6f)); 186 | AllPossibleMappingsTED apmted = new AllPossibleMappingsTED<>(new PerEditOperationStringNodeDataCostModel(0.4f, 0.4f, 0.6f)); 187 | // Calculate distances using both algorithms. 188 | float result = apted.computeEditDistance(t1, t2); 189 | float correctResult = apmted.computeEditDistance(t1, t2); 190 | assertEquals(correctResult, result, 0.0001); 191 | } 192 | 193 | } 194 | -------------------------------------------------------------------------------- /src/test/java/CorrectnessTest.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | import java.util.Collection; 25 | import java.util.Arrays; 26 | import java.util.List; 27 | import java.io.BufferedReader; 28 | import java.io.FileReader; 29 | import java.io.IOException; 30 | import org.junit.Test; 31 | import org.junit.runner.RunWith; 32 | import org.junit.runners.Parameterized; 33 | import org.junit.runners.Parameterized.Parameters; 34 | import com.google.gson.Gson; 35 | import static org.junit.Assert.assertEquals; 36 | import at.unisalzburg.dbresearch.apted.distance.APTED; 37 | import at.unisalzburg.dbresearch.apted.parser.BracketStringInputParser; 38 | import at.unisalzburg.dbresearch.apted.node.Node; 39 | import at.unisalzburg.dbresearch.apted.node.StringNodeData; 40 | import at.unisalzburg.dbresearch.apted.costmodel.StringUnitCostModel; 41 | 42 | /** 43 | * Correctness unit tests of distance and mapping computation. 44 | * 45 | *

In case of mapping, only mapping cost is verified against the correct 46 | * distance. 47 | * 48 | *

Currently tests only for unit-cost model and single string-value labels. 49 | * 50 | * @see StringNodeData 51 | * @see StringUnitCostModel 52 | */ 53 | @RunWith(Parameterized.class) 54 | public class CorrectnessTest { 55 | 56 | /** 57 | * Test case object holding parameters of a single test case. 58 | * 59 | *

Could be also deserialized here but without much benefit. 60 | */ 61 | private TestCase testCase; 62 | 63 | /** 64 | * This class represents a single test case from the JSON file. JSON keys 65 | * are mapped to fields of this class. 66 | */ 67 | // [TODO] Verify if this is the best placement for this class. 68 | private static class TestCase { 69 | 70 | /** 71 | * Test identifier to quickly find failed test case in JSON file. 72 | */ 73 | private int testID; 74 | 75 | /** 76 | * Source tree as string. 77 | */ 78 | private String t1; 79 | 80 | /** 81 | * Destination tree as string. 82 | */ 83 | private String t2; 84 | 85 | /** 86 | * Correct distance value between source and destination trees. 87 | */ 88 | private int d; 89 | 90 | /** 91 | * Used in printing the test case details on failure with '(name = "{0}")'. 92 | * 93 | * @return test case details. 94 | * @see CorrectnessTest#data() 95 | */ 96 | public String toString() { 97 | return "testID:" + testID + ",t1:" + t1 + ",t2:" + t2 + ",d:" + d; 98 | } 99 | 100 | /** 101 | * Returns identifier of this test case. 102 | * 103 | * @return test case identifier. 104 | */ 105 | public int getTestID() { 106 | return testID; 107 | } 108 | 109 | /** 110 | * Returns source tree of this test case. 111 | * 112 | * @return source tree. 113 | */ 114 | public String getT1() { 115 | return t1; 116 | } 117 | 118 | /** 119 | * Returns destination tree of this test case. 120 | * 121 | * @return destination tree. 122 | */ 123 | public String getT2() { 124 | return t2; 125 | } 126 | 127 | /** 128 | * Returns correct distance value between source and destination trees 129 | * of this test case. 130 | * 131 | * @return correct distance. 132 | */ 133 | public int getD() { 134 | return d; 135 | } 136 | 137 | } 138 | 139 | /** 140 | * Constructs a single test for a single test case. Used for parameterised 141 | * tests. 142 | * 143 | * @param testCase single test case. 
144 | */ 145 | public CorrectnessTest(TestCase testCase) { 146 | this.testCase = testCase; 147 | } 148 | 149 | /** 150 | * Returns a list of test cases read from external JSON file. 151 | * 152 | *

Uses google.gson for reading JSON document. 153 | * 154 | *

In case of a failure, the parameter values from {@link TestCase} object 155 | * are printed '(name = "{0}")'. 156 | * 157 | * @return list of all test cases read from JSON file. 158 | * @throws IOException in case of failure of reading the JSON file. 159 | */ 160 | @Parameters(name = "{0}") 161 | public static Collection data() throws IOException { 162 | BufferedReader br = new BufferedReader(new FileReader(CorrectnessTest.class.getResource("/correctness_test_cases.json").getPath())); 163 | Gson gson = new Gson(); 164 | TestCase[] testCases = new Gson().fromJson(br, TestCase[].class); 165 | return Arrays.asList(testCases); 166 | } 167 | 168 | /** 169 | * Parse trees from bracket notation to {node.StringNodeData}, convert back 170 | * to strings and verify equality with the input. 171 | */ 172 | @Test 173 | public void parsingBracketNotationToStringNodeData() { 174 | // Parse the input. 175 | BracketStringInputParser parser = new BracketStringInputParser(); 176 | Node t1 = parser.fromString(testCase.getT1()); 177 | Node t2 = parser.fromString(testCase.getT2()); 178 | assertEquals(testCase.getT1(), t1.toString()); 179 | assertEquals(testCase.getT2(), t2.toString()); 180 | } 181 | 182 | /** 183 | * Compute TED for a single test case and compare to the correct value. Uses 184 | * node labels with a single string value and unit cost model. 185 | * 186 | * @see node.StringNodeData 187 | * @see costmodel.StringUnitCostModel 188 | */ 189 | @Test 190 | public void distanceUnitCostStringNodeDataCostModel() { 191 | // Parse the input. 192 | BracketStringInputParser parser = new BracketStringInputParser(); 193 | Node t1 = parser.fromString(testCase.getT1()); 194 | Node t2 = parser.fromString(testCase.getT2()); 195 | // Initialise APTED. 196 | APTED apted = new APTED<>(new StringUnitCostModel()); 197 | // This cast is safe due to unit cost. 198 | int result = (int)apted.computeEditDistance(t1, t2); 199 | assertEquals(testCase.getD(), result); 200 | // Verify the symmetric case. 
201 | result = (int)apted.computeEditDistance(t2, t1); 202 | assertEquals(testCase.getD(), result); 203 | } 204 | 205 | /** 206 | * Compute TED for a single test case and compare to the correct value. Uses 207 | * node labels with a single string value and unit cost model. 208 | * 209 | *

Triggers spf_L to execute. The strategy is fixed to left paths in the 210 | * left-hand tree. 211 | * 212 | * @see node.StringNodeData 213 | * @see costmodel.StringUnitCostModel 214 | */ 215 | @Test 216 | public void distanceUnitCostStringNodeDataCostModelSpfL() { 217 | // Parse the input. 218 | BracketStringInputParser parser = new BracketStringInputParser(); 219 | Node t1 = parser.fromString(testCase.getT1()); 220 | Node t2 = parser.fromString(testCase.getT2()); 221 | // Initialise APTED. 222 | APTED apted = new APTED<>(new StringUnitCostModel()); 223 | // This cast is safe due to unit cost. 224 | int result = (int)apted.computeEditDistance_spfTest(t1, t2, 0); 225 | assertEquals(testCase.getD(), result); 226 | } 227 | 228 | /** 229 | * Compute TED for a single test case and compare to the correct value. Uses 230 | * node labels with a single string value and unit cost model. 231 | * 232 | *

Triggers spf_R to execute. The strategy is fixed to right paths in the 233 | * left-hand tree. 234 | * 235 | * @see node.StringNodeData 236 | * @see costmodel.StringUnitCostModel 237 | */ 238 | @Test 239 | public void distanceUnitCostStringNodeDataCostModelSpfR() { 240 | // Parse the input. 241 | BracketStringInputParser parser = new BracketStringInputParser(); 242 | Node t1 = parser.fromString(testCase.getT1()); 243 | Node t2 = parser.fromString(testCase.getT2()); 244 | // Initialise APTED. 245 | APTED apted = new APTED<>(new StringUnitCostModel()); 246 | // This cast is safe due to unit cost. 247 | int result = (int)apted.computeEditDistance_spfTest(t1, t2, 1); 248 | assertEquals(testCase.getD(), result); 249 | } 250 | 251 | // IDEA: Write test that triggers spf_A for each subtree pair - disallow 252 | // using spf_L and spf_R. 253 | 254 | /** 255 | * Compute minimum-cost edit mapping for a single test case and compare its 256 | * cost to the correct TED value. Uses node labels with a single string value 257 | * and unit cost model. 258 | * 259 | * @see node.StringNodeData 260 | * @see costmodel.StringUnitCostModel 261 | */ 262 | @Test 263 | public void mappingCostUnitCostStringNodeDataCostModel() { 264 | // Parse the input. 265 | BracketStringInputParser parser = new BracketStringInputParser(); 266 | Node t1 = parser.fromString(testCase.getT1()); 267 | Node t2 = parser.fromString(testCase.getT2()); 268 | // Initialise APTED. 269 | APTED apted = new APTED<>(new StringUnitCostModel()); 270 | // Although we don't need TED value yet, TED must be computed before the 271 | // mapping. This cast is safe due to unit cost. 272 | apted.computeEditDistance(t1, t2); 273 | // Get TED value corresponding to the computed mapping. 274 | List mapping = apted.computeEditMapping(); 275 | // This cast is safe due to unit cost. 
276 | int result = (int)apted.mappingCost(mapping); 277 | assertEquals(testCase.getD(), result); 278 | } 279 | 280 | } 281 | -------------------------------------------------------------------------------- /src/test/resources/mini.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "testID" : 1, 4 | "t1" : "{f{a}{e{c{b}}{d}}}", 5 | "t2" : "{f{a}{c{e{b}{d}}}}", 6 | "d" : 2 7 | }, 8 | { 9 | "testID" : 2, 10 | "t1" : "{a{b}{c{e}{f}}{d}}", 11 | "t2" : "{a{b}{c{e}}{d}}", 12 | "d" : 1 13 | }, 14 | 15 | { 16 | "testID" : 3, 17 | "t1" : "{a{b}{c{e}{f}}{d}}", 18 | "t2" : "{a{b}{c}{d}}", 19 | "d" : 2 20 | }, 21 | { 22 | "testID" : 4, 23 | "t1" : "{a{x}{c{e}{f}}{d}}", 24 | "t2" : "{a{b}{c}{d}}", 25 | "d" : 3 26 | }, 27 | { 28 | "testID" : 5, 29 | "t1" : "{a{b}{c{e}{f}}{d}}", 30 | "t2" : "{a{b}{c}}", 31 | "d" : 3 32 | }, 33 | { 34 | "testID" : 6, 35 | "t1" : "{a{b{i}{j{u}}}{c{d}{e{q{n}{m}}}}{f{w}}}", 36 | "t2" : "{a}", 37 | "d" : 12 38 | }, 39 | { 40 | "testID" : 7, 41 | "t1" : "{a{b{i}{j{u}}}{c{d}{e{q{n}{m}}}}{f{w}}}", 42 | "t2" : "{x}", 43 | "d" : 13 44 | }, 45 | { 46 | "testID" : 8, 47 | "t1" : "{a{b{i}{j{u}}}{c{d}{e}}{f{w}}}", 48 | "t2" : "{x}", 49 | "d" : 10 50 | }, 51 | { 52 | "testID" : 9, 53 | "t1" : "{a{b}{c{e}{f}}{d}}", 54 | "t2" : "{a{b}{c{e}{f}}{d}}", 55 | "d" : 0 56 | }, 57 | { 58 | "testID" : 10, 59 | "t1" : "{a{b{i}{j}}{c{d}{e}}{f{w}}}", 60 | "t2" : "{x}", 61 | "d" : 9 62 | }, 63 | { 64 | "testID" : 11, 65 | "t1" : "{a{b{i}}{c{d}{e}}{f{w}}}", 66 | "t2" : "{x}", 67 | "d" : 8 68 | }, 69 | { 70 | "testID" : 12, 71 | "t1" : "{a{b}{c{d}{e}}{f{w}}}", 72 | "t2" : "{x}", 73 | "d" : 7 74 | }, 75 | { 76 | "testID" : 13, 77 | "t1" : "{a{b}{c{d}{e}}{f}}", 78 | "t2" : "{x}", 79 | "d" : 6 80 | }, 81 | { 82 | "testID" : 14, 83 | "t1" : "{a{m}{r}{d}{e{z}{i}{l}{t{o}{k}{g}{h}}}}", 84 | "t2" : "{x}", 85 | "d" : 13 86 | }, 87 | { 88 | "testID" : 15, 89 | "t1" : "{a{m{z{o}{k}{g}{h}}{i}{l}{t}}{r}{d}{e}}", 90 | "t2" : "{x}", 91 | "d" 
: 13 92 | }, 93 | { 94 | "testID" : 16, 95 | "t1" : "{a{r}{d}{e{i}{l}{t{k}{g}{h}}}}", 96 | "t2" : "{x}", 97 | "d" : 10 98 | }, 99 | { 100 | "testID" : 17, 101 | "t1" : "{x}", 102 | "t2" : "{a{r}{d}{e{i}{l}{t{k}{g}{h}}}}", 103 | "d" : 10 104 | }, 105 | { 106 | "testID" : 18, 107 | "t1" : "{a{r}{d}{e{s}{t}}}", 108 | "t2" : "{x}", 109 | "d" : 6 110 | }, 111 | { 112 | "testID" : 19, 113 | "t1" : "{x}", 114 | "t2" : "{a{d}{e{l}{t{g}{h}}}}", 115 | "d" : 7 116 | }, 117 | { 118 | "testID" : 20, 119 | "t1" : "{a{d}{e{l}{t{g}{h}}}}", 120 | "t2" : "{x}", 121 | "d" : 7 122 | }, 123 | { 124 | "testID" : 21, 125 | "t1" : "{a{d}{e}{f}{l}{t}}", 126 | "t2" : "{f}", 127 | "d" : 5 128 | }, 129 | { 130 | "testID" : 22, 131 | "t1" : "{a{d}{e}{f}{l}{t}}", 132 | "t2" : "{a}", 133 | "d" : 5 134 | }, 135 | { 136 | "testID" : 23, 137 | "t1" : "{a{d}{e}{f}{l}{t}}", 138 | "t2" : "{x}", 139 | "d" : 6 140 | }, 141 | { 142 | "testID" : 24, 143 | "t1" : "{a{d}{e}{f}}", 144 | "t2" : "{x}", 145 | "d" : 4 146 | }, 147 | { 148 | "testID" : 25, 149 | "t1" : "{x}", 150 | "t2" : "{a{d}{e}{f}}", 151 | "d" : 4 152 | }, 153 | { 154 | "testID" : 26, 155 | "t1" : "{a{b{c}{d{e{f}{g}}{h}}}{i}}", 156 | "t2" : "{e{f}{g}}", 157 | "d" : 6 158 | }, 159 | { 160 | "testID" : 27, 161 | "t1" : "{a{b}{c{d}{e{f}{g{h}{i}}}}}", 162 | "t2" : "{g{h}{i}}", 163 | "d" : 6 164 | }, 165 | { 166 | "testID" : 28, 167 | "t1" : "{a{b{d{f{h}{i}}{g}}{e}}{c}}", 168 | "t2" : "{f{h}{i}}", 169 | "d" : 6 170 | }, 171 | { 172 | "testID" : 29, 173 | "t1" : "{a{b}{c{d{f}{g{h}{i}}}{e}}}", 174 | "t2" : "{g{h}{i}}", 175 | "d" : 6 176 | }, 177 | { 178 | "testID" : 30, 179 | "t1" : "{b{d}{e}}", 180 | "t2" : "{g{h}{i}}", 181 | "d" : 3 182 | }, 183 | { 184 | "testID" : 31, 185 | "t1" : "{a{b{d}{e}}{c}}", 186 | "t2" : "{f{g{h}{i}}{k}}", 187 | "d" : 5 188 | }, 189 | { 190 | "testID" : 32, 191 | "t1" : "{f{d{a}{c{b}}}{e}}", 192 | "t2" : "{f{c{d{a}{b}}{e}}}", 193 | "d" : 2 194 | }, 195 | { 196 | "testID" : 33, 197 | "t1" : "{f{d{a}{c{b}}}{e}}", 198 | 
"t2" : "{f{c{d{a}{b}}}{x}}", 199 | "d" : 3 200 | }, 201 | { 202 | "testID" : 34, 203 | "t1" : "{f{d{a}{c{b}}}{e}}", 204 | "t2" : "{f{c{d{a}{b}}}{e}}", 205 | "d" : 2 206 | }, 207 | { 208 | "testID" : 35, 209 | "t1" : "{a{a{a}{a}}}", 210 | "t2" : "{a{a{a}}}", 211 | "d" : 1 212 | }, 213 | { 214 | "testID" : 36, 215 | "t1" : "{a{b}{c{d}{e}}}", 216 | "t2" : "{a{b{c}}{d}{e}}", 217 | "d" : 2 218 | }, 219 | { 220 | "testID" : 37, 221 | "t1" : "{a{b{c}{d{e}{f}}}{x}}", 222 | "t2" : "{b{c}{d{e}{f}}}", 223 | "d" : 2 224 | }, 225 | { 226 | "testID" : 38, 227 | "t1" : "{a{b{c}{d{e}{f}}}}", 228 | "t2" : "{b{c}{d{e}{f}}}", 229 | "d" : 1 230 | }, 231 | { 232 | "testID" : 39, 233 | "t1" : "{a{b{c}{d{e}{f}}}}", 234 | "t2" : "{a{c}{e}{f}}", 235 | "d" : 2 236 | }, 237 | { 238 | "testID" : 40, 239 | "t1" : "{a{b{c}{d}}}", 240 | "t2" : "{a{c}{d}}", 241 | "d" : 1 242 | }, 243 | { 244 | "testID" : 41, 245 | "t1" : "{a{b{c}}{d}}", 246 | "t2" : "{b{c}{a{d}}}", 247 | "d" : 3 248 | }, 249 | { 250 | "testID" : 42, 251 | "t1" : "{a{b}{c}}", 252 | "t2" : "{b{c}}", 253 | "d" : 2 254 | }, 255 | { 256 | "testID" : 43, 257 | "t1" : "{a{b}{c}}", 258 | "t2" : "{b}", 259 | "d" : 2 260 | }, 261 | { 262 | "testID" : 44, 263 | "t1" : "{a{b}}", 264 | "t2" : "{b}", 265 | "d" : 1 266 | }, 267 | { 268 | "testID" : 45, 269 | "t1" : "{a{b}{c}}", 270 | "t2" : "{x}", 271 | "d" : 3 272 | }, 273 | { 274 | "testID" : 46, 275 | "t1" : "{a{b}{c}}", 276 | "t2" : "{a}", 277 | "d" : 2 278 | }, 279 | { 280 | "testID" : 47, 281 | "t1" : "{a{b}}", 282 | "t2" : "{x{z}}", 283 | "d" : 2 284 | }, 285 | { 286 | "testID" : 48, 287 | "t1" : "{a{b}}", 288 | "t2" : "{a{b}}", 289 | "d" : 0 290 | }, 291 | { 292 | "testID" : 49, 293 | "t1" : "{a{b}}", 294 | "t2" : "{x}", 295 | "d" : 2 296 | }, 297 | { 298 | "testID" : 50, 299 | "t1" : "{a{b}}", 300 | "t2" : "{a}", 301 | "d" : 1 302 | }, 303 | { 304 | "testID" : 51, 305 | "t1" : "{a}", 306 | "t2" : "{x}", 307 | "d" : 1 308 | }, 309 | { 310 | "testID" : 52, 311 | "t1" : "{a}", 312 | "t2" 
: "{a}", 313 | "d" : 0 314 | }, 315 | { 316 | "testID" : 53, 317 | "t1" : "{a}", 318 | "t2" : "{b}", 319 | "d" : 1 320 | }, 321 | { 322 | "testID" : 54, 323 | "t1" : "{a{b}}", 324 | "t2" : "{b}", 325 | "d" : 1 326 | }, 327 | { 328 | "testID" : 55, 329 | "t1" : "{a{b}{c}}", 330 | "t2" : "{b}", 331 | "d" : 2 332 | }, 333 | { 334 | "testID" : 56, 335 | "t1" : "{a{b}{c}}", 336 | "t2" : "{b{c}}", 337 | "d" : 2 338 | }, 339 | { 340 | "testID" : 57, 341 | "t1" : "{a{b{c}}{d}}", 342 | "t2" : "{b{c}{a{d}}}", 343 | "d" : 3 344 | }, 345 | { 346 | "testID" : 58, 347 | "t1" : "{a{b{c}{d}}}", 348 | "t2" : "{a{c}{d}}", 349 | "d" : 1 350 | }, 351 | { 352 | "testID" : 59, 353 | "t1" : "{a{b{c}{d{e}{f}}}}", 354 | "t2" : "{a{c}{e}{f}}", 355 | "d" : 2 356 | }, 357 | { 358 | "testID" : 60, 359 | "t1" : "{a{b{c}{d{e}{f}}}}", 360 | "t2" : "{b{c}{d{e}{f}}}", 361 | "d" : 1 362 | }, 363 | { 364 | "testID" : 61, 365 | "t1" : "{a{b{c}{d{e}{f}}}{x}}", 366 | "t2" : "{b{c}{d{e}{f}}}", 367 | "d" : 2 368 | }, 369 | { 370 | "testID" : 62, 371 | "t1" : "{a{b}{c{d}{e}}}", 372 | "t2" : "{a{b{c}}{d}{e}}", 373 | "d" : 2 374 | }, 375 | { 376 | "testID" : 63, 377 | "t1" : "{a{a{a}{a}}}", 378 | "t2" : "{a{a{a}}}", 379 | "d" : 1 380 | }, 381 | { 382 | "testID" : 65, 383 | "t1" : "{a{b{d}{e}}{c}}", 384 | "t2" : "{f{g{h}{i}}{k}}", 385 | "d" : 5 386 | }, 387 | { 388 | "testID" : 66, 389 | "t1" : "{a{b{c}{d{e}{f}}}}", 390 | "t2" : "{b{c}{e}{f}}", 391 | "d" : 2 392 | }, 393 | { 394 | "testID" : 67, 395 | "t1" : "{f{d{a}{c{b}}}{e}}", 396 | "t2" : "{f{c{d{a}{b}}}{e}}", 397 | "d" : 2 398 | }, 399 | { 400 | "testID" : 68, 401 | "t1" : "{f{d{a}{c{b}}}{e}}", 402 | "t2" : "{f{c{d{a}{b}}}{x}}", 403 | "d" : 3 404 | }, 405 | { 406 | "testID" : 69, 407 | "t1" : "{f{d{a}{c{b}}}{e}}", 408 | "t2" : "{f{c{d{a}{b}}{e}}}", 409 | "d" : 2 410 | }, 411 | { 412 | "testID" : 70, 413 | "t1" : "{2{6}{8{5{3{6}{3}{2}}}}{6}}", 414 | "t2" : "{1{2}{0}}", 415 | "d" : 8 416 | }, 417 | { 418 | "testID" : 72, 419 | "t1" : "{1{1}}", 420 | "t2" : 
"{c{a{b{4{b}{0{3}}{0{c}{9}}}}{1}{a}}{7}{d}}", 421 | "d" : 13 422 | }, 423 | { 424 | "testID" : 73, 425 | "t1" : "{1{2{2{5}{7{6{7}}}}}{0}}", 426 | "t2" : "{0{2}{1}{1}}", 427 | "d" : 8 428 | }, 429 | { 430 | "testID" : 74, 431 | "t1" : "{7{1{3{c{j{c}{j}}{2{d}}{8{5}{a}{3}}}}}{m{2{c{e}{4}{b{h}{f}{k}}}}}{d}}", 432 | "t2" : "{1{1}}", 433 | "d" : 22 434 | }, 435 | { 436 | "testID" : 75, 437 | "t1" : "{1{1}{2}}", 438 | "t2" : "{3{8{4{3}{9{0}{4}{7}}}}{7}{8}}", 439 | "d" : 10 440 | }, 441 | { 442 | "testID" : 77, 443 | "t1" : "{1{3}{5{2{6}{5}{5}}}{5}}", 444 | "t2" : "{0{3}{2}{1}}", 445 | "d" : 6 446 | }, 447 | { 448 | "testID" : 78, 449 | "t1" : "{3{8{n{q}{3{i{r}}}}{p{n{4}{n{s}{l}}}}}{n{3{e{h}{g}{m{j}{6}}}}{a}{r}}{2{p{j{n{f}}}}{n}}}", 450 | "t2" : "{0{1}}", 451 | "d" : 29 452 | } 453 | ] 454 | -------------------------------------------------------------------------------- /src/test/resources/correctness_test_cases.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "testID" : 1, 4 | "t1" : "{f{a}{e{c{b}}{d}}}", 5 | "t2" : "{f{a}{c{e{b}{d}}}}", 6 | "d" : 2 7 | }, 8 | { 9 | "testID" : 2, 10 | "t1" : "{a{b}{c{e}{f}}{d}}", 11 | "t2" : "{a{b}{c{e}}{d}}", 12 | "d" : 1 13 | }, 14 | 15 | { 16 | "testID" : 3, 17 | "t1" : "{a{b}{c{e}{f}}{d}}", 18 | "t2" : "{a{b}{c}{d}}", 19 | "d" : 2 20 | }, 21 | { 22 | "testID" : 4, 23 | "t1" : "{a{x}{c{e}{f}}{d}}", 24 | "t2" : "{a{b}{c}{d}}", 25 | "d" : 3 26 | }, 27 | { 28 | "testID" : 5, 29 | "t1" : "{a{b}{c{e}{f}}{d}}", 30 | "t2" : "{a{b}{c}}", 31 | "d" : 3 32 | }, 33 | { 34 | "testID" : 6, 35 | "t1" : "{a{b{i}{j{u}}}{c{d}{e{q{n}{m}}}}{f{w}}}", 36 | "t2" : "{a}", 37 | "d" : 12 38 | }, 39 | { 40 | "testID" : 7, 41 | "t1" : "{a{b{i}{j{u}}}{c{d}{e{q{n}{m}}}}{f{w}}}", 42 | "t2" : "{x}", 43 | "d" : 13 44 | }, 45 | { 46 | "testID" : 8, 47 | "t1" : "{a{b{i}{j{u}}}{c{d}{e}}{f{w}}}", 48 | "t2" : "{x}", 49 | "d" : 10 50 | }, 51 | { 52 | "testID" : 9, 53 | "t1" : "{a{b}{c{e}{f}}{d}}", 54 | "t2" : 
"{a{b}{c{e}{f}}{d}}", 55 | "d" : 0 56 | }, 57 | { 58 | "testID" : 10, 59 | "t1" : "{a{b{i}{j}}{c{d}{e}}{f{w}}}", 60 | "t2" : "{x}", 61 | "d" : 9 62 | }, 63 | { 64 | "testID" : 11, 65 | "t1" : "{a{b{i}}{c{d}{e}}{f{w}}}", 66 | "t2" : "{x}", 67 | "d" : 8 68 | }, 69 | { 70 | "testID" : 12, 71 | "t1" : "{a{b}{c{d}{e}}{f{w}}}", 72 | "t2" : "{x}", 73 | "d" : 7 74 | }, 75 | { 76 | "testID" : 13, 77 | "t1" : "{a{b}{c{d}{e}}{f}}", 78 | "t2" : "{x}", 79 | "d" : 6 80 | }, 81 | { 82 | "testID" : 14, 83 | "t1" : "{a{m}{r}{d}{e{z}{i}{l}{t{o}{k}{g}{h}}}}", 84 | "t2" : "{x}", 85 | "d" : 13 86 | }, 87 | { 88 | "testID" : 15, 89 | "t1" : "{a{m{z{o}{k}{g}{h}}{i}{l}{t}}{r}{d}{e}}", 90 | "t2" : "{x}", 91 | "d" : 13 92 | }, 93 | { 94 | "testID" : 16, 95 | "t1" : "{a{r}{d}{e{i}{l}{t{k}{g}{h}}}}", 96 | "t2" : "{x}", 97 | "d" : 10 98 | }, 99 | { 100 | "testID" : 17, 101 | "t1" : "{x}", 102 | "t2" : "{a{r}{d}{e{i}{l}{t{k}{g}{h}}}}", 103 | "d" : 10 104 | }, 105 | { 106 | "testID" : 18, 107 | "t1" : "{a{r}{d}{e{s}{t}}}", 108 | "t2" : "{x}", 109 | "d" : 6 110 | }, 111 | { 112 | "testID" : 19, 113 | "t1" : "{x}", 114 | "t2" : "{a{d}{e{l}{t{g}{h}}}}", 115 | "d" : 7 116 | }, 117 | { 118 | "testID" : 20, 119 | "t1" : "{a{d}{e{l}{t{g}{h}}}}", 120 | "t2" : "{x}", 121 | "d" : 7 122 | }, 123 | { 124 | "testID" : 21, 125 | "t1" : "{a{d}{e}{f}{l}{t}}", 126 | "t2" : "{f}", 127 | "d" : 5 128 | }, 129 | { 130 | "testID" : 22, 131 | "t1" : "{a{d}{e}{f}{l}{t}}", 132 | "t2" : "{a}", 133 | "d" : 5 134 | }, 135 | { 136 | "testID" : 23, 137 | "t1" : "{a{d}{e}{f}{l}{t}}", 138 | "t2" : "{x}", 139 | "d" : 6 140 | }, 141 | { 142 | "testID" : 24, 143 | "t1" : "{a{d}{e}{f}}", 144 | "t2" : "{x}", 145 | "d" : 4 146 | }, 147 | { 148 | "testID" : 25, 149 | "t1" : "{x}", 150 | "t2" : "{a{d}{e}{f}}", 151 | "d" : 4 152 | }, 153 | { 154 | "testID" : 26, 155 | "t1" : "{a{b{c}{d{e{f}{g}}{h}}}{i}}", 156 | "t2" : "{e{f}{g}}", 157 | "d" : 6 158 | }, 159 | { 160 | "testID" : 27, 161 | "t1" : "{a{b}{c{d}{e{f}{g{h}{i}}}}}", 162 | "t2" 
: "{g{h}{i}}", 163 | "d" : 6 164 | }, 165 | { 166 | "testID" : 28, 167 | "t1" : "{a{b{d{f{h}{i}}{g}}{e}}{c}}", 168 | "t2" : "{f{h}{i}}", 169 | "d" : 6 170 | }, 171 | { 172 | "testID" : 29, 173 | "t1" : "{a{b}{c{d{f}{g{h}{i}}}{e}}}", 174 | "t2" : "{g{h}{i}}", 175 | "d" : 6 176 | }, 177 | { 178 | "testID" : 30, 179 | "t1" : "{b{d}{e}}", 180 | "t2" : "{g{h}{i}}", 181 | "d" : 3 182 | }, 183 | { 184 | "testID" : 31, 185 | "t1" : "{a{b{d}{e}}{c}}", 186 | "t2" : "{f{g{h}{i}}{k}}", 187 | "d" : 5 188 | }, 189 | { 190 | "testID" : 32, 191 | "t1" : "{f{d{a}{c{b}}}{e}}", 192 | "t2" : "{f{c{d{a}{b}}{e}}}", 193 | "d" : 2 194 | }, 195 | { 196 | "testID" : 33, 197 | "t1" : "{f{d{a}{c{b}}}{e}}", 198 | "t2" : "{f{c{d{a}{b}}}{x}}", 199 | "d" : 3 200 | }, 201 | { 202 | "testID" : 34, 203 | "t1" : "{f{d{a}{c{b}}}{e}}", 204 | "t2" : "{f{c{d{a}{b}}}{e}}", 205 | "d" : 2 206 | }, 207 | { 208 | "testID" : 35, 209 | "t1" : "{a{a{a}{a}}}", 210 | "t2" : "{a{a{a}}}", 211 | "d" : 1 212 | }, 213 | { 214 | "testID" : 36, 215 | "t1" : "{a{b}{c{d}{e}}}", 216 | "t2" : "{a{b{c}}{d}{e}}", 217 | "d" : 2 218 | }, 219 | { 220 | "testID" : 37, 221 | "t1" : "{a{b{c}{d{e}{f}}}{x}}", 222 | "t2" : "{b{c}{d{e}{f}}}", 223 | "d" : 2 224 | }, 225 | { 226 | "testID" : 38, 227 | "t1" : "{a{b{c}{d{e}{f}}}}", 228 | "t2" : "{b{c}{d{e}{f}}}", 229 | "d" : 1 230 | }, 231 | { 232 | "testID" : 39, 233 | "t1" : "{a{b{c}{d{e}{f}}}}", 234 | "t2" : "{a{c}{e}{f}}", 235 | "d" : 2 236 | }, 237 | { 238 | "testID" : 40, 239 | "t1" : "{a{b{c}{d}}}", 240 | "t2" : "{a{c}{d}}", 241 | "d" : 1 242 | }, 243 | { 244 | "testID" : 41, 245 | "t1" : "{a{b{c}}{d}}", 246 | "t2" : "{b{c}{a{d}}}", 247 | "d" : 3 248 | }, 249 | { 250 | "testID" : 42, 251 | "t1" : "{a{b}{c}}", 252 | "t2" : "{b{c}}", 253 | "d" : 2 254 | }, 255 | { 256 | "testID" : 43, 257 | "t1" : "{a{b}{c}}", 258 | "t2" : "{b}", 259 | "d" : 2 260 | }, 261 | { 262 | "testID" : 44, 263 | "t1" : "{a{b}}", 264 | "t2" : "{b}", 265 | "d" : 1 266 | }, 267 | { 268 | "testID" : 45, 269 | "t1" 
: "{a{b}{c}}", 270 | "t2" : "{x}", 271 | "d" : 3 272 | }, 273 | { 274 | "testID" : 46, 275 | "t1" : "{a{b}{c}}", 276 | "t2" : "{a}", 277 | "d" : 2 278 | }, 279 | { 280 | "testID" : 47, 281 | "t1" : "{a{b}}", 282 | "t2" : "{x{z}}", 283 | "d" : 2 284 | }, 285 | { 286 | "testID" : 48, 287 | "t1" : "{a{b}}", 288 | "t2" : "{a{b}}", 289 | "d" : 0 290 | }, 291 | { 292 | "testID" : 49, 293 | "t1" : "{a{b}}", 294 | "t2" : "{x}", 295 | "d" : 2 296 | }, 297 | { 298 | "testID" : 50, 299 | "t1" : "{a{b}}", 300 | "t2" : "{a}", 301 | "d" : 1 302 | }, 303 | { 304 | "testID" : 51, 305 | "t1" : "{a}", 306 | "t2" : "{x}", 307 | "d" : 1 308 | }, 309 | { 310 | "testID" : 52, 311 | "t1" : "{a}", 312 | "t2" : "{a}", 313 | "d" : 0 314 | }, 315 | { 316 | "testID" : 53, 317 | "t1" : "{a}", 318 | "t2" : "{b}", 319 | "d" : 1 320 | }, 321 | { 322 | "testID" : 54, 323 | "t1" : "{a{b}}", 324 | "t2" : "{b}", 325 | "d" : 1 326 | }, 327 | { 328 | "testID" : 55, 329 | "t1" : "{a{b}{c}}", 330 | "t2" : "{b}", 331 | "d" : 2 332 | }, 333 | { 334 | "testID" : 56, 335 | "t1" : "{a{b}{c}}", 336 | "t2" : "{b{c}}", 337 | "d" : 2 338 | }, 339 | { 340 | "testID" : 57, 341 | "t1" : "{a{b{c}}{d}}", 342 | "t2" : "{b{c}{a{d}}}", 343 | "d" : 3 344 | }, 345 | { 346 | "testID" : 58, 347 | "t1" : "{a{b{c}{d}}}", 348 | "t2" : "{a{c}{d}}", 349 | "d" : 1 350 | }, 351 | { 352 | "testID" : 59, 353 | "t1" : "{a{b{c}{d{e}{f}}}}", 354 | "t2" : "{a{c}{e}{f}}", 355 | "d" : 2 356 | }, 357 | { 358 | "testID" : 60, 359 | "t1" : "{a{b{c}{d{e}{f}}}}", 360 | "t2" : "{b{c}{d{e}{f}}}", 361 | "d" : 1 362 | }, 363 | { 364 | "testID" : 61, 365 | "t1" : "{a{b{c}{d{e}{f}}}{x}}", 366 | "t2" : "{b{c}{d{e}{f}}}", 367 | "d" : 2 368 | }, 369 | { 370 | "testID" : 62, 371 | "t1" : "{a{b}{c{d}{e}}}", 372 | "t2" : "{a{b{c}}{d}{e}}", 373 | "d" : 2 374 | }, 375 | { 376 | "testID" : 63, 377 | "t1" : "{a{a{a}{a}}}", 378 | "t2" : "{a{a{a}}}", 379 | "d" : 1 380 | }, 381 | { 382 | "testID" : 64, 383 | "t1" : "{a{a{a}{a{a}{a}}}{a{a}{a{a}}{a}}}", 384 | "t2" 
: "{a{a{a}{a}{a}}{a{a{a}{a}{a}}}}", 385 | "d" : 3 386 | }, 387 | { 388 | "testID" : 65, 389 | "t1" : "{a{b{d}{e}}{c}}", 390 | "t2" : "{f{g{h}{i}}{k}}", 391 | "d" : 5 392 | }, 393 | { 394 | "testID" : 66, 395 | "t1" : "{a{b{c}{d{e}{f}}}}", 396 | "t2" : "{b{c}{e}{f}}", 397 | "d" : 2 398 | }, 399 | { 400 | "testID" : 67, 401 | "t1" : "{f{d{a}{c{b}}}{e}}", 402 | "t2" : "{f{c{d{a}{b}}}{e}}", 403 | "d" : 2 404 | }, 405 | { 406 | "testID" : 68, 407 | "t1" : "{f{d{a}{c{b}}}{e}}", 408 | "t2" : "{f{c{d{a}{b}}}{x}}", 409 | "d" : 3 410 | }, 411 | { 412 | "testID" : 69, 413 | "t1" : "{f{d{a}{c{b}}}{e}}", 414 | "t2" : "{f{c{d{a}{b}}{e}}}", 415 | "d" : 2 416 | }, 417 | { 418 | "testID" : 70, 419 | "t1" : "{2{6}{8{5{3{6}{3}{2}}}}{6}}", 420 | "t2" : "{1{2}{0}}", 421 | "d" : 8 422 | }, 423 | { 424 | "testID" : 71, 425 | "t1" : "{8{9{8{5}{9{6}{9}{7}}{0{6}}}{0}}}", 426 | "t2" : "{1{8}{4{4}{0{3{3{3}{0}}}}}}", 427 | "d" : 14 428 | }, 429 | { 430 | "testID" : 72, 431 | "t1" : "{1{1}}", 432 | "t2" : "{c{a{b{4{b}{0{3}}{0{c}{9}}}}{1}{a}}{7}{d}}", 433 | "d" : 13 434 | }, 435 | { 436 | "testID" : 73, 437 | "t1" : "{1{2{2{5}{7{6{7}}}}}{0}}", 438 | "t2" : "{0{2}{1}{1}}", 439 | "d" : 8 440 | }, 441 | { 442 | "testID" : 74, 443 | "t1" : "{7{1{3{c{j{c}{j}}{2{d}}{8{5}{a}{3}}}}}{m{2{c{e}{4}{b{h}{f}{k}}}}}{d}}", 444 | "t2" : "{1{1}}", 445 | "d" : 22 446 | }, 447 | { 448 | "testID" : 75, 449 | "t1" : "{1{1}{2}}", 450 | "t2" : "{3{8{4{3}{9{0}{4}{7}}}}{7}{8}}", 451 | "d" : 10 452 | }, 453 | { 454 | "testID" : 77, 455 | "t1" : "{1{3}{5{2{6}{5}{5}}}{5}}", 456 | "t2" : "{0{3}{2}{1}}", 457 | "d" : 6 458 | }, 459 | { 460 | "testID" : 78, 461 | "t1" : "{3{8{n{q}{3{i{r}}}}{p{n{4}{n{s}{l}}}}}{n{3{e{h}{g}{m{j}{6}}}}{a}{r}}{2{p{j{n{f}}}}{n}}}", 462 | "t2" : "{0{1}}", 463 | "d" : 29 464 | } 465 | ] 466 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/util/CommandLine.java: 
-------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.util; 25 | 26 | import java.io.BufferedReader; 27 | import java.io.FileReader; 28 | import java.util.Date; 29 | import java.util.List; 30 | import at.unisalzburg.dbresearch.apted.distance.APTED; 31 | import at.unisalzburg.dbresearch.apted.node.Node; 32 | import at.unisalzburg.dbresearch.apted.node.StringNodeData; 33 | import at.unisalzburg.dbresearch.apted.costmodel.CostModel; 34 | import at.unisalzburg.dbresearch.apted.costmodel.StringUnitCostModel; 35 | import at.unisalzburg.dbresearch.apted.parser.InputParser; 36 | import at.unisalzburg.dbresearch.apted.parser.BracketStringInputParser; 37 | 38 | /** 39 | * This is the command line interface for executing APTED algorithm. 40 | * 41 | * @param <C> type of cost model. 42 | * @param <P>

type of input parser. 43 | * @see CostModel 44 | * @see InputParser 45 | */ 46 | public class CommandLine { 47 | 48 | private String helpMessage = 49 | "\n" + 50 | "Compute the edit distance between two trees.\n" + 51 | "\n" + 52 | "SYNTAX\n" + 53 | "\n" + 54 | " java -jar APTED.jar {-t TREE1 TREE2 | -f FILE1 FILE2} [-m] [-v]\n" + 55 | "\n" + 56 | " java -jar APTED.jar -h\n" + 57 | "\n" + 58 | "DESCRIPTION\n" + 59 | "\n" + 60 | " Compute the edit distance between two trees with APTED algorithm [1,2].\n" + 61 | " APTED supersedes our RTED algorithm [3].\n" + 62 | " By default unit cost model is supported where each edit operation\n" + 63 | " has cost 1 (in case of equal labels the cost is 0).\n" + 64 | "\n" + 65 | " For implementing other cost models see the details on github website\n" + 66 | " (https://github.com/DatabaseGroup/apted).\n" + 67 | "\n" + 68 | "LICENCE\n" + 69 | "\n" + 70 | " The source code of this program is published under the MIT licence and\n" + 71 | " can be found on github (https://github.com/DatabaseGroup/apted).\n" + 72 | "\n" + 73 | "OPTIONS\n" + 74 | "\n" + 75 | " -h, --help \n" + 76 | " print this help message.\n" + 77 | "\n" + 78 | " -t TREE1 TREE2,\n" + 79 | " --trees TREE1 TREE2\n" + 80 | " compute the tree edit distance between TREE1 and TREE2. The\n" + 81 | " trees are encoded in the bracket notation, for example, in tree\n" + 82 | " {A{B{X}{Y}{F}}{C}} the root node has label A and two children\n" + 83 | " with labels B and C. B has three children with labels X, Y, F.\n" + 84 | "\n" + 85 | " -f FILE1 FILE2, \n" + 86 | " --files FILE1 FILE2\n" + 87 | " compute the tree edit distance between the two trees stored in\n" + 88 | " the files FILE1 and FILE2. The trees are encoded in bracket\n" + 89 | " notation.\n" + 90 | // "\n" + 91 | // " -c CD CI CR, \n" + 92 | // " --costs CD CI CR\n" + 93 | // " set custom cost for edit operations. 
Default is -c 1 1 1.\n" + 94 | // " CD - cost of node deletion\n" + 95 | // " CI - cost of node insertion\n" + 96 | // " CR - cost of node renaming\n" + 97 | "\n" + 98 | " -v, --verbose\n" + 99 | " print verbose output, including tree edit distance, runtime,\n" + 100 | " number of relevant subproblems and strategy statistics.\n" + 101 | "\n" + 102 | " -m, --mapping\n" + 103 | " compute the minimal edit mapping between two trees. There might\n" + 104 | " be multiple minimal edit mappings. This option computes only one\n" + 105 | " of them. The first line of the output is the cost of the mapping.\n" + 106 | " The following lines represent the edit operations. n and m are\n" + 107 | " postorder IDs (beginning with 1) of nodes in the left-hand and\n" + 108 | " the right-hand trees respectively.\n" + 109 | " n->m - rename node n to m\n" + 110 | " n->0 - delete node n\n" + 111 | " 0->m - insert node m\n" + 112 | "EXAMPLES\n" + 113 | "\n" + 114 | " java -jar APTED.jar -t {a{b}{c}} {a{b{d}}}\n" +// -c 1 1 0.5\n" + 115 | " java -jar APTED.jar -f 1.tree 2.tree\n" + 116 | " java -jar APTED.jar -t {a{b}{c}} {a{b{d}}} -m -v\n" + 117 | "\n" + 118 | "REFERENCES\n" + 119 | "\n" + 120 | " [1] M. Pawlik and N. Augsten. Efficient Computation of the Tree Edit\n" + 121 | " Distance. ACM Transactions on Database Systems (TODS) 40(1). 2015.\n" + 122 | " [2] M. Pawlik and N. Augsten. Tree edit distance: Robust and memory-\n" + 123 | " efficient. Information Systems 56. 2016.\n" + 124 | " [3] M. Pawlik and N. Augsten. RTED: A Robust Algorithm for the Tree Edit\n" + 125 | " Distance. PVLDB 5(4). 2011.\n" + 126 | "\n" + 127 | "AUTHORS\n" + 128 | "\n" + 129 | " Mateusz Pawlik, Nikolaus Augsten"; 130 | 131 | // TODO: Review if all fields are necessary. 132 | private String wrongArgumentsMessage = "Wrong arguments. 
Try \"java -jar RTED.jar --help\" for help."; 133 | 134 | private boolean run, custom, array, strategy, ifSwitch, sota, verbose, demaine, mapping; 135 | private int sotaStrategy; 136 | private String customStrategy, customStrategyArrayFile; 137 | private APTED rted; 138 | private double ted; 139 | 140 | private C costModel; 141 | private P inputParser; 142 | private Node t1; 143 | private Node t2; 144 | 145 | /** 146 | * Constructs the command line. Initialises the cost model and input parser 147 | * of specific types. 148 | * 149 | * @param costModel instance of a specific cost model. 150 | * @param inputParser instance of a specific inputParser. 151 | * @see CostModel 152 | * @see InputParser 153 | */ 154 | public CommandLine(C costModel, P inputParser) { 155 | this.costModel = costModel; 156 | this.inputParser = inputParser; 157 | } 158 | 159 | /** 160 | * Main method, invoced when executing the jar file. 161 | * 162 | * @param args array of command line arguments passed when executing jar file. 163 | */ 164 | public static void main(String[] args) { 165 | CommandLine rtedCL = new CommandLine<>(new StringUnitCostModel(), new BracketStringInputParser()); 166 | rtedCL.runCommandLine(args); 167 | } 168 | 169 | /** 170 | * Run the command line with given arguments. 171 | * 172 | * @param args array of command line arguments passed when executing jar file. 
173 | */ 174 | public void runCommandLine(String[] args) { 175 | rted = new APTED(costModel); 176 | try { 177 | for (int i = 0; i < args.length; i++) { 178 | if (args[i].equals("--help") || args[i].equals("-h")) { 179 | System.out.println(helpMessage); 180 | System.exit(0); 181 | } else if (args[i].equals("-t") || args[i].equals("--trees")) { 182 | parseTreesFromCommandLine(args[i+1], args[i+2]); 183 | i = i+2; 184 | run = true; 185 | } else if (args[i].equals("-f") || args[i].equals("--files")) { 186 | parseTreesFromFiles(args[i+1], args[i+2]); 187 | i = i+2; 188 | run = true; 189 | // TODO: -f option temporarily disabled for refactoring. 190 | // } else if (args[i].equals("-c") || args[i].equals("--costs")) { 191 | // setCosts(args[i+1], args[i+2], args[i+3]); 192 | // i = i+3; 193 | } else if (args[i].equals("-v") || args[i].equals("--verbose")) { 194 | verbose = true; 195 | } else if (args[i].equals("-m") || args[i].equals("--mapping")) { 196 | mapping = true; 197 | } else { 198 | System.out.println(wrongArgumentsMessage); 199 | System.exit(0); 200 | } 201 | } 202 | } catch (ArrayIndexOutOfBoundsException e) { 203 | System.out.println("Too few arguments."); 204 | System.exit(0); 205 | } 206 | 207 | if (!run) { 208 | System.out.println(wrongArgumentsMessage); 209 | System.exit(0); 210 | } 211 | 212 | long time1 = (new Date()).getTime(); 213 | 214 | ted = rted.computeEditDistance(t1, t2); 215 | 216 | long time2 = (new Date()).getTime(); 217 | if (verbose) { 218 | System.out.println("distance: " + ted); 219 | System.out.println("runtime: " + ((time2 - time1) / 1000.0)); 220 | } else { 221 | System.out.println(ted); 222 | } 223 | 224 | if (mapping) { // TED is computed anyways. 
225 | List editMapping = rted.computeEditMapping(); 226 | for (int[] nodeAlignment : editMapping) { 227 | System.out.println(nodeAlignment[0] + "->" + nodeAlignment[1]); 228 | } 229 | } 230 | 231 | } 232 | 233 | /** 234 | * Parse two input trees from the command line and convert them to tree 235 | * representation using {@link Node} class. 236 | * 237 | * @param ts1 source input tree as string. 238 | * @param ts2 destination input tree as string. 239 | * @see Node 240 | */ 241 | private void parseTreesFromCommandLine(String ts1, String ts2) { 242 | try { 243 | t1 = inputParser.fromString(ts1); 244 | } catch (Exception e) { 245 | System.out.println("TREE1 argument has wrong format"); 246 | System.exit(0); 247 | } 248 | try { 249 | t2 = inputParser.fromString(ts2); 250 | } catch (Exception e) { 251 | System.out.println("TREE2 argument has wrong format"); 252 | System.exit(0); 253 | } 254 | } 255 | 256 | 257 | /** 258 | * Parses two input trees from given files and convert them to tree 259 | * representation using {@link Node} class. 260 | * 261 | * @param fs1 path to file with source tree. 262 | * @param fs2 path to file with destination tree. 263 | * @see Node 264 | */ 265 | private void parseTreesFromFiles(String fs1, String fs2) { 266 | try { 267 | t1 = inputParser.fromString((new BufferedReader(new FileReader(fs1))).readLine()); 268 | } catch (Exception e) { 269 | System.out.println("TREE1 argument has wrong format"); 270 | System.exit(0); 271 | } 272 | try { 273 | t2 = inputParser.fromString((new BufferedReader(new FileReader(fs2))).readLine()); 274 | } catch (Exception e) { 275 | System.out.println("TREE2 argument has wrong format"); 276 | System.exit(0); 277 | } 278 | } 279 | 280 | // TODO: Bring the functionalitites below back to life. 281 | 282 | // /** 283 | // * Set custom costs for the edit operations. 284 | // * 285 | // * @deprecated 286 | // * @param cds cost of deletion. 287 | // * @param cis cost of insertion. 
288 | // * @param cms cost of rename (mapping). 289 | // */ 290 | // private void setCosts(String cds, String cis, String cms) { 291 | // try { 292 | // rted.setCustomCosts(Float.parseFloat(cds), Float.parseFloat(cis), Float.parseFloat(cms)); 293 | // } catch (Exception e) { 294 | // System.out.println("One of the costs has wrong format."); 295 | // System.exit(0); 296 | // } 297 | // } 298 | 299 | } 300 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/util/FormatUtilities.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Nikolaus Augsten 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
/**
 * Various formatting utilities: splitting delimited lines, padding strings,
 * and taking apart trees written in bracket notation such as
 * <code>{A{B}{C}}</code>.
 *
 * @author Nikolaus Augsten
 */
public class FormatUtilities {

  /** Words used by {@link #spellOutNumber(String)}, indexed by digit value. */
  private static final String[] DIGIT_WORDS = {
    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"
  };

  /** Kept public for backward compatibility; all methods are static. */
  public FormatUtilities() {
  }

  /**
   * Returns one field of a separator-delimited line.
   *
   * @param fieldNr zero-based index of the field; values below 1 select the
   *     first field.
   * @param line the line to split, may be {@code null}.
   * @param seperator the field separator.
   * @return the trimmed field, or {@code null} if {@code line} is {@code null}
   *     or has fewer than {@code fieldNr + 1} fields.
   */
  public static String getField(int fieldNr, String line, char seperator) {
    if (line == null) {
      return null;
    }
    int start = 0;
    for (int i = 0; i < fieldNr; i++) {
      start = line.indexOf(seperator, start);
      if (start == -1) {
        return null;
      }
      start++; // Continue right after the separator just found.
    }
    int end = line.indexOf(seperator, start);
    String field = (end == -1) ? line.substring(start) : line.substring(start, end);
    return field.trim();
  }

  /**
   * Splits a line at every occurrence of a separator.
   *
   * @param line the line to split, may be {@code null}.
   * @param separator the field separator.
   * @return the trimmed fields (empty fields are kept); an empty array if
   *     {@code line} is {@code null} or empty.
   */
  public static String[] getFields(String line, char separator) {
    if (line == null || line.equals("")) {
      return new String[0];
    }
    List<String> fields = new ArrayList<>();
    StringBuilder current = new StringBuilder();
    for (int i = 0; i < line.length(); i++) {
      char ch = line.charAt(i);
      if (ch == separator) {
        fields.add(current.toString().trim());
        current.setLength(0);
      } else {
        current.append(ch);
      }
    }
    // The text after the last separator is a field too.
    fields.add(current.toString().trim());
    return fields.toArray(new String[0]);
  }

  /**
   * Splits a line like {@link #getFields(String, char)} and strips one pair of
   * enclosing quote characters from every field.
   *
   * @param line the line to split, may be {@code null}.
   * @param separator the field separator.
   * @param quote the quote character to strip.
   * @return the unquoted fields.
   */
  public static String[] getFields(String line, char separator, char quote) {
    String[] fields = getFields(line, separator);
    for (int i = 0; i < fields.length; i++) {
      fields[i] = stripQuotes(fields[i], quote);
    }
    return fields;
  }

  /**
   * Removes one leading and one trailing quote character if both are present.
   *
   * @param s the string to unquote; must not be {@code null}.
   * @param quote the quote character.
   * @return {@code s} without its enclosing quotes, or {@code s} unchanged.
   */
  public static String stripQuotes(String s, char quote) {
    int last = s.length() - 1;
    if (last >= 1 && s.charAt(0) == quote && s.charAt(last) == quote) {
      return s.substring(1, last);
    }
    return s;
  }

  /**
   * Same as {@link #resizeEnd(String, int, char)} with a blank as fill
   * character.
   *
   * @param s the string to resize; must not be {@code null}.
   * @param size the requested length.
   * @return the resized string.
   */
  public static String resizeEnd(String s, int size) {
    return resizeEnd(s, size, ' ');
  }

  /**
   * Builds a string of upper-case letters A-Z from a generator seeded with
   * the current time.
   *
   * @param length number of characters; values below 1 give an empty string.
   * @return the random string.
   */
  public static String getRandomString(int length) {
    Random random = new Random(System.currentTimeMillis());
    StringBuilder sb = new StringBuilder(Math.max(length, 0));
    for (int i = 0; i < length; i++) {
      sb.append((char) ('A' + random.nextInt(26)));
    }
    return sb.toString();
  }

  /**
   * Truncates {@code s} to {@code size} characters, or pads it at the end
   * with {@code fillChar} up to {@code size} characters.
   *
   * @param s the string to resize; must not be {@code null}.
   * @param size the requested length; a negative value returns {@code s}
   *     unchanged.
   * @param fillChar the character used for padding.
   * @return the resized string.
   */
  public static String resizeEnd(String s, int size, char fillChar) {
    if (size >= 0 && size <= s.length()) {
      return s.substring(0, size);
    }
    StringBuilder sb = new StringBuilder(s);
    for (int i = s.length(); i < size; i++) {
      sb.append(fillChar);
    }
    return sb.toString();
  }

  /**
   * Same as {@link #resizeFront(String, int, char)} with a blank as fill
   * character.
   *
   * @param s the string to resize; must not be {@code null}.
   * @param size the requested length.
   * @return the resized string.
   */
  public static String resizeFront(String s, int size) {
    return resizeFront(s, size, ' ');
  }

  /**
   * Truncates {@code s} to its first {@code size} characters, or pads it at
   * the front with {@code fillChar} up to {@code size} characters.
   *
   * @param s the string to resize; must not be {@code null}.
   * @param size the requested length; a negative value returns {@code s}
   *     unchanged.
   * @param fillChar the character used for padding.
   * @return the resized string.
   */
  public static String resizeFront(String s, int size, char fillChar) {
    if (size >= 0 && size <= s.length()) {
      return s.substring(0, size);
    }
    StringBuilder sb = new StringBuilder(Math.max(size, s.length()));
    for (int i = s.length(); i < size; i++) {
      sb.append(fillChar);
    }
    return sb.append(s).toString();
  }

  /**
   * Finds the bracket that closes the opening bracket at {@code pos}.
   * Supported pairs are <code>{}</code>, {@code ()}, {@code []} and
   * {@code <>}; only brackets of the same kind are counted for nesting.
   *
   * @param s the string to search, may be {@code null}.
   * @param pos index of the opening bracket.
   * @return index of the matching closing bracket, or -1 if {@code s} is
   *     {@code null}, {@code pos} is outside of {@code s}, the character at
   *     {@code pos} is not an opening bracket, or no matching bracket exists.
   */
  public static int matchingBracket(String s, int pos) {
    // A negative position is rejected like any other out-of-range position.
    if (s == null || pos < 0 || pos > s.length() - 1) {
      return -1;
    }
    char open = s.charAt(pos);
    char close;
    switch (open) {
      case '{':
        close = '}';
        break;
      case '(':
        close = ')';
        break;
      case '[':
        close = ']';
        break;
      case '<':
        close = '>';
        break;
      default:
        return -1;
    }
    int depth = 1;
    for (int i = pos + 1; i < s.length(); i++) {
      char ch = s.charAt(i);
      if (ch == open) {
        depth++;
      } else if (ch == close) {
        depth--;
        if (depth == 0) {
          return i;
        }
      }
    }
    return -1;
  }

  /**
   * Parses the integer in front of the first {@code ':'} found at index 1 or
   * later.
   *
   * @param s string of the form {@code ID:...}, may be {@code null}.
   * @return the parsed ID, or -1 if {@code s} is {@code null}, empty or
   *     contains no such colon.
   * @throws NumberFormatException if the text before the colon is not an
   *     integer.
   */
  public static int getTreeID(String s) {
    if (s == null || s.length() == 0) {
      return -1;
    }
    int end = s.indexOf(':', 1);
    if (end == -1) {
      return -1;
    }
    return Integer.parseInt(s.substring(0, end));
  }

  /**
   * Returns the root label of a tree in bracket notation.
   *
   * @param s tree in bracket notation, may be {@code null}.
   * @return the text between the first <code>{</code> and the next bracket,
   *     or {@code null} if {@code s} is not enclosed in curly brackets.
   */
  public static String getRoot(String s) {
    if (!isBracketed(s)) {
      return null;
    }
    int end = s.indexOf('{', 1);
    if (end == -1) {
      // Leaf node: the label runs up to the closing bracket.
      end = s.indexOf('}', 1);
    }
    return s.substring(1, end);
  }

  /**
   * Returns the subtrees of the root of a tree in bracket notation.
   *
   * @param s tree in bracket notation, may be {@code null}.
   * @return the children in bracket notation, in input order (empty for a
   *     leaf), or {@code null} if {@code s} is not enclosed in curly brackets.
   */
  public static List<String> getChildren(String s) {
    if (!isBracketed(s)) {
      return null;
    }
    List<String> children = new ArrayList<>();
    int firstChild = s.indexOf('{', 1);
    if (firstChild != -1) {
      collectBracketGroups(s.substring(firstChild, s.length() - 1), children);
    }
    return children;
  }

  /**
   * Splits a tree in bracket notation into its root label and its subtrees.
   *
   * @param s tree in bracket notation, may be {@code null}.
   * @param children output list; it is cleared first and then receives the
   *     children in bracket notation. Must not be {@code null}.
   * @return the root label, or {@code null} if {@code s} is not enclosed in
   *     curly brackets.
   */
  public static String parseTree(String s, List<String> children) {
    children.clear();
    if (!isBracketed(s)) {
      return null;
    }
    int firstChild = s.indexOf('{', 1);
    if (firstChild == -1) {
      // Leaf node: no children, the label runs up to the closing bracket.
      return s.substring(1, s.indexOf('}', 1));
    }
    collectBracketGroups(s.substring(firstChild, s.length() - 1), children);
    return s.substring(1, firstChild);
  }

  /**
   * Joins the elements with commas.
   *
   * @param list elements to join; must not be {@code null}.
   * @return the elements separated by {@code ","}.
   */
  public static String commaSeparatedList(String list[]) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < list.length; i++) {
      if (i > 0) {
        sb.append(',');
      }
      sb.append(list[i]);
    }
    return sb.toString();
  }

  /**
   * Joins the elements with commas, enclosing every element in quotes.
   *
   * @param list elements to join; must not be {@code null}.
   * @param quote the quote character put around every element.
   * @return the quoted elements separated by {@code ","}.
   */
  public static String commaSeparatedList(String list[], char quote) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < list.length; i++) {
      if (i > 0) {
        sb.append(',');
      }
      sb.append(quote).append(list[i]).append(quote);
    }
    return sb.toString();
  }

  /**
   * Replaces every ASCII digit by its English word; other characters are
   * copied unchanged.
   *
   * @param num the text to convert; must not be {@code null}.
   * @return the converted text, e.g. {@code "10"} becomes {@code "onezero"}.
   */
  public static String spellOutNumber(String num) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < num.length(); i++) {
      char ch = num.charAt(i);
      if (ch >= '0' && ch <= '9') {
        sb.append(DIGIT_WORDS[ch - '0']);
      } else {
        sb.append(ch);
      }
    }
    return sb.toString();
  }

  /**
   * Replaces every blank character by a substitute string.
   *
   * @param s the text to convert; must not be {@code null}.
   * @param subst the replacement for each blank.
   * @return the converted text.
   */
  public static String substituteBlanks(String s, String subst) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < s.length(); i++) {
      char ch = s.charAt(i);
      if (ch == ' ') {
        sb.append(subst);
      } else {
        sb.append(ch);
      }
    }
    return sb.toString();
  }

  /**
   * Puts a backslash in front of the characters {@code #}, {@code &},
   * {@code $} and {@code _}.
   *
   * @param s the text to escape; must not be {@code null}.
   * @return the escaped text.
   */
  public static String escapeLatex(String s) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < s.length(); i++) {
      char ch = s.charAt(i);
      if (ch == '#' || ch == '&' || ch == '$' || ch == '_') {
        sb.append('\\');
      }
      sb.append(ch);
    }
    return sb.toString();
  }

  /** Tells whether {@code s} starts with <code>{</code> and ends with <code>}</code>. */
  private static boolean isBracketed(String s) {
    return s != null && s.length() > 0 && s.startsWith("{") && s.endsWith("}");
  }

  /**
   * Appends the consecutive bracket groups at the start of {@code text} to
   * {@code out}; stops at the first position that does not open a balanced group.
   */
  private static void collectBracketGroups(String text, List<String> out) {
    int start = 0;
    while (start < text.length()) {
      int match = matchingBracket(text, start);
      if (match == -1) {
        break;
      }
      out.add(text.substring(start, match + 1));
      start = match + 1;
    }
  }
}
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package distance; 25 | 26 | import java.util.ArrayList; 27 | import java.util.Arrays; 28 | import java.util.Iterator; 29 | import node.Node; 30 | import node.NodeIndexer; 31 | import costmodel.CostModel; 32 | 33 | /** 34 | * Implements an exponential algorithm for the tree edit distance. It computes 35 | * all possible TED mappings between two trees and calculated their minimal 36 | * cost. 37 | * 38 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.distance.AllPossibleMappingsTED} 39 | * 40 | * @param C type of cost model. 41 | * @param D type of node data. 42 | */ 43 | @Deprecated public class AllPossibleMappingsTED { 44 | 45 | /** 46 | * Indexer of the source tree. 47 | * 48 | * @see node.NodeIndexer 49 | */ 50 | private NodeIndexer it1; 51 | 52 | /** 53 | * Indexer of the destination tree. 54 | * 55 | * @see node.NodeIndexer 56 | */ 57 | private NodeIndexer it2; 58 | 59 | /** 60 | * The size of the source input tree. 61 | */ 62 | private int size1; 63 | 64 | /** 65 | * The size of the destination tree. 66 | */ 67 | private int size2; 68 | 69 | /** 70 | * Cost model to be used for calculating costs of edit operations. 71 | */ 72 | private C costModel; 73 | 74 | /** 75 | * Constructs the AllPossibleMappingsTED algorithm with a specific cost model. 76 | * 77 | * @param costModel a cost model used in the algorithm. 78 | */ 79 | public AllPossibleMappingsTED(C costModel) { 80 | this.costModel = costModel; 81 | } 82 | 83 | /** 84 | * Computes the tree edit distance between two trees by trying all possible 85 | * TED mappings. It uses the specified cost model. 86 | * 87 | * @param t1 source tree. 
88 | * @param t2 destination tree. 89 | * @return the tree edit distance between two trees. 90 | */ 91 | public float computeEditDistance(Node t1, Node t2) { 92 | // Index the nodes of both input trees. 93 | init(t1, t2); 94 | ArrayList> mappings = generateAllOneToOneMappings(); 95 | removeNonTEDMappings(mappings); 96 | return getMinCost(mappings); 97 | } 98 | 99 | /** 100 | * Indexes the input trees. 101 | * 102 | * @param t1 source tree. 103 | * @param t2 destination tree. 104 | */ 105 | public void init(Node t1, Node t2) { 106 | it1 = new NodeIndexer(t1, costModel); 107 | it2 = new NodeIndexer(t2, costModel); 108 | size1 = it1.getSize(); 109 | size2 = it2.getSize(); 110 | } 111 | 112 | /** 113 | * Generate all possible 1-1 mappings. 114 | * 115 | *

These mappings do not conform to TED conditions (sibling-order and 116 | * ancestor-descendant). 117 | * 118 | *

A mapping is a list of pairs (arrays) of preorder IDs (identifying 119 | * nodes). 120 | * 121 | * @return set of all 1-1 mappings. 122 | */ 123 | private ArrayList> generateAllOneToOneMappings() { 124 | // Start with an empty mapping - all nodes are deleted or inserted. 125 | ArrayList> mappings = new ArrayList>(1); 126 | mappings.add(new ArrayList(size1 + size2)); 127 | // Add all deleted nodes. 128 | for (int n1 = 0; n1 < size1; n1++) { 129 | mappings.get(0).add(new int[]{n1, -1}); 130 | } 131 | // Add all inserted nodes. 132 | for (int n2 = 0; n2 < size2; n2++) { 133 | mappings.get(0).add(new int[]{-1, n2}); 134 | } 135 | // For each node in the source tree. 136 | for (int n1 = 0; n1 < size1; n1++) { 137 | // Duplicate all mappings and store in mappings_copy. 138 | ArrayList> mappings_copy = deepMappingsCopy(mappings); 139 | // For each node in the destination tree. 140 | for (int n2 = 0; n2 < size2; n2++) { 141 | // For each mapping (produced for all n1 values smaller than 142 | // current n1). 143 | for (ArrayList m : mappings_copy) { 144 | // Produce new mappings with the pair (n1, n2) by adding this 145 | // pair to all mappings where it is valid to add. 146 | boolean element_add = true; 147 | // Verify if (n1, n2) can be added to mapping m. 148 | // All elements in m are checked with (n1, n2) for possible 149 | // violation. 150 | // One-to-one condition. 151 | for (int[] e : m) { 152 | // n1 is not in any of previous mappings 153 | if (e[0] != -1 && e[1] != -1 && e[1] == n2) { 154 | element_add = false; 155 | // System.out.println("Add " + n2 + " false."); 156 | break; 157 | } 158 | } 159 | // New mappings must be produced by duplicating a previous 160 | // mapping and extending it by (n1, n2). 161 | if (element_add) { 162 | ArrayList m_copy = deepMappingCopy(m); 163 | m_copy.add(new int[]{n1, n2}); 164 | // If a pair (n1,n2) is added, (n1,-1) and (-1,n2) must be removed. 
165 | removeMappingElement(m_copy, new int[]{n1, -1}); 166 | removeMappingElement(m_copy, new int[]{-1, n2}); 167 | mappings.add(m_copy); 168 | } 169 | } 170 | } 171 | } 172 | return mappings; 173 | } 174 | 175 | /** 176 | * Given all 1-1 mappings, discard these that violate TED conditions 177 | * (ancestor-descendant and sibling order). 178 | * 179 | * @param mappings set of all 1-1 mappings. 180 | */ 181 | private void removeNonTEDMappings(ArrayList> mappings) { 182 | // Validate each mapping separately. 183 | // Iterator safely removes mappings while iterating. 184 | for (Iterator> mit = mappings.iterator(); mit.hasNext();) { 185 | ArrayList m = mit.next(); 186 | if (!isTEDMapping(m)) { 187 | mit.remove(); 188 | } 189 | } 190 | } 191 | 192 | /** 193 | * Test if a 1-1 mapping is a TED mapping. 194 | * 195 | * @param m a 1-1 mapping. 196 | * @return {@code true} if {@code m} is a TED mapping, and {@code false} 197 | * otherwise. 198 | */ 199 | boolean isTEDMapping(ArrayList m) { 200 | // Validate each pair of pairs of mapped nodes in the mapping. 201 | for (int[] e1 : m) { 202 | // Use only pairs of mapped nodes for validation. 203 | if (e1[0] == -1 || e1[1] == -1) { 204 | continue; 205 | } 206 | for (int[] e2 : m) { 207 | // Use only pairs of mapped nodes for validation. 208 | if (e2[0] == -1 || e2[1] == -1) { 209 | continue; 210 | } 211 | // If any of the conditions below doesn't hold, discard m. 212 | // Validate ancestor-descendant condition. 213 | boolean a = e1[0] < e2[0] && it1.preL_to_preR[e1[0]] < it1.preL_to_preR[e2[0]]; 214 | boolean b = e1[1] < e2[1] && it2.preL_to_preR[e1[1]] < it2.preL_to_preR[e2[1]]; 215 | if ((a && !b) || (!a && b)) { 216 | // Discard the mapping. 217 | // If this condition doesn't hold, the next condition 218 | // doesn't have to be verified any more and any other 219 | // pair (e1, e2) doesn't have to be verified any more. 220 | return false; 221 | } 222 | // Validate sibling-order condition. 
223 | a = e1[0] < e2[0] && it1.preL_to_preR[e1[0]] > it1.preL_to_preR[e2[0]]; 224 | b = e1[1] < e2[1] && it2.preL_to_preR[e1[1]] > it2.preL_to_preR[e2[1]]; 225 | if ((a && !b) || (!a && b)) { 226 | // Discard the mapping. 227 | return false; 228 | } 229 | } 230 | } 231 | return true; 232 | } 233 | 234 | /** 235 | * Given list of all TED mappings, calculate the cost of the minimal-cost 236 | * mapping. 237 | * 238 | * @param tedMappings set of all TED mappings. 239 | * @return the minimal cost among all TED mappings. 240 | */ 241 | float getMinCost(ArrayList> tedMappings) { 242 | // Initialize min_cost to the upper bound. 243 | float min_cost = size1 + size2; 244 | // System.out.println("min_cost = " + min_cost); 245 | // Verify cost of each mapping. 246 | for (ArrayList m : tedMappings) { 247 | float m_cost = 0; 248 | // Sum up edit costs for all elements in the mapping m. 249 | for (int[] e : m) { 250 | // Add edit operation cost. 251 | if (e[0] > -1 && e[1] > -1) { 252 | m_cost += costModel.ren(it1.preL_to_node[e[0]], it2.preL_to_node[e[1]]); // USE COST MODEL - rename e[0] to e[1]. 253 | } else if (e[0] > -1) { 254 | m_cost += costModel.del(it1.preL_to_node[e[0]]); // USE COST MODEL - insert e[1]. 255 | } else { 256 | m_cost += costModel.ins(it2.preL_to_node[e[1]]); // USE COST MODEL - delete e[0]. 257 | } 258 | // Break as soon as the current min_cost is exceeded. 259 | // Only for early loop break. 260 | if (m_cost >= min_cost) { 261 | break; 262 | } 263 | } 264 | // Store the minimal cost - compare m_cost and min_cost 265 | if (m_cost < min_cost) { 266 | min_cost = m_cost; 267 | } 268 | // System.out.printf("min_cost = %.8f\n", min_cost); 269 | } 270 | return min_cost; 271 | } 272 | 273 | /** 274 | * Makes a deep copy of a mapping. 275 | * 276 | * @param mapping mapping to copy. 277 | * @return a mapping. 
278 | */ 279 | private ArrayList deepMappingCopy(ArrayList mapping) { 280 | ArrayList mapping_copy = new ArrayList(mapping.size()); 281 | for (int[] me : mapping) { // for each mapping element in a mapping 282 | mapping_copy.add(Arrays.copyOf(me, me.length)); 283 | } 284 | return mapping_copy; 285 | } 286 | 287 | /** 288 | * Makes a deep copy of a set of mappings. 289 | * 290 | * @param mappings set of mappings to copy. 291 | * @return set of mappings. 292 | */ 293 | private ArrayList> deepMappingsCopy(ArrayList> mappings) { 294 | ArrayList> mappings_copy = new ArrayList>(mappings.size()); 295 | for (ArrayList m : mappings) { // for each mapping in mappings 296 | ArrayList m_copy = new ArrayList(m.size()); 297 | for (int[] me : m) { // for each mapping element in a mapping 298 | m_copy.add(Arrays.copyOf(me, me.length)); 299 | } 300 | mappings_copy.add(m_copy); 301 | } 302 | return mappings_copy; 303 | } 304 | 305 | /** 306 | * Constructs a string representation of a set of mappings. 307 | * 308 | * @param mappings set of mappings to convert. 309 | * @return string representation of a set of mappings. 310 | */ 311 | private String mappingsToString(ArrayList> mappings) { 312 | String result = "Mappings:\n"; 313 | for (ArrayList m : mappings) { 314 | result += "{"; 315 | for (int[] me : m) { 316 | result += "[" + me[0] + "," + me[1] + "]"; 317 | } 318 | result += "}\n"; 319 | } 320 | return result; 321 | } 322 | 323 | /** 324 | * Removes an element (edit operation) from a mapping by its value. In our 325 | * case the element to remove can be always found in the mapping. 326 | * 327 | * @param m an edit mapping. 328 | * @param e element to remove from {@code m}. 329 | * @return {@code true} if {@code e} has been removed, and {@code false} 330 | * otherwise. 
331 | */ 332 | private boolean removeMappingElement(ArrayList m, int[] e) { 333 | for (int[] me : m) { 334 | if (me[0] == e[0] && me[1] == e[1]) { 335 | m.remove(me); 336 | return true; 337 | } 338 | } 339 | return false; 340 | } 341 | } 342 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/distance/AllPossibleMappingsTED.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.distance; 25 | 26 | import java.util.ArrayList; 27 | import java.util.Arrays; 28 | import java.util.Iterator; 29 | import at.unisalzburg.dbresearch.apted.node.Node; 30 | import at.unisalzburg.dbresearch.apted.node.NodeIndexer; 31 | import at.unisalzburg.dbresearch.apted.costmodel.CostModel; 32 | 33 | /** 34 | * Implements an exponential algorithm for the tree edit distance. It computes 35 | * all possible TED mappings between two trees and calculated their minimal 36 | * cost. 37 | * 38 | * @param type of cost model. 39 | * @param type of node data. 40 | */ 41 | public class AllPossibleMappingsTED { 42 | 43 | /** 44 | * Indexer of the source tree. 45 | * 46 | * @see node.NodeIndexer 47 | */ 48 | private NodeIndexer it1; 49 | 50 | /** 51 | * Indexer of the destination tree. 52 | * 53 | * @see node.NodeIndexer 54 | */ 55 | private NodeIndexer it2; 56 | 57 | /** 58 | * The size of the source input tree. 59 | */ 60 | private int size1; 61 | 62 | /** 63 | * The size of the destination tree. 64 | */ 65 | private int size2; 66 | 67 | /** 68 | * Cost model to be used for calculating costs of edit operations. 69 | */ 70 | private C costModel; 71 | 72 | /** 73 | * Constructs the AllPossibleMappingsTED algorithm with a specific cost model. 74 | * 75 | * @param costModel a cost model used in the algorithm. 76 | */ 77 | public AllPossibleMappingsTED(C costModel) { 78 | this.costModel = costModel; 79 | } 80 | 81 | /** 82 | * Computes the tree edit distance between two trees by trying all possible 83 | * TED mappings. It uses the specified cost model. 84 | * 85 | * @param t1 source tree. 86 | * @param t2 destination tree. 87 | * @return the tree edit distance between two trees. 88 | */ 89 | public float computeEditDistance(Node t1, Node t2) { 90 | // Index the nodes of both input trees. 
91 | init(t1, t2); 92 | ArrayList> mappings = generateAllOneToOneMappings(); 93 | removeNonTEDMappings(mappings); 94 | return getMinCost(mappings); 95 | } 96 | 97 | /** 98 | * Indexes the input trees. 99 | * 100 | * @param t1 source tree. 101 | * @param t2 destination tree. 102 | */ 103 | public void init(Node t1, Node t2) { 104 | it1 = new NodeIndexer(t1, costModel); 105 | it2 = new NodeIndexer(t2, costModel); 106 | size1 = it1.getSize(); 107 | size2 = it2.getSize(); 108 | } 109 | 110 | /** 111 | * Generate all possible 1-1 mappings. 112 | * 113 | *

These mappings do not conform to TED conditions (sibling-order and 114 | * ancestor-descendant). 115 | * 116 | *

A mapping is a list of pairs (arrays) of preorder IDs (identifying 117 | * nodes). 118 | * 119 | * @return set of all 1-1 mappings. 120 | */ 121 | private ArrayList> generateAllOneToOneMappings() { 122 | // Start with an empty mapping - all nodes are deleted or inserted. 123 | ArrayList> mappings = new ArrayList>(1); 124 | mappings.add(new ArrayList(size1 + size2)); 125 | // Add all deleted nodes. 126 | for (int n1 = 0; n1 < size1; n1++) { 127 | mappings.get(0).add(new int[]{n1, -1}); 128 | } 129 | // Add all inserted nodes. 130 | for (int n2 = 0; n2 < size2; n2++) { 131 | mappings.get(0).add(new int[]{-1, n2}); 132 | } 133 | // For each node in the source tree. 134 | for (int n1 = 0; n1 < size1; n1++) { 135 | // Duplicate all mappings and store in mappings_copy. 136 | ArrayList> mappings_copy = deepMappingsCopy(mappings); 137 | // For each node in the destination tree. 138 | for (int n2 = 0; n2 < size2; n2++) { 139 | // For each mapping (produced for all n1 values smaller than 140 | // current n1). 141 | for (ArrayList m : mappings_copy) { 142 | // Produce new mappings with the pair (n1, n2) by adding this 143 | // pair to all mappings where it is valid to add. 144 | boolean element_add = true; 145 | // Verify if (n1, n2) can be added to mapping m. 146 | // All elements in m are checked with (n1, n2) for possible 147 | // violation. 148 | // One-to-one condition. 149 | for (int[] e : m) { 150 | // n1 is not in any of previous mappings 151 | if (e[0] != -1 && e[1] != -1 && e[1] == n2) { 152 | element_add = false; 153 | // System.out.println("Add " + n2 + " false."); 154 | break; 155 | } 156 | } 157 | // New mappings must be produced by duplicating a previous 158 | // mapping and extending it by (n1, n2). 159 | if (element_add) { 160 | ArrayList m_copy = deepMappingCopy(m); 161 | m_copy.add(new int[]{n1, n2}); 162 | // If a pair (n1,n2) is added, (n1,-1) and (-1,n2) must be removed. 
163 | removeMappingElement(m_copy, new int[]{n1, -1}); 164 | removeMappingElement(m_copy, new int[]{-1, n2}); 165 | mappings.add(m_copy); 166 | } 167 | } 168 | } 169 | } 170 | return mappings; 171 | } 172 | 173 | /** 174 | * Given all 1-1 mappings, discard these that violate TED conditions 175 | * (ancestor-descendant and sibling order). 176 | * 177 | * @param mappings set of all 1-1 mappings. 178 | */ 179 | private void removeNonTEDMappings(ArrayList> mappings) { 180 | // Validate each mapping separately. 181 | // Iterator safely removes mappings while iterating. 182 | for (Iterator> mit = mappings.iterator(); mit.hasNext();) { 183 | ArrayList m = mit.next(); 184 | if (!isTEDMapping(m)) { 185 | mit.remove(); 186 | } 187 | } 188 | } 189 | 190 | /** 191 | * Test if a 1-1 mapping is a TED mapping. 192 | * 193 | * @param m a 1-1 mapping. 194 | * @return {@code true} if {@code m} is a TED mapping, and {@code false} 195 | * otherwise. 196 | */ 197 | boolean isTEDMapping(ArrayList m) { 198 | // Validate each pair of pairs of mapped nodes in the mapping. 199 | for (int[] e1 : m) { 200 | // Use only pairs of mapped nodes for validation. 201 | if (e1[0] == -1 || e1[1] == -1) { 202 | continue; 203 | } 204 | for (int[] e2 : m) { 205 | // Use only pairs of mapped nodes for validation. 206 | if (e2[0] == -1 || e2[1] == -1) { 207 | continue; 208 | } 209 | // If any of the conditions below doesn't hold, discard m. 210 | // Validate ancestor-descendant condition. 211 | boolean a = e1[0] < e2[0] && it1.preL_to_preR[e1[0]] < it1.preL_to_preR[e2[0]]; 212 | boolean b = e1[1] < e2[1] && it2.preL_to_preR[e1[1]] < it2.preL_to_preR[e2[1]]; 213 | if ((a && !b) || (!a && b)) { 214 | // Discard the mapping. 215 | // If this condition doesn't hold, the next condition 216 | // doesn't have to be verified any more and any other 217 | // pair (e1, e2) doesn't have to be verified any more. 218 | return false; 219 | } 220 | // Validate sibling-order condition. 
221 | a = e1[0] < e2[0] && it1.preL_to_preR[e1[0]] > it1.preL_to_preR[e2[0]]; 222 | b = e1[1] < e2[1] && it2.preL_to_preR[e1[1]] > it2.preL_to_preR[e2[1]]; 223 | if ((a && !b) || (!a && b)) { 224 | // Discard the mapping. 225 | return false; 226 | } 227 | } 228 | } 229 | return true; 230 | } 231 | 232 | /** 233 | * Given list of all TED mappings, calculate the cost of the minimal-cost 234 | * mapping. 235 | * 236 | * @param tedMappings set of all TED mappings. 237 | * @return the minimal cost among all TED mappings. 238 | */ 239 | float getMinCost(ArrayList> tedMappings) { 240 | // Initialize min_cost to the upper bound. 241 | float min_cost = size1 + size2; 242 | // System.out.println("min_cost = " + min_cost); 243 | // Verify cost of each mapping. 244 | for (ArrayList m : tedMappings) { 245 | float m_cost = 0; 246 | // Sum up edit costs for all elements in the mapping m. 247 | for (int[] e : m) { 248 | // Add edit operation cost. 249 | if (e[0] > -1 && e[1] > -1) { 250 | m_cost += costModel.ren(it1.preL_to_node[e[0]], it2.preL_to_node[e[1]]); // USE COST MODEL - rename e[0] to e[1]. 251 | } else if (e[0] > -1) { 252 | m_cost += costModel.del(it1.preL_to_node[e[0]]); // USE COST MODEL - insert e[1]. 253 | } else { 254 | m_cost += costModel.ins(it2.preL_to_node[e[1]]); // USE COST MODEL - delete e[0]. 255 | } 256 | // Break as soon as the current min_cost is exceeded. 257 | // Only for early loop break. 258 | if (m_cost >= min_cost) { 259 | break; 260 | } 261 | } 262 | // Store the minimal cost - compare m_cost and min_cost 263 | if (m_cost < min_cost) { 264 | min_cost = m_cost; 265 | } 266 | // System.out.printf("min_cost = %.8f\n", min_cost); 267 | } 268 | return min_cost; 269 | } 270 | 271 | /** 272 | * Makes a deep copy of a mapping. 273 | * 274 | * @param mapping mapping to copy. 275 | * @return a mapping. 
276 | */ 277 | private ArrayList deepMappingCopy(ArrayList mapping) { 278 | ArrayList mapping_copy = new ArrayList(mapping.size()); 279 | for (int[] me : mapping) { // for each mapping element in a mapping 280 | mapping_copy.add(Arrays.copyOf(me, me.length)); 281 | } 282 | return mapping_copy; 283 | } 284 | 285 | /** 286 | * Makes a deep copy of a set of mappings. 287 | * 288 | * @param mappings set of mappings to copy. 289 | * @return set of mappings. 290 | */ 291 | private ArrayList> deepMappingsCopy(ArrayList> mappings) { 292 | ArrayList> mappings_copy = new ArrayList>(mappings.size()); 293 | for (ArrayList m : mappings) { // for each mapping in mappings 294 | ArrayList m_copy = new ArrayList(m.size()); 295 | for (int[] me : m) { // for each mapping element in a mapping 296 | m_copy.add(Arrays.copyOf(me, me.length)); 297 | } 298 | mappings_copy.add(m_copy); 299 | } 300 | return mappings_copy; 301 | } 302 | 303 | /** 304 | * Constructs a string representation of a set of mappings. 305 | * 306 | * @param mappings set of mappings to convert. 307 | * @return string representation of a set of mappings. 308 | */ 309 | private String mappingsToString(ArrayList> mappings) { 310 | String result = "Mappings:\n"; 311 | for (ArrayList m : mappings) { 312 | result += "{"; 313 | for (int[] me : m) { 314 | result += "[" + me[0] + "," + me[1] + "]"; 315 | } 316 | result += "}\n"; 317 | } 318 | return result; 319 | } 320 | 321 | /** 322 | * Removes an element (edit operation) from a mapping by its value. In our 323 | * case the element to remove can be always found in the mapping. 324 | * 325 | * @param m an edit mapping. 326 | * @param e element to remove from {@code m}. 327 | * @return {@code true} if {@code e} has been removed, and {@code false} 328 | * otherwise. 
329 | */ 330 | private boolean removeMappingElement(ArrayList m, int[] e) { 331 | for (int[] me : m) { 332 | if (me[0] == e[0] && me[1] == e[1]) { 333 | m.remove(me); 334 | return true; 335 | } 336 | } 337 | return false; 338 | } 339 | } 340 | -------------------------------------------------------------------------------- /src/main/java/at/unisalzburg/dbresearch/apted/node/NodeIndexer.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | package at.unisalzburg.dbresearch.apted.node; 25 | 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import java.util.Arrays; 29 | import java.util.Iterator; 30 | import at.unisalzburg.dbresearch.apted.node.Node; 31 | import at.unisalzburg.dbresearch.apted.costmodel.CostModel; 32 | 33 | /** 34 | * Indexes nodes of the input tree to the algorithm that is already parsed to 35 | * tree structure using {@link node.Node} class. Stores various indices on 36 | * nodes required for efficient computation of APTED [1,2]. Additionally, it 37 | * stores 38 | * single-value properties of the tree. 39 | * 40 | *

For indexing we use four tree traversals that assign ids to the nodes: 41 | *

    42 | *
  • left-to-right preorder [1], 43 | *
  • right-to-left preorder [1], 44 | *
  • left-to-right postorder [2], 45 | *
  • right-to-left postorder [2]. 46 | *
47 | * 48 | *

See the source code for more algorithm-related comments. 49 | * 50 | *

References: 51 | *

    52 | *
  • [1] M. Pawlik and N. Augsten. Efficient Computation of the Tree Edit 53 | * Distance. ACM Transactions on Database Systems (TODS) 40(1). 2015. 54 | *
  • [2] M. Pawlik and N. Augsten. Tree edit distance: Robust and memory- 55 | * efficient. Information Systems 56. 2016. 56 | *
57 | * 58 | * @param type of node data. 59 | * @param type of cost model. 60 | * @see node.Node 61 | * @see parser.InputParser 62 | */ 63 | public class NodeIndexer { 64 | 65 | // [TODO] Be consistent in naming index variables: _to_. 66 | 67 | // Structure indices. 68 | 69 | /** 70 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 71 | * to Node object corresponding to n. Used for cost of edit operations. 72 | * 73 | * @see node.Node 74 | */ 75 | public Node preL_to_node[]; 76 | 77 | /** 78 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 79 | * to the size of n's subtree (node n and all its descendants). 80 | */ 81 | public int sizes[]; 82 | 83 | /** 84 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 85 | * to the left-to-right preorder id of n's parent. 86 | */ 87 | public int parents[]; 88 | 89 | /** 90 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 91 | * to the array of n's children. Size of children array at node n equals the number 92 | * of n's children. 93 | */ 94 | public int children[][]; 95 | 96 | /** 97 | * Index from left-to-right postorder id of node n (starting with {@code 0}) 98 | * to the left-to-right postorder id of n's leftmost leaf descendant. 99 | */ 100 | public int postL_to_lld[]; 101 | 102 | /** 103 | * Index from right-to-left postorder id of node n (starting with {@code 0}) 104 | * to the right-to-left postorder id of n's rightmost leaf descendant. 105 | */ 106 | public int postR_to_rld[]; 107 | 108 | /** 109 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 110 | * to the left-to-right preorder id of the first leaf node to the left of n. 111 | * If there is no leaf node to the left of n, it is represented with the 112 | * value {@code -1} [1, Section 8.4]. 
113 | */ 114 | public int preL_to_ln[]; 115 | 116 | /** 117 | * Index from right-to-left preorder id of node n (starting with {@code 0}) 118 | * to the right-to-left preorder id of the first leaf node to the right of n. 119 | * If there is no leaf node to the right of n, it is represented with the 120 | * value {@code -1} [1, Section 8.4]. 121 | */ 122 | public int preR_to_ln[]; 123 | 124 | /** 125 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 126 | * to a boolean value that states if node n lies on the leftmost path 127 | * starting at n's parent [2, Algorithm 1, Lines 26,36]. 128 | */ 129 | public boolean nodeType_L[]; 130 | 131 | /** 132 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 133 | * to a boolean value that states if node n lies on the rightmost path 134 | * starting at n's parent input tree [2, Section 5.3, Algorithm 1, Lines 26,36]. 135 | */ 136 | public boolean nodeType_R[]; 137 | 138 | // Traversal translation indices. 139 | 140 | /** 141 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 142 | * to the right-to-left preorder id of n. 143 | */ 144 | public int preL_to_preR[]; 145 | 146 | /** 147 | * Index from right-to-left preorder id of node n (starting with {@code 0}) 148 | * to the left-to-right preorder id of n. 149 | */ 150 | public int preR_to_preL[]; 151 | 152 | /** 153 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 154 | * to the left-to-right postorder id of n. 155 | */ 156 | public int preL_to_postL[]; 157 | 158 | /** 159 | * Index from left-to-right postorder id of node n (starting with {@code 0}) 160 | * to the left-to-right preorder id of n. 161 | */ 162 | public int postL_to_preL[]; 163 | 164 | /** 165 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 166 | * to the right-to-left postorder id of n. 
167 | */ 168 | public int preL_to_postR[]; 169 | 170 | /** 171 | * Index from right-to-left postorder id of node n (starting with {@code 0}) 172 | * to the left-to-right preorder id of n. 173 | */ 174 | public int postR_to_preL[]; 175 | 176 | // Cost indices. 177 | 178 | /** 179 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 180 | * to the cost of spf_L (single path function using the leftmost path) for 181 | * the subtree rooted at n [1, Section 5.2]. 182 | */ 183 | public int preL_to_kr_sum[]; 184 | 185 | /** 186 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 187 | * to the cost of spf_R (single path function using the rightmost path) for 188 | * the subtree rooted at n [1, Section 5.2]. 189 | */ 190 | public int preL_to_rev_kr_sum[]; 191 | 192 | /** 193 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 194 | * to the cost of spf_A (single path function using an inner path) for the 195 | * subtree rooted at n [1, Section 5.2]. 196 | */ 197 | public int preL_to_desc_sum[]; 198 | 199 | /** 200 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 201 | * to the cost of deleting all nodes in the subtree rooted at n. 202 | */ 203 | public float preL_to_sumDelCost[]; 204 | 205 | /** 206 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 207 | * to the cost of inserting all nodes in the subtree rooted at n. 208 | */ 209 | public float preL_to_sumInsCost[]; 210 | 211 | // Variables holding values modified at runtime while the algorithm executes. 212 | 213 | /** 214 | * Stores the left-to-right preorder id of the current subtree's root node. 215 | * Used in the tree decomposition phase of APTED [1, Algorithm 1]. 216 | */ 217 | private int currentNode; 218 | 219 | // Structure single-value variables. 220 | 221 | /** 222 | * Stores the size of the input tree. 
223 | */ 224 | private int treeSize; 225 | 226 | /** 227 | * Stores the number of leftmost-child leaf nodes in the input tree 228 | * [2, Section 5.3]. 229 | */ 230 | public int lchl; 231 | 232 | /** 233 | * Stores the number of rightmost-child leaf nodes in the input tree 234 | * [2, Section 5.3]. 235 | */ 236 | public int rchl; 237 | 238 | // Variables used temporarily while indexing. 239 | 240 | /** 241 | * Temporary variable used in indexing for storing subtree size. 242 | */ 243 | private int sizeTmp; 244 | 245 | /** 246 | * Temporary variable used in indexing for storing sum of subtree sizes 247 | * rooted at descendant nodes. 248 | */ 249 | private int descSizesTmp; 250 | 251 | /** 252 | * Temporary variable used in indexing for storing sum of keyroot node sizes. 253 | */ 254 | private int krSizesSumTmp; 255 | 256 | /** 257 | * Temporary variable used in indexing for storing sum of right-to-left 258 | * keyroot node sizes. 259 | */ 260 | private int revkrSizesSumTmp; 261 | 262 | /** 263 | * Temporary variable used in indexing for storing preorder index of a node. 264 | */ 265 | private int preorderTmp; 266 | 267 | private C costModel; 268 | 269 | /** 270 | * Indexes the nodes of input trees and stores the indices for quick access 271 | * from APTED algorithm. 272 | * 273 | * @param inputTree an input tree to APTED. Its nodes will be indexed. 274 | * @param costModel instance of a cost model to compute preL_to_sumDelCost 275 | * and preL_to_sumInsCost. 276 | */ 277 | public NodeIndexer(Node inputTree, C costModel) { 278 | // Initialise variables. 279 | sizeTmp = 0; 280 | descSizesTmp = 0; 281 | krSizesSumTmp = 0; 282 | revkrSizesSumTmp = 0; 283 | preorderTmp = 0; 284 | currentNode = 0; 285 | treeSize = inputTree.getNodeCount(); 286 | 287 | // Initialise indices with the lengths equal to the tree size. 
288 | sizes = new int[treeSize]; 289 | preL_to_preR = new int[treeSize]; 290 | preR_to_preL = new int[treeSize]; 291 | preL_to_postL = new int[treeSize]; 292 | postL_to_preL = new int[treeSize]; 293 | preL_to_postR = new int[treeSize]; 294 | postR_to_preL = new int[treeSize]; 295 | postL_to_lld = new int[treeSize]; 296 | postR_to_rld = new int[treeSize]; 297 | preL_to_node = new Node[treeSize]; 298 | preL_to_ln = new int[treeSize]; 299 | preR_to_ln = new int[treeSize]; 300 | preL_to_kr_sum = new int[treeSize]; 301 | preL_to_rev_kr_sum = new int[treeSize]; 302 | preL_to_desc_sum = new int[treeSize]; 303 | 304 | preL_to_sumDelCost = new float[treeSize]; 305 | preL_to_sumInsCost = new float[treeSize]; 306 | 307 | children = new int[treeSize][]; 308 | nodeType_L = new boolean[treeSize]; 309 | nodeType_R = new boolean[treeSize]; 310 | parents = new int[treeSize]; 311 | parents[0] = -1; // The root has no parent. 312 | 313 | this.costModel = costModel; 314 | 315 | // Index the nodes. 316 | indexNodes(inputTree, -1); 317 | postTraversalIndexing(); 318 | } 319 | 320 | /** 321 | * Indexes the nodes of the input tree. Stores information about each tree 322 | * node in index arrays. It computes the following indices: {@link #parents}, 323 | * {@link #children}, {@link #nodeType_L}, {@link #nodeType_R}, 324 | * {@link #preL_to_desc_sum}, {@link #preL_to_kr_sum}, 325 | * {@link #preL_to_rev_kr_sum}, {@link #preL_to_node}, {@link #sizes}, 326 | * {@link #preL_to_preR}, {@link #preR_to_preL}, {@link #postL_to_preL}, 327 | * {@link #preL_to_postL}, {@link #preL_to_postR}, {@link #postR_to_preL}. 328 | * 329 | *

It is a recursive method that traverses the tree once. 330 | * 331 | * @param node is the current node while traversing the input tree. 332 | * @param postorder is the postorder id of the current node. 333 | * @return postorder id of the current node. 334 | */ 335 | private int indexNodes(Node node, int postorder) { 336 | // Initialise variables. 337 | int currentSize = 0; 338 | int childrenCount = 0; 339 | int descSizes = 0; 340 | int krSizesSum = 0; 341 | int revkrSizesSum = 0; 342 | int preorder = preorderTmp; 343 | int preorderR = 0; 344 | int currentPreorder = -1; 345 | // Initialise empty array to store children of this node. 346 | ArrayList childrenPreorders = new ArrayList<>(); 347 | 348 | // Store the preorder id of the current node to use it after the recursion. 349 | preorderTmp++; 350 | 351 | // Loop over children of a node. 352 | Iterator> childrenIt = node.getChildren().iterator(); 353 | while (childrenIt.hasNext()) { 354 | childrenCount++; 355 | currentPreorder = preorderTmp; 356 | parents[currentPreorder] = preorder; 357 | 358 | // Execute method recursively for next child. 
359 | postorder = indexNodes(childrenIt.next(), postorder); 360 | 361 | childrenPreorders.add(Integer.valueOf(currentPreorder)); 362 | 363 | currentSize += 1 + sizeTmp; 364 | descSizes += descSizesTmp; 365 | if(childrenCount > 1) { 366 | krSizesSum += krSizesSumTmp + sizeTmp + 1; 367 | } else { 368 | krSizesSum += krSizesSumTmp; 369 | nodeType_L[currentPreorder] = true; 370 | } 371 | if(childrenIt.hasNext()) { 372 | revkrSizesSum += revkrSizesSumTmp + sizeTmp + 1; 373 | } else { 374 | revkrSizesSum += revkrSizesSumTmp; 375 | nodeType_R[currentPreorder] = true; 376 | } 377 | } 378 | 379 | postorder++; 380 | 381 | int currentDescSizes = descSizes + currentSize + 1; 382 | preL_to_desc_sum[preorder] = ((currentSize + 1) * (currentSize + 1 + 3)) / 2 - currentDescSizes; 383 | preL_to_kr_sum[preorder] = krSizesSum + currentSize + 1; 384 | preL_to_rev_kr_sum[preorder] = revkrSizesSum + currentSize + 1; 385 | 386 | // Store pointer to a node object corresponding to preorder. 387 | preL_to_node[preorder] = node; 388 | 389 | sizes[preorder] = currentSize + 1; 390 | preorderR = treeSize - 1 - postorder; 391 | preL_to_preR[preorder] = preorderR; 392 | preR_to_preL[preorderR] = preorder; 393 | 394 | children[preorder] = toIntArray(childrenPreorders); 395 | 396 | descSizesTmp = currentDescSizes; 397 | sizeTmp = currentSize; 398 | krSizesSumTmp = krSizesSum; 399 | revkrSizesSumTmp = revkrSizesSum; 400 | 401 | postL_to_preL[postorder] = preorder; 402 | preL_to_postL[preorder] = postorder; 403 | preL_to_postR[preorder] = treeSize-1-preorder; 404 | postR_to_preL[treeSize-1-preorder] = preorder; 405 | 406 | return postorder; 407 | } 408 | 409 | /** 410 | * Indexes the nodes of the input tree. It computes the following indices, 411 | * which could not be computed immediately while traversing the tree in 412 | * {@link #indexNodes}: {@link #preL_to_ln}, {@link #postL_to_lld}, 413 | * {@link #postR_to_rld}, {@link #preR_to_ln}. 414 | * 415 | *

Runs in linear time in the input tree size. Currently requires two 416 | * loops over input tree nodes. Can be reduced to one loop (see the code). 417 | */ 418 | private void postTraversalIndexing() { 419 | int currentLeaf = -1; 420 | int nodeForSum = -1; 421 | int parentForSum = -1; 422 | for(int i = 0; i < treeSize; i++) { 423 | preL_to_ln[i] = currentLeaf; 424 | if(isLeaf(i)) { 425 | currentLeaf = i; 426 | } 427 | 428 | // This block stores leftmost leaf descendants for each node 429 | // indexed in postorder. Used for mapping computation. 430 | // Added by Victor. 431 | int postl = i; // Assume that the for loop iterates postorder. 432 | int preorder = postL_to_preL[i]; 433 | if (sizes[preorder] == 1) { 434 | postL_to_lld[postl] = postl; 435 | } else { 436 | postL_to_lld[postl] = postL_to_lld[preL_to_postL[children[preorder][0]]]; 437 | } 438 | // This block stores rightmost leaf descendants for each node 439 | // indexed in right-to-left postorder. 440 | // [TODO] Use postL_to_lld and postR_to_rld instead of APTED.getLLD 441 | // and APTED.gerRLD methods, remove these method. 442 | // Result: faster lookup of these values. 443 | int postr = i; // Assume that the for loop iterates reversed postorder. 444 | preorder = postR_to_preL[postr]; 445 | if (sizes[preorder] == 1) { 446 | postR_to_rld[postr] = postr; 447 | } else { 448 | postR_to_rld[postr] = postR_to_rld[preL_to_postR[children[preorder][children[preorder].length-1]]]; 449 | } 450 | // Count lchl and rchl. 451 | // [TODO] There are no values for parent node. 452 | if (sizes[i] == 1) { 453 | int parent = parents[i]; 454 | if (parent > -1) { 455 | if (parent+1 == i) { 456 | lchl++; 457 | } else if (preL_to_preR[parent]+1 == preL_to_preR[i]) { 458 | rchl++; 459 | } 460 | } 461 | } 462 | 463 | // Sum up costs of deleting and inserting entire subtrees. 464 | // Reverse the node index. Here, we need traverse nodes bottom-up. 
465 | nodeForSum = treeSize - i - 1; 466 | parentForSum = parents[nodeForSum]; 467 | // Update myself. 468 | preL_to_sumDelCost[nodeForSum] += costModel.del(preL_to_node[nodeForSum]); 469 | preL_to_sumInsCost[nodeForSum] += costModel.ins(preL_to_node[nodeForSum]); 470 | if (parentForSum > -1) { 471 | // Update my parent. 472 | preL_to_sumDelCost[parentForSum] += preL_to_sumDelCost[nodeForSum]; 473 | preL_to_sumInsCost[parentForSum] += preL_to_sumInsCost[nodeForSum]; 474 | } 475 | } 476 | 477 | currentLeaf = -1; 478 | // [TODO] Merge with the other loop. Assume different traversal. 479 | for(int i = 0; i < sizes[0]; i++) { 480 | preR_to_ln[i] = currentLeaf; 481 | if(isLeaf(preR_to_preL[i])) { 482 | currentLeaf = i; 483 | } 484 | } 485 | } 486 | 487 | /** 488 | * An abbreviation that uses indices to calculate the left-to-right preorder 489 | * id of the leftmost leaf node of the given node. 490 | * 491 | * @param preL left-to-right preorder id of a node. 492 | * @return left-to-right preorder id of the leftmost leaf node of preL. 493 | */ 494 | public int preL_to_lld(int preL) { 495 | return postL_to_preL[postL_to_lld[preL_to_postL[preL]]]; 496 | } 497 | 498 | /** 499 | * An abbreviation that uses indices to calculate the left-to-right preorder 500 | * id of the rightmost leaf node of the given node. 501 | * 502 | * @param preL left-to-right preorder id of a node. 503 | * @return left-to-right preorder id of the rightmost leaf node of preL. 504 | */ 505 | public int preL_to_rld(int preL) { 506 | return postR_to_preL[postR_to_rld[preL_to_postR[preL]]]; 507 | } 508 | 509 | /** 510 | * An abbreviation that uses indices to retrieve pointer to {@link node.Node} 511 | * of the given node. 512 | * 513 | * @param postL left-to-right postorder id of a node. 514 | * @return {@link node.Node} corresponding to postL. 
515 | */ 516 | public Node postL_to_node(int postL) { 517 | return preL_to_node[postL_to_preL[postL]]; 518 | } 519 | 520 | /** 521 | * An abbreviation that uses indices to retrieve pointer to {@link node.Node} 522 | * of the given node. 523 | * 524 | * @param postR right-to-left postorder id of a node. 525 | * @return {@link node.Node} corresponding to postR. 526 | */ 527 | public Node postR_to_node(int postR) { 528 | return preL_to_node[postR_to_preL[postR]]; 529 | } 530 | 531 | /** 532 | * Returns the number of nodes in the input tree. 533 | * 534 | * @return number of nodes in the tree. 535 | */ 536 | public int getSize() { 537 | return treeSize; 538 | } 539 | 540 | /** 541 | * Verifies if node is a leaf. 542 | * 543 | * @param node preorder id of a node to verify. 544 | * @return {@code true} if {@code node} is a leaf, {@code false} otherwise. 545 | */ 546 | public boolean isLeaf(int node) { 547 | return sizes[node] == 1; 548 | } 549 | 550 | /** 551 | * Converts {@link ArrayList} of integer values to an array. Reads all items 552 | * in the list and copies to the output array. The size of output array equals 553 | * the number of elements in the list. 554 | * 555 | * @param integers ArrayList with integer values. 556 | * @return array with values from input ArrayList. 557 | */ 558 | private int[] toIntArray(ArrayList integers) { 559 | int ints[] = new int[integers.size()]; 560 | int i = 0; 561 | for (Integer n : integers) { 562 | ints[i++] = n.intValue(); 563 | } 564 | return ints; 565 | } 566 | 567 | /** 568 | * Returns the root node of the currently processed subtree in the tree 569 | * decomposition part of APTED [1, Algorithm 1]. At each point, we have to 570 | * know which subtree do we process. 571 | * 572 | * @return current subtree root node. 573 | */ 574 | public int getCurrentNode() { 575 | return currentNode; 576 | } 577 | 578 | /** 579 | * Stores the root nodes's preorder id of the currently processes subtree. 
580 | * 581 | * @param preorder preorder id of the root node. 582 | */ 583 | public void setCurrentNode(int preorder) { 584 | currentNode = preorder; 585 | } 586 | 587 | } 588 | -------------------------------------------------------------------------------- /src/main/java/node/NodeIndexer.java: -------------------------------------------------------------------------------- 1 | /* MIT License 2 | * 3 | * Copyright (c) 2017 Mateusz Pawlik 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | package node; 25 | 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import java.util.Arrays; 29 | import java.util.Iterator; 30 | import node.Node; 31 | import costmodel.CostModel; 32 | 33 | /** 34 | * Indexes nodes of the input tree to the algorithm that is already parsed to 35 | * tree structure using {@link node.Node} class. 
 * Stores various indices on nodes required for efficient computation of
 * APTED [1,2]. Additionally, it stores single-value properties of the tree.
 *
 * <p>For indexing we use four tree traversals that assign ids to the nodes:
 * <ul>
 * <li>left-to-right preorder [1],
 * <li>right-to-left preorder [1],
 * <li>left-to-right postorder [2],
 * <li>right-to-left postorder [2].
 * </ul>
 *
 * <p>See the source code for more algorithm-related comments.
 *
 * <p>References:
 * <ul>
 * <li>[1] M. Pawlik and N. Augsten. Efficient Computation of the Tree Edit
 *   Distance. ACM Transactions on Database Systems (TODS) 40(1). 2015.
 * <li>[2] M. Pawlik and N. Augsten. Tree edit distance: Robust and memory-
 *   efficient. Information Systems 56. 2016.
 * </ul>
57 | * 58 | * @deprecated Due to packaging update replaced by {@link at.unisalzburg.dbresearch.apted.node.NodeIndexer} 59 | * 60 | * @param type of node data. 61 | * @param type of cost model. 62 | * @see node.Node 63 | * @see parser.InputParser 64 | */ 65 | @Deprecated public class NodeIndexer { 66 | 67 | // [TODO] Be consistent in naming index variables: _to_. 68 | 69 | // Structure indices. 70 | 71 | /** 72 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 73 | * to Node object corresponding to n. Used for cost of edit operations. 74 | * 75 | * @see node.Node 76 | */ 77 | public Node preL_to_node[]; 78 | 79 | /** 80 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 81 | * to the size of n's subtree (node n and all its descendants). 82 | */ 83 | public int sizes[]; 84 | 85 | /** 86 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 87 | * to the left-to-right preorder id of n's parent. 88 | */ 89 | public int parents[]; 90 | 91 | /** 92 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 93 | * to the array of n's children. Size of children array at node n equals the number 94 | * of n's children. 95 | */ 96 | public int children[][]; 97 | 98 | /** 99 | * Index from left-to-right postorder id of node n (starting with {@code 0}) 100 | * to the left-to-right postorder id of n's leftmost leaf descendant. 101 | */ 102 | public int postL_to_lld[]; 103 | 104 | /** 105 | * Index from right-to-left postorder id of node n (starting with {@code 0}) 106 | * to the right-to-left postorder id of n's rightmost leaf descendant. 107 | */ 108 | public int postR_to_rld[]; 109 | 110 | /** 111 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 112 | * to the left-to-right preorder id of the first leaf node to the left of n. 113 | * If there is no leaf node to the left of n, it is represented with the 114 | * value {@code -1} [1, Section 8.4]. 
115 | */ 116 | public int preL_to_ln[]; 117 | 118 | /** 119 | * Index from right-to-left preorder id of node n (starting with {@code 0}) 120 | * to the right-to-left preorder id of the first leaf node to the right of n. 121 | * If there is no leaf node to the right of n, it is represented with the 122 | * value {@code -1} [1, Section 8.4]. 123 | */ 124 | public int preR_to_ln[]; 125 | 126 | /** 127 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 128 | * to a boolean value that states if node n lies on the leftmost path 129 | * starting at n's parent [2, Algorithm 1, Lines 26,36]. 130 | */ 131 | public boolean nodeType_L[]; 132 | 133 | /** 134 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 135 | * to a boolean value that states if node n lies on the rightmost path 136 | * starting at n's parent input tree [2, Section 5.3, Algorithm 1, Lines 26,36]. 137 | */ 138 | public boolean nodeType_R[]; 139 | 140 | // Traversal translation indices. 141 | 142 | /** 143 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 144 | * to the right-to-left preorder id of n. 145 | */ 146 | public int preL_to_preR[]; 147 | 148 | /** 149 | * Index from right-to-left preorder id of node n (starting with {@code 0}) 150 | * to the left-to-right preorder id of n. 151 | */ 152 | public int preR_to_preL[]; 153 | 154 | /** 155 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 156 | * to the left-to-right postorder id of n. 157 | */ 158 | public int preL_to_postL[]; 159 | 160 | /** 161 | * Index from left-to-right postorder id of node n (starting with {@code 0}) 162 | * to the left-to-right preorder id of n. 163 | */ 164 | public int postL_to_preL[]; 165 | 166 | /** 167 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 168 | * to the right-to-left postorder id of n. 
169 | */ 170 | public int preL_to_postR[]; 171 | 172 | /** 173 | * Index from right-to-left postorder id of node n (starting with {@code 0}) 174 | * to the left-to-right preorder id of n. 175 | */ 176 | public int postR_to_preL[]; 177 | 178 | // Cost indices. 179 | 180 | /** 181 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 182 | * to the cost of spf_L (single path function using the leftmost path) for 183 | * the subtree rooted at n [1, Section 5.2]. 184 | */ 185 | public int preL_to_kr_sum[]; 186 | 187 | /** 188 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 189 | * to the cost of spf_R (single path function using the rightmost path) for 190 | * the subtree rooted at n [1, Section 5.2]. 191 | */ 192 | public int preL_to_rev_kr_sum[]; 193 | 194 | /** 195 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 196 | * to the cost of spf_A (single path function using an inner path) for the 197 | * subtree rooted at n [1, Section 5.2]. 198 | */ 199 | public int preL_to_desc_sum[]; 200 | 201 | /** 202 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 203 | * to the cost of deleting all nodes in the subtree rooted at n. 204 | */ 205 | public float preL_to_sumDelCost[]; 206 | 207 | /** 208 | * Index from left-to-right preorder id of node n (starting with {@code 0}) 209 | * to the cost of inserting all nodes in the subtree rooted at n. 210 | */ 211 | public float preL_to_sumInsCost[]; 212 | 213 | // Variables holding values modified at runtime while the algorithm executes. 214 | 215 | /** 216 | * Stores the left-to-right preorder id of the current subtree's root node. 217 | * Used in the tree decomposition phase of APTED [1, Algorithm 1]. 218 | */ 219 | private int currentNode; 220 | 221 | // Structure single-value variables. 222 | 223 | /** 224 | * Stores the size of the input tree. 
225 | */ 226 | private int treeSize; 227 | 228 | /** 229 | * Stores the number of leftmost-child leaf nodes in the input tree 230 | * [2, Section 5.3]. 231 | */ 232 | public int lchl; 233 | 234 | /** 235 | * Stores the number of rightmost-child leaf nodes in the input tree 236 | * [2, Section 5.3]. 237 | */ 238 | public int rchl; 239 | 240 | // Variables used temporarily while indexing. 241 | 242 | /** 243 | * Temporary variable used in indexing for storing subtree size. 244 | */ 245 | private int sizeTmp; 246 | 247 | /** 248 | * Temporary variable used in indexing for storing sum of subtree sizes 249 | * rooted at descendant nodes. 250 | */ 251 | private int descSizesTmp; 252 | 253 | /** 254 | * Temporary variable used in indexing for storing sum of keyroot node sizes. 255 | */ 256 | private int krSizesSumTmp; 257 | 258 | /** 259 | * Temporary variable used in indexing for storing sum of right-to-left 260 | * keyroot node sizes. 261 | */ 262 | private int revkrSizesSumTmp; 263 | 264 | /** 265 | * Temporary variable used in indexing for storing preorder index of a node. 266 | */ 267 | private int preorderTmp; 268 | 269 | private C costModel; 270 | 271 | /** 272 | * Indexes the nodes of input trees and stores the indices for quick access 273 | * from APTED algorithm. 274 | * 275 | * @param inputTree an input tree to APTED. Its nodes will be indexed. 276 | * @param costModel instance of a cost model to compute preL_to_sumDelCost 277 | * and preL_to_sumInsCost. 278 | */ 279 | public NodeIndexer(Node inputTree, C costModel) { 280 | // Initialise variables. 281 | sizeTmp = 0; 282 | descSizesTmp = 0; 283 | krSizesSumTmp = 0; 284 | revkrSizesSumTmp = 0; 285 | preorderTmp = 0; 286 | currentNode = 0; 287 | treeSize = inputTree.getNodeCount(); 288 | 289 | // Initialise indices with the lengths equal to the tree size. 
290 | sizes = new int[treeSize]; 291 | preL_to_preR = new int[treeSize]; 292 | preR_to_preL = new int[treeSize]; 293 | preL_to_postL = new int[treeSize]; 294 | postL_to_preL = new int[treeSize]; 295 | preL_to_postR = new int[treeSize]; 296 | postR_to_preL = new int[treeSize]; 297 | postL_to_lld = new int[treeSize]; 298 | postR_to_rld = new int[treeSize]; 299 | preL_to_node = new Node[treeSize]; 300 | preL_to_ln = new int[treeSize]; 301 | preR_to_ln = new int[treeSize]; 302 | preL_to_kr_sum = new int[treeSize]; 303 | preL_to_rev_kr_sum = new int[treeSize]; 304 | preL_to_desc_sum = new int[treeSize]; 305 | 306 | preL_to_sumDelCost = new float[treeSize]; 307 | preL_to_sumInsCost = new float[treeSize]; 308 | 309 | children = new int[treeSize][]; 310 | nodeType_L = new boolean[treeSize]; 311 | nodeType_R = new boolean[treeSize]; 312 | parents = new int[treeSize]; 313 | parents[0] = -1; // The root has no parent. 314 | 315 | this.costModel = costModel; 316 | 317 | // Index the nodes. 318 | indexNodes(inputTree, -1); 319 | postTraversalIndexing(); 320 | } 321 | 322 | /** 323 | * Indexes the nodes of the input tree. Stores information about each tree 324 | * node in index arrays. It computes the following indices: {@link #parents}, 325 | * {@link #children}, {@link #nodeType_L}, {@link #nodeType_R}, 326 | * {@link #preL_to_desc_sum}, {@link #preL_to_kr_sum}, 327 | * {@link #preL_to_rev_kr_sum}, {@link #preL_to_node}, {@link #sizes}, 328 | * {@link #preL_to_preR}, {@link #preR_to_preL}, {@link #postL_to_preL}, 329 | * {@link #preL_to_postL}, {@link #preL_to_postR}, {@link #postR_to_preL}. 330 | * 331 | *

It is a recursive method that traverses the tree once. 332 | * 333 | * @param node is the current node while traversing the input tree. 334 | * @param postorder is the postorder id of the current node. 335 | * @return postorder id of the current node. 336 | */ 337 | private int indexNodes(Node node, int postorder) { 338 | // Initialise variables. 339 | int currentSize = 0; 340 | int childrenCount = 0; 341 | int descSizes = 0; 342 | int krSizesSum = 0; 343 | int revkrSizesSum = 0; 344 | int preorder = preorderTmp; 345 | int preorderR = 0; 346 | int currentPreorder = -1; 347 | // Initialise empty array to store children of this node. 348 | ArrayList childrenPreorders = new ArrayList<>(); 349 | 350 | // Store the preorder id of the current node to use it after the recursion. 351 | preorderTmp++; 352 | 353 | // Loop over children of a node. 354 | Iterator> childrenIt = node.getChildren().iterator(); 355 | while (childrenIt.hasNext()) { 356 | childrenCount++; 357 | currentPreorder = preorderTmp; 358 | parents[currentPreorder] = preorder; 359 | 360 | // Execute method recursively for next child. 
361 | postorder = indexNodes(childrenIt.next(), postorder); 362 | 363 | childrenPreorders.add(Integer.valueOf(currentPreorder)); 364 | 365 | currentSize += 1 + sizeTmp; 366 | descSizes += descSizesTmp; 367 | if(childrenCount > 1) { 368 | krSizesSum += krSizesSumTmp + sizeTmp + 1; 369 | } else { 370 | krSizesSum += krSizesSumTmp; 371 | nodeType_L[currentPreorder] = true; 372 | } 373 | if(childrenIt.hasNext()) { 374 | revkrSizesSum += revkrSizesSumTmp + sizeTmp + 1; 375 | } else { 376 | revkrSizesSum += revkrSizesSumTmp; 377 | nodeType_R[currentPreorder] = true; 378 | } 379 | } 380 | 381 | postorder++; 382 | 383 | int currentDescSizes = descSizes + currentSize + 1; 384 | preL_to_desc_sum[preorder] = ((currentSize + 1) * (currentSize + 1 + 3)) / 2 - currentDescSizes; 385 | preL_to_kr_sum[preorder] = krSizesSum + currentSize + 1; 386 | preL_to_rev_kr_sum[preorder] = revkrSizesSum + currentSize + 1; 387 | 388 | // Store pointer to a node object corresponding to preorder. 389 | preL_to_node[preorder] = node; 390 | 391 | sizes[preorder] = currentSize + 1; 392 | preorderR = treeSize - 1 - postorder; 393 | preL_to_preR[preorder] = preorderR; 394 | preR_to_preL[preorderR] = preorder; 395 | 396 | children[preorder] = toIntArray(childrenPreorders); 397 | 398 | descSizesTmp = currentDescSizes; 399 | sizeTmp = currentSize; 400 | krSizesSumTmp = krSizesSum; 401 | revkrSizesSumTmp = revkrSizesSum; 402 | 403 | postL_to_preL[postorder] = preorder; 404 | preL_to_postL[preorder] = postorder; 405 | preL_to_postR[preorder] = treeSize-1-preorder; 406 | postR_to_preL[treeSize-1-preorder] = preorder; 407 | 408 | return postorder; 409 | } 410 | 411 | /** 412 | * Indexes the nodes of the input tree. It computes the following indices, 413 | * which could not be computed immediately while traversing the tree in 414 | * {@link #indexNodes}: {@link #preL_to_ln}, {@link #postL_to_lld}, 415 | * {@link #postR_to_rld}, {@link #preR_to_ln}. 416 | * 417 | *

Runs in linear time in the input tree size. Currently requires two 418 | * loops over input tree nodes. Can be reduced to one loop (see the code). 419 | */ 420 | private void postTraversalIndexing() { 421 | int currentLeaf = -1; 422 | int nodeForSum = -1; 423 | int parentForSum = -1; 424 | for(int i = 0; i < treeSize; i++) { 425 | preL_to_ln[i] = currentLeaf; 426 | if(isLeaf(i)) { 427 | currentLeaf = i; 428 | } 429 | 430 | // This block stores leftmost leaf descendants for each node 431 | // indexed in postorder. Used for mapping computation. 432 | // Added by Victor. 433 | int postl = i; // Assume that the for loop iterates postorder. 434 | int preorder = postL_to_preL[i]; 435 | if (sizes[preorder] == 1) { 436 | postL_to_lld[postl] = postl; 437 | } else { 438 | postL_to_lld[postl] = postL_to_lld[preL_to_postL[children[preorder][0]]]; 439 | } 440 | // This block stores rightmost leaf descendants for each node 441 | // indexed in right-to-left postorder. 442 | // [TODO] Use postL_to_lld and postR_to_rld instead of APTED.getLLD 443 | // and APTED.gerRLD methods, remove these method. 444 | // Result: faster lookup of these values. 445 | int postr = i; // Assume that the for loop iterates reversed postorder. 446 | preorder = postR_to_preL[postr]; 447 | if (sizes[preorder] == 1) { 448 | postR_to_rld[postr] = postr; 449 | } else { 450 | postR_to_rld[postr] = postR_to_rld[preL_to_postR[children[preorder][children[preorder].length-1]]]; 451 | } 452 | // Count lchl and rchl. 453 | // [TODO] There are no values for parent node. 454 | if (sizes[i] == 1) { 455 | int parent = parents[i]; 456 | if (parent > -1) { 457 | if (parent+1 == i) { 458 | lchl++; 459 | } else if (preL_to_preR[parent]+1 == preL_to_preR[i]) { 460 | rchl++; 461 | } 462 | } 463 | } 464 | 465 | // Sum up costs of deleting and inserting entire subtrees. 466 | // Reverse the node index. Here, we need traverse nodes bottom-up. 
467 | nodeForSum = treeSize - i - 1; 468 | parentForSum = parents[nodeForSum]; 469 | // Update myself. 470 | preL_to_sumDelCost[nodeForSum] += costModel.del(preL_to_node[nodeForSum]); 471 | preL_to_sumInsCost[nodeForSum] += costModel.ins(preL_to_node[nodeForSum]); 472 | if (parentForSum > -1) { 473 | // Update my parent. 474 | preL_to_sumDelCost[parentForSum] += preL_to_sumDelCost[nodeForSum]; 475 | preL_to_sumInsCost[parentForSum] += preL_to_sumInsCost[nodeForSum]; 476 | } 477 | } 478 | 479 | currentLeaf = -1; 480 | // [TODO] Merge with the other loop. Assume different traversal. 481 | for(int i = 0; i < sizes[0]; i++) { 482 | preR_to_ln[i] = currentLeaf; 483 | if(isLeaf(preR_to_preL[i])) { 484 | currentLeaf = i; 485 | } 486 | } 487 | } 488 | 489 | /** 490 | * An abbreviation that uses indices to calculate the left-to-right preorder 491 | * id of the leftmost leaf node of the given node. 492 | * 493 | * @param preL left-to-right preorder id of a node. 494 | * @return left-to-right preorder id of the leftmost leaf node of preL. 495 | */ 496 | public int preL_to_lld(int preL) { 497 | return postL_to_preL[postL_to_lld[preL_to_postL[preL]]]; 498 | } 499 | 500 | /** 501 | * An abbreviation that uses indices to calculate the left-to-right preorder 502 | * id of the rightmost leaf node of the given node. 503 | * 504 | * @param preL left-to-right preorder id of a node. 505 | * @return left-to-right preorder id of the rightmost leaf node of preL. 506 | */ 507 | public int preL_to_rld(int preL) { 508 | return postR_to_preL[postR_to_rld[preL_to_postR[preL]]]; 509 | } 510 | 511 | /** 512 | * An abbreviation that uses indices to retrieve pointer to {@link node.Node} 513 | * of the given node. 514 | * 515 | * @param postL left-to-right postorder id of a node. 516 | * @return {@link node.Node} corresponding to postL. 
517 | */ 518 | public Node postL_to_node(int postL) { 519 | return preL_to_node[postL_to_preL[postL]]; 520 | } 521 | 522 | /** 523 | * An abbreviation that uses indices to retrieve pointer to {@link node.Node} 524 | * of the given node. 525 | * 526 | * @param postR right-to-left postorder id of a node. 527 | * @return {@link node.Node} corresponding to postR. 528 | */ 529 | public Node postR_to_node(int postR) { 530 | return preL_to_node[postR_to_preL[postR]]; 531 | } 532 | 533 | /** 534 | * Returns the number of nodes in the input tree. 535 | * 536 | * @return number of nodes in the tree. 537 | */ 538 | public int getSize() { 539 | return treeSize; 540 | } 541 | 542 | /** 543 | * Verifies if node is a leaf. 544 | * 545 | * @param node preorder id of a node to verify. 546 | * @return {@code true} if {@code node} is a leaf, {@code false} otherwise. 547 | */ 548 | public boolean isLeaf(int node) { 549 | return sizes[node] == 1; 550 | } 551 | 552 | /** 553 | * Converts {@link ArrayList} of integer values to an array. Reads all items 554 | * in the list and copies to the output array. The size of output array equals 555 | * the number of elements in the list. 556 | * 557 | * @param integers ArrayList with integer values. 558 | * @return array with values from input ArrayList. 559 | */ 560 | private int[] toIntArray(ArrayList integers) { 561 | int ints[] = new int[integers.size()]; 562 | int i = 0; 563 | for (Integer n : integers) { 564 | ints[i++] = n.intValue(); 565 | } 566 | return ints; 567 | } 568 | 569 | /** 570 | * Returns the root node of the currently processed subtree in the tree 571 | * decomposition part of APTED [1, Algorithm 1]. At each point, we have to 572 | * know which subtree do we process. 573 | * 574 | * @return current subtree root node. 575 | */ 576 | public int getCurrentNode() { 577 | return currentNode; 578 | } 579 | 580 | /** 581 | * Stores the root nodes's preorder id of the currently processes subtree. 
582 | * 583 | * @param preorder preorder id of the root node. 584 | */ 585 | public void setCurrentNode(int preorder) { 586 | currentNode = preorder; 587 | } 588 | 589 | } 590 | --------------------------------------------------------------------------------