├── .gitignore ├── LICENSE ├── README.md ├── build.gradle ├── docs └── img │ └── graphflow.png ├── env.sh ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── intellij_configs ├── README.md ├── graphflow_code_style_guide.xml ├── graphflow_inspections.xml └── runConfigurations │ ├── GraphflowCliRunner.xml │ └── GraphflowServerRunner.xml ├── scripts ├── change_snap_to_csv.py ├── execute_query.py ├── serialize_catalog.py └── serialize_dataset.py ├── settings.gradle └── src └── main ├── antlr └── ca │ └── waterloo │ └── dsg │ └── graphflow │ └── grammar │ └── Graphflow.g4 ├── java └── ca │ └── waterloo │ └── dsg │ └── graphflow │ ├── plan │ ├── Plan.java │ ├── Workers.java │ └── operator │ │ ├── AdjListDescriptor.java │ │ ├── Operator.java │ │ ├── extend │ │ ├── EI.java │ │ ├── Extend.java │ │ └── Intersect.java │ │ ├── hashjoin │ │ ├── Build.java │ │ ├── HashJoin.java │ │ ├── HashTable.java │ │ ├── Probe.java │ │ ├── ProbeCartesian.java │ │ ├── ProbeMultiVertices.java │ │ └── ProbeMultiVerticesCartesian.java │ │ ├── scan │ │ ├── Scan.java │ │ ├── ScanBlocking.java │ │ └── ScanSampling.java │ │ └── sink │ │ ├── Sink.java │ │ └── SinkLimit.java │ ├── planner │ ├── QueryPlanner.java │ ├── QueryPlannerBig.java │ └── catalog │ │ ├── Catalog.java │ │ ├── CatalogFactory.java │ │ ├── CatalogPlans.java │ │ └── operator │ │ ├── IntersectCatalog.java │ │ └── Noop.java │ ├── query │ ├── QueryEdge.java │ ├── QueryGraph.java │ ├── QueryGraphSet.java │ └── parser │ │ ├── AntlrErrorListener.java │ │ ├── ParseTreeVisitor.java │ │ └── QueryParser.java │ ├── runner │ ├── AbstractRunner.java │ ├── ArgsFactory.java │ ├── dataset │ │ ├── CatalogSerializer.java │ │ └── DatasetSerializer.java │ └── plan │ │ └── OptimizerExecutor.java │ ├── storage │ ├── Graph.java │ ├── GraphFactory.java │ ├── KeyStore.java │ ├── KeyStoreFactory.java │ └── SortedAdjList.java │ └── util │ ├── IOUtils.java │ ├── collection │ ├── MapUtils.java │ └── SetUtils.java │ └── 
container │ ├── Quad.java │ └── Triple.java └── resources └── log4j2.properties /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle/ 2 | gradlew.bat 3 | .idea/ 4 | .DS_Store 5 | build/ 6 | build/* 7 | out/ 8 | .editorconfig 9 | generated 10 | classes 11 | *.iml 12 | output/* 13 | scripts/*/ 14 | !scripts/converter/ 15 | scripts/*.data 16 | *.data 17 | .vscode 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Optimizing Subgraph Queries by Combining Binary and Worst-Case Optimal Joins 2 | 3 | Table of Contents 4 | ----------------- 5 | * [Overview](#Overview) 6 | * [Build Steps](#Build-Steps) 7 | * [Executing Queries](#Executing-Queries) 8 | * [Contact](#contact) 9 | 10 | Overview 11 | ----------------- 12 | For an overview of our one-time subgraph matching optimizer, check our [paper](http://amine.io/papers/wco-optimizer-vldb19.pdf). 13 | We study the problem of optimizing subgraph queries using the new worst-case optimal join plans. Worst-case optimal plans evaluate queries by matching one query vertex at a time using multi-way intersections. The core problem in optimizing worst-case optimal plans is to pick an ordering of the query vertices to match. We design a cost-based optimizer that (i) picks efficient query vertex orderings for worst-case optimal plans; and (ii) generates hybrid plans that mix traditional binary joins with worst-case optimal style multiway intersections. Our cost metric combines the cost of binary joins with a new cost metric called intersection-cost. The plan space of our optimizer contains plans that are not in the plan spaces based on tree decompositions from prior work. 14 | 15 | DO NOT DISTRIBUTE. USE ONLY FOR ACADEMIC RESEARCH PURPOSES. 
16 | 17 | Build Steps 18 | ----------------- 19 | * To do a full clean build: `./gradlew clean build installDist` 20 | * All subsequent builds: `./gradlew build installDist` 21 | 22 | Executing Queries 23 | ----------------- 24 | ### Getting Started 25 | After building, run the following command in the project root directory: 26 | ``` 27 | . ./env.sh 28 | ``` 29 | You can now move into the scripts folder to load a dataset and execute queries: 30 | ``` 31 | cd scripts 32 | ``` 33 | 34 | ### Dataset Preparation 35 | A dataset may consist of two files: (i) a vertex file, where IDs are from 0 to N and each line is of the format (ID,LABEL); and (ii) an edge file where each line is of the format (FROM,TO,LABEL). If the vertex file is omitted, all vertices are assigned the same label. We mainly used datasets from [SNAP](https://snap.stanford.edu/). The `serialize_dataset.py` script lets you load datasets from csv files and serialize them to the appropriate format for quick subsequent loading. 36 | 37 | To load and serialize a dataset from a single edges file, run the following command in the `scripts` folder: 38 | ``` 39 | python3 serialize_dataset.py /absolute/path/edges.csv /absolute/path/data 40 | ``` 41 | The system will assume that all vertices have the same label in this case. The serialized graph will be stored in the `data` directory. If the dataset consists of an edges file and a vertices file, the following command can be used instead: 42 | ``` 43 | python3 serialize_dataset.py /absolute/path/edges.csv /absolute/path/data -v /absolute/path/vertices.csv 44 | ``` 45 | After running one of the commands above, a catalog can be generated for the optimizer using the `serialize_catalog.py` script. 
46 | ``` 47 | python3 serialize_catalog.py /absolute/path/data 48 | ``` 49 | 50 | ### Executing Queries 51 | Once a dataset has been prepared, executing a query is as follows: 52 | ``` 53 | python3 execute_query.py "(a)->(b),(b)->(c),(c)->(d)" /absolute/path/data 54 | ``` 55 | 56 | An output example on the dataset of Amazon0601 from [SNAP](https://snap.stanford.edu/) with 1 edge label and 1 vertex label is shown below. The dataset loading time, the optimizer run time, the query execution run time and the query plan with the number of output and intermediate tuples are logged. 57 | ``` 58 | Dataset loading run time: 626.713398 (ms) 59 | Optimizer run time: 9.745375 (ms) 60 | Plan initialization before exec run time: 9.745375 (ms) 61 | Query execution run time: 2334.2977 (ms) 62 | Number output tuples: 118175329 63 | Number intermediate tuples: 34971362 64 | Plan: SCAN (a)->(c), Single-Edge-Extend TO (b) From (a[Fwd]), Multi-Edge-Extend TO (d) From (b[Fwd]-c[Fwd]) 65 | ``` 66 | 67 | In order to invoke a multi-threaded execution, one can execute the query above with the following command to use 2 threads. 68 | ``` 69 | python3 execute_query.py "(a)->(b),(b)->(c),(c)->(d)" /absolute/path/data -t 2 70 | ``` 71 | 72 | The query above assigns arbitrary edge and vertex labels to (a), (b), (c), (a)->(b), and (b)->(c). Use it with unlabeled datasets only. 73 | When the dataset has labels, assign labels to each vertex and edge as follows: 74 | ``` 75 | python3 execute_query.py "(a:person)-[friendof]->(b:person), (b:person)-[likes]->(c:movie)" /absolute/path/data 76 | ``` 77 | 78 | ### Requiring More Memory 79 | Note that the JVM heap by default is allocated a max of 2GB of memory. 
Changing the JVM heap maximum size can be done by prepending JAVA_OPTS='-Xmx500G' when calling the python scripts: 80 | ``` 81 | JAVA_OPTS='-Xmx500G' python3 serialize_catalog.py /absolute/path/data 82 | ``` 83 | 84 | Contact 85 | ----------------- 86 | [Amine Mhedhbi](http://amine.io/) 87 | 88 | License 89 | ----------------- 90 | This software is released under the Apache 2.0 license. 91 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | group 'ca.waterloo.dsg' 2 | version '0.1.0' 3 | 4 | apply plugin: 'java' 5 | apply plugin: 'antlr' 6 | apply plugin: 'maven' 7 | apply plugin: 'maven-publish' 8 | 9 | repositories { 10 | mavenCentral() 11 | } 12 | 13 | dependencies { 14 | antlr "org.antlr:antlr4:4.7" 15 | 16 | compile group: 'commons-cli', name: 'commons-cli', version: '1.4' 17 | compile 'org.apache.logging.log4j:log4j-api:2.8.2' 18 | compile 'org.apache.logging.log4j:log4j-core:2.8.2' 19 | compile 'org.projectlombok:lombok:1.18.12' 20 | 21 | testCompile 'junit:junit:4.12' 22 | testCompile 'org.assertj:assertj-core:3.8.0' 23 | testCompile 'org.mockito:mockito-core:2.9.0' 24 | } 25 | 26 | // ANTLR4 configuration. 27 | generateGrammarSource { 28 | maxHeapSize = "64m" 29 | arguments += [ 30 | "-long-messages", // show exception details 31 | "-visitor", // generate parse tree visitor classes 32 | "-package", "ca.waterloo.dsg.graphflow.grammar" // specify package for generated classes 33 | ] 34 | } 35 | // Add the source files generated by ANTLR4 to the gradle sourcesets. 36 | sourceSets.main.java.srcDirs += "$buildDir/generated-src/antlr/main" 37 | 38 | compileJava { 39 | sourceCompatibility = '11' 40 | targetCompatibility = '11' 41 | // Enable additional compile time warnings. 42 | options.compilerArgs << "-Xlint:all" << "-Xlint:-processing" << "-Xlint:-cast" << "-Xlint:-serial" 43 | // Enable incremental builds. 
Disable if giving errors. 44 | options.incremental = true 45 | } 46 | 47 | apply plugin: 'application' 48 | // Disable the default assembly tasks. 49 | startScripts.enabled = false 50 | run.enabled = false 51 | distZip.enabled = false 52 | distTar.enabled = false 53 | // Create the executables. 54 | def scripts = [ 55 | 'dataset-serializer' : 'ca.waterloo.dsg.graphflow.runner.dataset.DatasetSerializer', 56 | 'catalog-serializer' : 'ca.waterloo.dsg.graphflow.runner.dataset.CatalogSerializer', 57 | 'optimizer-executor' : 'ca.waterloo.dsg.graphflow.runner.plan.OptimizerExecutor' 58 | ] 59 | scripts.each() { fileName, className -> 60 | tasks.create(name: fileName, type: CreateStartScripts) { 61 | mainClassName = className 62 | applicationName = fileName 63 | outputDir = new File(project.buildDir, 'scripts') 64 | classpath = jar.outputs.files + project.configurations.runtime 65 | } 66 | applicationDistribution.into("bin") { 67 | from(tasks[fileName]) 68 | fileMode = 0755 69 | } 70 | } 71 | 72 | // Define specific gradle version. 
73 | task wrapper(type: Wrapper) { 74 | gradleVersion = '4.8.1' 75 | } 76 | 77 | task sourceJar(type: Jar) { 78 | from sourceSets.main.allJava 79 | } 80 | 81 | publishing { 82 | publications { 83 | mavenJava(MavenPublication) { 84 | from components.java 85 | artifact sourceJar { 86 | classifier "sources" 87 | } 88 | } 89 | } 90 | } 91 | 92 | test { 93 | testLogging { 94 | events "skipped", "failed" 95 | exceptionFormat "full" 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /docs/img/graphflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/queryproc/optimizing-subgraph-queries-combining-binary-and-worst-case-optimal-joins/cbce978c9f05503bb85dc5aedce77ff5e487eced/docs/img/graphflow.png -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- 1 | export GRAPHFLOW_HOME=`pwd` 2 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/queryproc/optimizing-subgraph-queries-combining-binary-and-worst-case-optimal-joins/cbce978c9f05503bb85dc5aedce77ff5e487eced/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.8.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! 
-x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the 
arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /intellij_configs/README.md: 
-------------------------------------------------------------------------------- 1 | # IntelliJ setup 2 | 3 | Before importing the project into IntelliJ, execute a clean build from the 4 | command line. This helps set up the code properly. 5 | 6 | ## Import source 7 | 8 | In IntelliJ: 9 | 10 | * Either "Import project", or "File -> New -> Project from existing sources..." 11 | * In the file browser, go to the Graphflow source directory, and select the 12 | 'build.gradle' file. (This selection of the build.gradle file instead of just 13 | the directory is important because it informs IntelliJ that this is a Gradle 14 | project). In the 'Import Project from Gradle' dialog box, select 'use gradle 15 | wrapper task configuration', and press 'OK' 16 | 17 | This completes the basic setup of the project. You can browse the code. Follow 18 | any IntelliJ prompts. 19 | 20 | ## Create run configurations 21 | 22 | To run or debug the Server or the CLI in IntelliJ, we need to create Run 23 | configurations. 24 | 25 | * It is recommended that you link the provided configurations into the IntelliJ 26 | config directory, so that any changes made in the repo will be automatically 27 | applied by IntelliJ. From the root directory, run: `ln -s ../intellij_configs/runConfigurations .idea` 28 | * Alternatively, copy the configurations: `cp -r intellij_configs/runConfigurations .idea` 29 | * In IntelliJ, "Run -> Run... -> GraphflowServerRunner/GraphflowCliRunner" 30 | 31 | To see/edit the actual configuration, go to "Run -> Edit Configurations..." 32 | 33 | ## Import style settings 34 | 35 | Import the IntelliJ code style configuration for auto-formatting code. 36 | 37 | * "File -> Settings -> Editor -> Code Style -> Java -> Manage... -> Import... -> IntelliJ IDEA code style XML" 38 | * Browse to the source directory and select 'intellij_configs/graphflow_code_style_guide.xml' 39 | * Press "OK" everywhere and exit the settings. 
40 | 41 | You can now use 'Alt + Shift + L' to format code automatically in IntelliJ. 42 | 43 | ## Code inspections 44 | 45 | Import `intellij_configs/graphflow_inspections.xml` using "File -> Inspections -> Profile (gear icon) -> 46 | Import profile..." into IntelliJ to highlight code that deviates from the project style guidelines. 47 | 48 | ## Known Issues and Solutions 49 | 50 | - You may run into the following error when trying to build if your IntelliJ version is outdated: 51 | ``` 52 | Exception in thread “main” java.lang.NoClassDefFoundError: io/grpc/BindableService 53 | ``` 54 | This is detailed [here](https://intellij-support.jetbrains.com/hc/en-us/community/posts/115000364850-Gradle-integration-external-dependencies-not-in-classpath-on-run-debug). 55 | Simply update your IntelliJ to the 2017+ version. 56 | 57 | - The gradle version is updated regularly. This can cause various build issues at times on IntelliJ if it is 58 | not refreshed from the Gradle projects in the right side pane. 
59 | -------------------------------------------------------------------------------- /intellij_configs/graphflow_code_style_guide.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /intellij_configs/runConfigurations/GraphflowCliRunner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 8 | 9 | 24 | -------------------------------------------------------------------------------- /intellij_configs/runConfigurations/GraphflowServerRunner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 8 | 9 | 24 | -------------------------------------------------------------------------------- /scripts/change_snap_to_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Given an absolute path to a directory containing '.txt' files from The Stanford 4 | Large Network Dataset Collection, the script outputs for each file in the 5 | directory an edges '.csv' files. The CSV is later loaded to graphflow and saved 6 | to a directory in binary format. 
7 | """ 8 | import sys 9 | import argparse 10 | import random 11 | from os import listdir 12 | from os.path import isfile, join 13 | 14 | def main(): 15 | args = parse_args() 16 | highestVertexId = produce_edges_file(args.input_file, 17 | args.output_edges_file, args.separator, args.label) 18 | if args.output_vertices_file: 19 | pass 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser(description='reads ') 23 | parser.add_argument('input_file', 24 | help='the raw input file using absolute path') 25 | parser.add_argument('output_edges_file', 26 | help='the csv edges output file using absolute path.') 27 | parser.add_argument('-o', '--output_vertices_file', 28 | help='the csv vertices output file using absolute path.') 29 | parser.add_argument('-s', '--separator', 30 | help='separator between vertices in each line.', default='\t') 31 | parser.add_argument('-t', '--type', 32 | help='number of vertex types.', type=int, default=1) 33 | parser.add_argument('-l', '--label', 34 | help='number of edge labels.', type=int, default=1) 35 | return parser.parse_args() 36 | 37 | def produce_edges_file(input_file, output_file, separator, num_of_labels): 38 | edges_file = open(output_file, 'w+') 39 | highestVertexId = -1 40 | # format file written as: FROM,TO,LABEL. 41 | random.seed(0) # use '0' to always get the same sequence of labels 42 | with open(input_file) as f: 43 | for line in f: 44 | if line[0] == '#': # read comment and remove, process the rest. 
45 | continue 46 | try: 47 | edge = line.split(separator) 48 | if len(edge) == 1: 49 | edge = line.split(' ') # edge=['','\n'] 50 | fromVertex = edge[0] 51 | toVertex = edge[1] 52 | toVertex = toVertex[:len(toVertex)-1] # removes '\n' 53 | if int(fromVertex) > highestVertexId: 54 | highestVertexId = int(fromVertex) 55 | if int(toVertex) > highestVertexId: 56 | highestVertexId = int(toVertex) 57 | except Exception: # does not follow the usual csv pattern 58 | continue 59 | if fromVertex == toVertex: # remove self-loops 60 | continue 61 | edge_label = random.randint(0, num_of_labels - 1) 62 | edges_file.write(fromVertex + ',' + toVertex + ',' + \ 63 | str(edge_label) + '\n') 64 | edges_file.close() 65 | 66 | def produce_vertices_file(input_file, output_file, separator, num_of_types, 67 | highestVertexId): 68 | vertices_file = open(output_file, 'w+') 69 | # format file written as: VERTEX_ID,TYPE. 70 | random.seed(0) # use '0' to always get the same sequence of types 71 | for vertexId in range(0, highestVertexId + 1): 72 | vertex_type = random.randint(0, num_of_types - 1) 73 | vertices_file.write(str(vertexId) + ',' + str(vertex_type) + '\n') 74 | vertices_file.close() 75 | 76 | if __name__ == '__main__': 77 | main() 78 | -------------------------------------------------------------------------------- /scripts/execute_query.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import glob 4 | import subprocess 5 | import argparse 6 | import shutil 7 | import random 8 | 9 | bin_home = os.environ['GRAPHFLOW_HOME'] + '/build/install/graphflow/bin/' 10 | 11 | def main(): 12 | args = parse_args() 13 | print(args.query) 14 | # set OptimizerRunner.java arguments and exectue the binary. 
15 | binary_and_args = [ 16 | bin_home + 'optimizer-executor', 17 | '-q', args.query, '-i', args.input_graph, '-t', str(args.threads), '-e'] 18 | # OptimizerExecutor from 19 | # Graphflow-Optimizers/src/ca.waterloo.dsg.graphflow.runner.plan: 20 | # 1) gets a query plan using QueryPlanner. 21 | # 2) Output is logged to STDOUT. 22 | popen = subprocess.Popen(tuple(binary_and_args), stdout=subprocess.PIPE) 23 | popen.wait() 24 | for line in iter(popen.stdout.readline, b''): 25 | print(line.decode("utf-8"), end='') 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser( 29 | description='runs the optimizer to evaluate a query.') 30 | parser.add_argument('query', help='query graph to evaluate.') 31 | parser.add_argument('input_graph', 32 | help='aboluste path to the serialized input graph directory.') 33 | parser.add_argument('-t', '--threads', 34 | help='number of execution threads.', type=int, default=1) 35 | parser.add_argument('-e', '--execute', 36 | help='execute the plan.', action="store_true") 37 | return parser.parse_args() 38 | 39 | if __name__ == '__main__': 40 | main() 41 | 42 | -------------------------------------------------------------------------------- /scripts/serialize_catalog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import subprocess 4 | import argparse 5 | 6 | bin_home = os.environ['GRAPHFLOW_HOME'] + '/build/install/graphflow/bin/' 7 | 8 | def main(): 9 | args = parse_args() 10 | # set SerializeCatalogRunner.java arguments and exectue the binary. 
11 | serialize_catalog_runner = [ 12 | bin_home + 'catalog-serializer', '-i', args.input_graph] 13 | if args.num_edges: 14 | serialize_catalog_runner.append('-n') 15 | serialize_catalog_runner.append(str(args.num_edges)) 16 | if args.vertices: 17 | serialize_catalog_runner.append('-v') 18 | serialize_catalog_runner.append(str(args.vertices)) 19 | if args.threads: 20 | serialize_catalog_runner.append('-t') 21 | serialize_catalog_runner.append(str(args.threads)) 22 | 23 | # SerializeCatalogRunner from 24 | # Graphflow-Optimizers/src/ca.waterloo.dsg.graphflow.runner.plan: 25 | # 1) loads the serialized input graph. 26 | # 2) gets stats about the dataset for the optimizer. 27 | # 3) serealizes the stats data. 28 | popen = subprocess.Popen( 29 | tuple(serialize_catalog_runner), stdout=subprocess.PIPE) 30 | popen.wait() 31 | for line in iter(popen.stdout.readline, b''): 32 | print(line.decode("utf-8"), end='') 33 | 34 | def parse_args(): 35 | parser = argparse.ArgumentParser( 36 | description='loads the serialized input graph.') 37 | parser.add_argument('input_graph', 38 | help='aboluste path to serialized input graph directory.') 39 | parser.add_argument('-n', '--num_edges', 40 | help='number of edges to sample when scanning for the catalog stats.', 41 | type=int) 42 | parser.add_argument('-v', '--vertices', 43 | help='max number of vertices input subgraphs when collecting stats.', 44 | type=int) 45 | parser.add_argument('-t', '--threads', 46 | help='number of threads to use when parallelizing stats collection.', 47 | type=int) 48 | return parser.parse_args() 49 | 50 | if __name__ == '__main__': 51 | main() 52 | 53 | -------------------------------------------------------------------------------- /scripts/serialize_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import subprocess 4 | import argparse 5 | 6 | bin_home = os.environ['GRAPHFLOW_HOME'] + '/build/install/graphflow/bin/' 7 
| 8 | def main(): 9 | args = parse_args() 10 | # set SerializeDatasetRunner.java arguments and exectue the binary. 11 | dataset_serializer = [ 12 | bin_home + 'dataset-serializer', 13 | '-e', args.input_file_edges, '-o', args.output_graph] 14 | if args.input_file_vertices: 15 | dataset_serializer.extend(['-v', args.input_file_vertices]) 16 | if args.edges_file_separator: 17 | dataset_serializer.extend(['-m', args.edges_file_separator]) 18 | if args.vertices_file_separator: 19 | dataset_serializer.extend(['-n', args.vertices_file_separator]) 20 | 21 | # SerializeDatasetRunner from 22 | # Graphflow-Optimizers/src/ca.waterloo.dsg.graphflow.runner. 23 | # dataset.DatasetSerializer: 24 | # 1) loads the csv file. 25 | # 2) gets stats about the dataset for the optimizer. 26 | # 3) serealizes the graph, the stats, and type store. 27 | popen = subprocess.Popen( 28 | tuple(dataset_serializer), stdout=subprocess.PIPE) 29 | popen.wait() 30 | for line in iter(popen.stdout.readline, b''): 31 | print(line.decode("utf-8"), end='') 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser( 35 | description='loads the csv files as a graph and serialize it.') 36 | parser.add_argument('input_file_edges', 37 | help='absolute path to the input edges csv file.') 38 | parser.add_argument('output_graph', 39 | help='aboluste path to the output serialized graph directory.') 40 | parser.add_argument('-v', '--input_file_vertices', 41 | help='absolute path to the input vertices csv file.') 42 | parser.add_argument('-e', '--edges_file_separator', 43 | help='csv separator in the input edges csv file.') 44 | parser.add_argument('-s', '--vertices_file_separator', 45 | help='csv separator in the input vertices csv file.') 46 | return parser.parse_args() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | 51 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | 
rootProject.name = 'graphflow'

--------------------------------------------------------------------------------
/src/main/antlr/ca/waterloo/dsg/graphflow/grammar/Graphflow.g4:
--------------------------------------------------------------------------------
grammar Graphflow;

// Entry rule: one match pattern, optionally followed by `LIMIT <digits>` and an optional
// trailing semicolon; the whole input must be consumed (EOF).
graphflow : whitespace? matchPattern whitespace? (LIMIT whitespace Digits whitespace?)? (SEMICOLON whitespace?)? EOF ;

// One or more edges separated by commas.
matchPattern : edge ( whitespace? COMMA whitespace? edge )* ;
// A directed edge, e.g. `(a)->(b)` or `(a)-[label]->(b)`. Note that no whitespace is accepted
// between `-`, the optional label, `>` and the destination vertex.
edge : vertex whitespace? DASH label? GREATER_THAN vertex ;
// A vertex, e.g. `(a)` or `(a:type)`.
vertex : OPEN_ROUND_BRACKET whitespace? variable (type)? whitespace? CLOSE_ROUND_BRACKET ;
// Optional vertex type suffix, e.g. `:person`.
type : whitespace? COLON whitespace? variable ;
// Edge label including its closing dash, e.g. `[knows]-`.
label : OPEN_SQUARE_BRACKET variable CLOSE_SQUARE_BRACKET DASH;
// An identifier built from digit runs, letter runs and underscores (may start with a digit).
variable : ( Digits | Characters | UNDERSCORE ) ( Digits | Characters | UNDERSCORE )* ;
// Whitespace is handled in the parser (not skipped by the lexer); comments count as whitespace.
whitespace : ( SPACE | TAB | CARRIAGE_RETURN | LINE_FEED | FORM_FEED | Comment )+ ;

/*********** Lexer rules ***********/

// Case-insensitive keyword, spelled with the per-letter fragments declared at the bottom.
LIMIT : L I M I T ;

fragment EscapedChar : TAB | CARRIAGE_RETURN | LINE_FEED | BACKSPACE | FORM_FEED | '\\' ( '"' | '\'' | '\\' ) ;

// NOTE(review): QuotedCharacter and QuotedString are not referenced by any parser rule above.
QuotedCharacter : SINGLE_QUOTE ( EscapedChar | ~( '\\' | '\'' ) ) SINGLE_QUOTE ;
QuotedString : DOUBLE_QUOTE ( EscapedChar | ~( '"' ) )* DOUBLE_QUOTE
             | SINGLE_QUOTE ( EscapedChar | ~( '\'' ) )* SINGLE_QUOTE ;

// Block comments and line comments; a line comment may be terminated by end of input.
Comment : '/*' .*? '*/'
        | '//' ~( '\n' | '\r' )* '\r'? ( '\n' | EOF ) ;

SPACE : [ ] ;
TAB : [\t] ;
LINE_FEED : [\n] ;
FORM_FEED : [\f] ;
BACKSPACE : [\b] ;
VERTICAL_TAB : [\u000B] ;
CARRIAGE_RETURN : [\r] ;

DASH : '-' ;
UNDERSCORE : '_' ;
FORWARD_SLASH : '/' ;
BACKWARD_SLASH : '\\' ;
SEMICOLON: ';' ;
COMMA : ',' ;
COLON : ':' ;
SINGLE_QUOTE : '\'' ;
DOUBLE_QUOTE : '"' ;
OPEN_ROUND_BRACKET : '(' ;
CLOSE_ROUND_BRACKET : ')' ;
OPEN_SQUARE_BRACKET : '[' ;
CLOSE_SQUARE_BRACKET : ']' ;
GREATER_THAN : '>' ;

Digits : ( Digit )+ ;
Characters : ( Character )+ ;

fragment Digit : '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ;

// ASCII letters only; each letter fragment below matches both cases.
fragment Character : A | B | C | D | E | F | G | H | I | J | K | L | M |
                     N | O | P | Q | R | S | T | U | V | W | X | Y | Z ;
fragment A : ('a'|'A') ;
fragment B : ('b'|'B') ;
fragment C : ('c'|'C') ;
fragment D : ('d'|'D') ;
fragment E : ('e'|'E') ;
fragment F : ('f'|'F') ;
fragment G : ('g'|'G') ;
fragment H : ('h'|'H') ;
fragment I : ('i'|'I') ;
fragment J : ('j'|'J') ;
fragment K : ('k'|'K') ;
fragment L : ('l'|'L') ;
fragment M : ('m'|'M') ;
fragment N : ('n'|'N') ;
fragment O : ('o'|'O') ;
fragment P : ('p'|'P') ;
fragment Q : ('q'|'Q') ;
fragment R : ('r'|'R') ;
fragment S : ('s'|'S') ;
fragment T : ('t'|'T') ;
fragment U : ('u'|'U') ;
fragment V : ('v'|'V') ;
fragment W : ('w'|'W') ;
fragment X : ('x'|'X') ;
fragment Y : ('y'|'Y') ;
fragment Z : ('z'|'Z') ;

--------------------------------------------------------------------------------
/src/main/java/ca/waterloo/dsg/graphflow/plan/Plan.java:
--------------------------------------------------------------------------------
package ca.waterloo.dsg.graphflow.plan;

import ca.waterloo.dsg.graphflow.plan.operator.Operator;
import ca.waterloo.dsg.graphflow.plan.operator.Operator.LimitExceededException;
import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.Build;
import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.HashTable;
import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.Probe;
import ca.waterloo.dsg.graphflow.plan.operator.scan.Scan;
import ca.waterloo.dsg.graphflow.plan.operator.scan.ScanSampling;
import ca.waterloo.dsg.graphflow.plan.operator.sink.Sink;
import ca.waterloo.dsg.graphflow.plan.operator.sink.Sink.SinkType;
import ca.waterloo.dsg.graphflow.plan.operator.sink.SinkLimit;
import ca.waterloo.dsg.graphflow.storage.Graph;
import ca.waterloo.dsg.graphflow.storage.KeyStore;
import ca.waterloo.dsg.graphflow.util.IOUtils;
import ca.waterloo.dsg.graphflow.util.container.Triple;
import lombok.Getter;
import lombok.Setter;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringJoiner;

/**
 * Query Plan (QP) representing left-deep binary plans, bushy binary plans, worst-case optimal
 * plans, and hybrid plans.
30 | */ 31 | public class Plan implements Serializable { 32 | 33 | @Getter private Sink sink; 34 | @Setter public SinkType sinkType = SinkType.COUNTER; 35 | @Getter public ScanSampling scanSampling; 36 | @Getter private Operator lastOperator; 37 | @Setter public int outTuplesLimit; 38 | 39 | @Getter private double elapsedTime = 0; 40 | @Getter private long icost = 0; 41 | @Getter private long numIntermediateTuples = 0; 42 | @Getter private long numOutTuples = 0; 43 | @Getter transient private List> operatorMetrics = new ArrayList<>(); 45 | 46 | private boolean executed = false; 47 | @Getter private boolean adaptiveEnabled = false; 48 | 49 | @Getter List subplans = new ArrayList<>(); 50 | private List probes; 51 | 52 | @Getter @Setter double estimatedICost; 53 | @Getter @Setter double estimatedNumOutTuples; 54 | @Getter @Setter Map qVertexToNumOutTuples; 55 | 56 | /** 57 | * Constructs a {@link Plan} object. 58 | */ 59 | public Plan(ScanSampling scan) { 60 | this.scanSampling = scan; 61 | var lastOperators = new ArrayList(); 62 | scan.getLastOperators(lastOperators); 63 | var outSubgraph = lastOperators.get(0).getOutSubgraph(); 64 | sink = new Sink(outSubgraph); 65 | sink.previous = lastOperators.toArray(new Operator[0]); 66 | for (var lastOperator : lastOperators) { 67 | lastOperator.setNext(sink); 68 | } 69 | } 70 | 71 | /** 72 | * Constructs a {@link Plan} object. 73 | * 74 | * @param lastOperator is the operator to execute. 75 | */ 76 | public Plan(Operator lastOperator) { 77 | this.subplans.add(lastOperator); 78 | this.lastOperator = lastOperator; 79 | } 80 | 81 | /** 82 | * Constructs a {@link Plan} object. 83 | * 84 | * @param subplans are the setAdjListSortOrder of linear subplans making up the query plan. 85 | */ 86 | public Plan(List subplans) { 87 | this.subplans = subplans; 88 | this.lastOperator = subplans.get(subplans.size() - 1); 89 | } 90 | 91 | /** 92 | * Constructs a {@link Plan} object. 
93 | * 94 | * @param lastOperator is the scan operator to execute. 95 | * @param estimatedNumOutTuples is the number of output tuples from the scan. 96 | */ 97 | public Plan(Scan lastOperator, double estimatedNumOutTuples) { 98 | this(lastOperator); 99 | this.estimatedNumOutTuples = estimatedNumOutTuples; 100 | qVertexToNumOutTuples = new HashMap<>(); 101 | qVertexToNumOutTuples.put(lastOperator.getFromQueryVertex(), estimatedNumOutTuples); 102 | qVertexToNumOutTuples.put(lastOperator.getToQueryVertex(), estimatedNumOutTuples); 103 | } 104 | 105 | public void append(Operator newOperator) { 106 | lastOperator.setNext(newOperator); 107 | newOperator.setPrev(lastOperator); 108 | subplans.set(subplans.size() - 1, newOperator); 109 | lastOperator = newOperator; 110 | } 111 | 112 | /** 113 | * Executes the {@link Plan}. 114 | */ 115 | public void execute() { 116 | if (SinkType.LIMIT != sinkType) { 117 | var startTime = System.nanoTime(); 118 | try { 119 | for (var subplan : subplans) { 120 | subplan.execute(); 121 | } 122 | } catch (LimitExceededException e) {} // never thrown. 123 | elapsedTime = IOUtils.getElapsedTimeInMillis(startTime); 124 | } else { 125 | ((SinkLimit) sink).setStartTime(System.nanoTime()); 126 | try { 127 | for (var subplan : subplans) { 128 | subplan.execute(); 129 | } 130 | } catch (LimitExceededException e) {} // never thrown. 131 | elapsedTime = ((SinkLimit) sink).getElapsedTime(); 132 | } 133 | executed = true; 134 | numOutTuples = sink.getNumOutTuples(); 135 | } 136 | 137 | /** 138 | * Initialize the plan by initializing all of its operators. 139 | * 140 | * @param graph is the input data graph. 141 | * @param store is the labels and types key store. 
142 | */ 143 | public void init(Graph graph, KeyStore store) { 144 | var lastOperator = subplans.get(subplans.size() - 1); 145 | var queryGraph = lastOperator.getOutSubgraph(); 146 | switch(sinkType) { 147 | case LIMIT: 148 | sink = new SinkLimit(queryGraph, outTuplesLimit); 149 | break; 150 | case COUNTER: 151 | default: 152 | sink = new Sink(queryGraph); 153 | break; 154 | } 155 | sink.setPrev(lastOperator); 156 | lastOperator.setNext(sink); 157 | probes = new ArrayList<>(); 158 | for (int i = 1; i < subplans.size(); i++) { 159 | var operator = subplans.get(i); 160 | if (operator instanceof Probe) { 161 | probes.add((Probe) operator); 162 | } 163 | while (null != operator.getPrev()) { 164 | operator = operator.getPrev(); 165 | if (operator instanceof Probe) { 166 | probes.add((Probe) operator); 167 | } 168 | } 169 | } 170 | for (int i = 0; i < subplans.size() - 1; i++) { 171 | var build = (Build) subplans.get(i); 172 | var hashTable = new HashTable(build.getBuildHashIdx(), build.getHashedTupleLen()); 173 | build.setHashTable(hashTable); 174 | } 175 | for (var subplan : subplans) { 176 | var probeTuple = new int[subplan.getOutTupleLen()]; 177 | var firstOperator = subplan; 178 | while (null != firstOperator.getPrev()) { 179 | firstOperator = firstOperator.getPrev(); 180 | } 181 | firstOperator.init(probeTuple, graph, store); 182 | } 183 | } 184 | 185 | void setProbeHashTables(int ID, HashTable[] hashTables) { 186 | for (var probe : probes) { 187 | if (probe.getID() == ID) { 188 | probe.setHashTables(hashTables); 189 | } 190 | } 191 | } 192 | 193 | /** 194 | * @return The stats as a one line comma separated CSV one line row for logging. 
195 | */ 196 | public String getOutputLog() { 197 | if (null == operatorMetrics) { 198 | operatorMetrics = new ArrayList<>(); 199 | } 200 | setStats(); 201 | var strJoiner = new StringJoiner(","); 202 | if (executed) { 203 | strJoiner.add(String.format("%.4f", elapsedTime)); 204 | strJoiner.add(String.format("%d", numOutTuples)); 205 | strJoiner.add(String.format("%d", numIntermediateTuples)); 206 | strJoiner.add(String.format("%d", icost)); 207 | } 208 | for (var operatorMetric : operatorMetrics) { 209 | strJoiner.add(String.format("%s", operatorMetric.a)); /* operator name */ 210 | } 211 | return strJoiner.toString() + "\n"; 212 | } 213 | 214 | void setStats() { 215 | for (var subplan : subplans) { 216 | var firstOperator = subplan; 217 | while (null != firstOperator.getPrev()) { 218 | firstOperator = firstOperator.getPrev(); 219 | } 220 | firstOperator.getOperatorMetricsNextOperators(operatorMetrics); 221 | } 222 | for (var i = 0; i < operatorMetrics.size() - 1; ++i) { 223 | icost += operatorMetrics.get(i).b; 224 | numIntermediateTuples += operatorMetrics.get(i).c; 225 | } 226 | icost += operatorMetrics.get(operatorMetrics.size() - 1).b; 227 | } 228 | 229 | /** 230 | * Checks if the two plans are equivalent. 231 | * 232 | * @param otherQueryPlan is the other query plan to compare against. 233 | * @return True, if the two plans are equivalent. False, otherwise. 234 | */ 235 | public boolean isSameAs(Plan otherQueryPlan) { 236 | if (subplans.size() != otherQueryPlan.subplans.size()) { 237 | return false; 238 | } 239 | for (var i = 0; i < subplans.size(); i++) { 240 | if (!subplans.get(i).isSameAs(otherQueryPlan.subplans.get(i))) { 241 | return false; 242 | } 243 | } 244 | return true; 245 | } 246 | 247 | /** 248 | * Shallow copy of the query plan. 249 | */ 250 | public Plan shallowCopy() { 251 | return new Plan(new ArrayList<>(this.subplans)); 252 | } 253 | 254 | /** 255 | * Deep copy of the query plan. 
256 | * 257 | * @param isThreadSafe specifies whether scans and hash joins are thread safe or not. 258 | */ 259 | public Plan copy(boolean isThreadSafe) { 260 | var subplans = new ArrayList(this.subplans.size()); 261 | for (var subplan : this.subplans) { 262 | subplans.add(subplan.copy(isThreadSafe)); 263 | } 264 | return new Plan(subplans); 265 | } 266 | 267 | /** 268 | * Deep copy of the query plan. 269 | */ 270 | public Plan copy() { 271 | return copy(false); 272 | } 273 | 274 | public Plan copyCatalogPlan() { 275 | return new Plan(scanSampling.copy()); 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/Workers.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator.LimitExceededException; 4 | import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.Build; 5 | import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.HashTable; 6 | import ca.waterloo.dsg.graphflow.plan.operator.scan.ScanBlocking; 7 | import ca.waterloo.dsg.graphflow.plan.operator.scan.ScanBlocking.VertexIdxLimits; 8 | import ca.waterloo.dsg.graphflow.storage.Graph; 9 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 10 | import ca.waterloo.dsg.graphflow.util.IOUtils; 11 | import ca.waterloo.dsg.graphflow.util.container.Triple; 12 | import lombok.Getter; 13 | import org.apache.logging.log4j.LogManager; 14 | import org.apache.logging.log4j.Logger; 15 | 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | import java.util.StringJoiner; 19 | 20 | /** 21 | * Query plan workers execute a query plan in parallel given a number of threads. 
22 | */ 23 | public class Workers { 24 | 25 | protected static final Logger logger = LogManager.getLogger(Workers.class); 26 | 27 | private Plan[] queryPlans; 28 | private Thread[][] workers; 29 | private int numThreads = 1; 30 | 31 | @Getter private double elapsedTime = 0; 32 | private long intersectionCost = 0; 33 | private long numIntermediateTuples = 0; 34 | private long numOutTuples = 0; 35 | transient private List> operatorMetrics; 37 | 38 | /** 39 | * Constructs a {@link Workers} object. 40 | * 41 | * @param queryPlan is the query plan to execute. 42 | * @param numThreads is the number of threads to use executing the query. 43 | */ 44 | public Workers(Plan queryPlan, int numThreads) { 45 | queryPlans = new Plan[numThreads]; 46 | // if (numThreads == 1) { 47 | // queryPlans[0] = queryPlan.copy(true /* isThreadSafe */); 48 | // } else { // numThreads > 1 49 | for (int i = 0; i < numThreads; i++) { 50 | queryPlans[i] = queryPlan.copy(true /* isThreadSafe */); 51 | } 52 | this.numThreads = numThreads; 53 | var numSubplans = queryPlans[0].getSubplans().size(); 54 | workers = new Thread[numSubplans][numThreads]; 55 | for (var i = 0; i < queryPlans.length; i++) { 56 | var subplans = queryPlans[i].getSubplans(); 57 | for (var subplanId = 0; subplanId < numSubplans; subplanId++) { 58 | var operator = subplans.get(subplanId); 59 | Runnable runnable = () -> { 60 | try { operator.execute(); } catch (LimitExceededException e) {} 61 | }; 62 | workers[subplanId][i] = new Thread(runnable); 63 | } 64 | } 65 | for (var i = 0; i < numSubplans; i++) { 66 | var globalVertexIdxLimits = new VertexIdxLimits(); 67 | for (var plan : queryPlans) { 68 | var operator = plan.subplans.get(i); 69 | while (null != operator.getPrev()) { 70 | operator = operator.getPrev(); 71 | } 72 | if (operator instanceof ScanBlocking) { 73 | ((ScanBlocking) operator).setGlobalVerticesIdxLimits(globalVertexIdxLimits); 74 | } 75 | } 76 | } 77 | // } 78 | } 79 | 80 | public void init(Graph graph, KeyStore 
store) { 81 | for (var queryPlan : queryPlans) { 82 | queryPlan.init(graph, store); 83 | } 84 | var numBuildOperators = queryPlans[0].getSubplans().size() - 1; 85 | for (var buildIdx = 0; buildIdx < numBuildOperators; buildIdx++) { 86 | var ID = ((Build) queryPlans[0].getSubplans().get(buildIdx)).getID(); 87 | var hashTables = new HashTable[numThreads]; 88 | for (var i = 0; i < queryPlans.length; i++) { 89 | hashTables[i] = ((Build) queryPlans[i].getSubplans().get(buildIdx)).getHashTable(); 90 | } 91 | for (var queryPlan : queryPlans) { 92 | queryPlan.setProbeHashTables(ID, hashTables); 93 | } 94 | } 95 | } 96 | 97 | public void execute() throws InterruptedException { 98 | if (queryPlans.length == 1) { 99 | queryPlans[0].execute(); 100 | elapsedTime = queryPlans[0].getElapsedTime(); 101 | } else { 102 | var beginTime = System.nanoTime(); 103 | for (var subplanWorkers : workers) { 104 | for (int j = 0; j < queryPlans.length; j++) { 105 | subplanWorkers[j].start(); 106 | } 107 | for (int j = 0; j < queryPlans.length; j++) { 108 | subplanWorkers[j].join(); 109 | } 110 | } 111 | elapsedTime = IOUtils.getElapsedTimeInMillis(beginTime); 112 | } 113 | } 114 | 115 | /** 116 | * @return The stats as a one line comma separated CSV one line row for logging. 
117 | */ 118 | public String getOutputLog() { 119 | if (queryPlans.length == 1) { 120 | return queryPlans[0].getOutputLog(); 121 | } 122 | if (null == operatorMetrics) { 123 | operatorMetrics = new ArrayList<>(); 124 | for (var queryPlan : queryPlans) { 125 | queryPlan.setStats(); 126 | } 127 | aggregateOutput(); 128 | } 129 | var strJoiner = new StringJoiner(","); 130 | strJoiner.add(String.format("%.4f", elapsedTime)); 131 | strJoiner.add(String.format("%d", numOutTuples)); 132 | strJoiner.add(String.format("%d", numIntermediateTuples)); 133 | strJoiner.add(String.format("%d", intersectionCost)); 134 | for (var operatorMetric : operatorMetrics) { 135 | strJoiner.add(String.format("%s", operatorMetric.a)); /* operator name */ 136 | } 137 | return strJoiner.toString() + "\n"; 138 | } 139 | 140 | private void aggregateOutput() { 141 | operatorMetrics = new ArrayList<>(); 142 | for (var queryPlan : queryPlans) { 143 | intersectionCost += queryPlan.getIcost(); 144 | numIntermediateTuples += queryPlan.getNumIntermediateTuples(); 145 | numOutTuples += queryPlan.getLastOperator().getNumOutTuples(); 146 | } 147 | var queryPlan = queryPlans[0]; 148 | for (var metric : queryPlan.getOperatorMetrics()) { 149 | operatorMetrics.add(new Triple<>(metric.a, metric.b, metric.c)); 150 | } 151 | for (int i = 1; i < queryPlans.length; i++) { 152 | for (int j = 0; j < operatorMetrics.size(); j++) { 153 | operatorMetrics.get(j).b += queryPlans[i].getOperatorMetrics().get(j).b; 154 | operatorMetrics.get(j).c += queryPlans[i].getOperatorMetrics().get(j).c; 155 | } 156 | } 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/AdjListDescriptor.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator; 2 | 3 | import ca.waterloo.dsg.graphflow.storage.Graph.Direction; 4 | import lombok.Getter; 5 | 6 | import 
java.io.Serializable; 7 | 8 | /** 9 | * An adjacency list descriptor consists of the following: 10 | * (1) A from variable indicating the vertex in the query graph that is being extended from. 11 | * (2) An index indicating the vertex value position in the processing tuple from which we extend. 12 | * (3) A direction which indicates whether to extend from fwd or bwd adj list. 13 | * (4) An edge label. 14 | */ 15 | public class AdjListDescriptor implements Serializable { 16 | 17 | @Getter private String fromQueryVertex; 18 | @Getter private int vertexIdx; 19 | @Getter private Direction direction; 20 | @Getter private short label; 21 | 22 | /** 23 | * Constructs an {@link AdjListDescriptor} object. 24 | * 25 | * @param fromQueryVertex is the from variable to extend from. 26 | * @param vertexIdx is the index in the tuple indicating the vertex from which we extend. 27 | * @param dir is the direction of extension. 28 | * @param label is the edge label. 29 | */ 30 | public AdjListDescriptor(String fromQueryVertex, int vertexIdx, Direction dir, short label) { 31 | this.fromQueryVertex = fromQueryVertex; 32 | this.vertexIdx = vertexIdx; 33 | this.direction = dir; 34 | this.label = label; 35 | } 36 | 37 | @Override 38 | public String toString() { 39 | return (-1 != vertexIdx ? "vertexIdx: " + vertexIdx : "") + 40 | ", fromQueryVertex: " + fromQueryVertex + 41 | ", direction: " + direction.name() + ", and " + 42 | (label == -1 ? 
"no label" : "internal label: " + label + "."); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/Operator.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.sink.Sink; 4 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 5 | import ca.waterloo.dsg.graphflow.storage.Graph; 6 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 7 | import ca.waterloo.dsg.graphflow.util.container.Triple; 8 | import lombok.Getter; 9 | import lombok.Setter; 10 | 11 | import java.io.Serializable; 12 | import java.util.List; 13 | import java.util.Map; 14 | import java.util.Set; 15 | 16 | /** 17 | * Base class for all database operators. 18 | */ 19 | public abstract class Operator implements Serializable { 20 | 21 | protected final boolean IS_PROFILED = false; 22 | 23 | /** 24 | * Limit exception thrown for LIMIT queries. 25 | */ 26 | public static class LimitExceededException extends Exception {} 27 | 28 | public static boolean CACHING_ENABLED = true; 29 | 30 | @Getter protected String name; 31 | @Getter protected Operator[] next; 32 | @Getter @Setter protected Operator prev; 33 | 34 | @Setter protected int[] probeTuple; 35 | 36 | @Getter protected int outTupleLen; 37 | @Getter protected QueryGraph inSubgraph; 38 | @Getter @Setter protected QueryGraph outSubgraph; 39 | @Getter @Setter protected Map outQVertexToIdxMap; 40 | @Getter @Setter protected int lastRepeatedVertexIdx; 41 | 42 | @Getter protected long numOutTuples = 0; 43 | @Getter protected long icost = 0; 44 | 45 | /** 46 | * Constructs an {@link Operator} object. 47 | * 48 | * @param outSubgraph The subgraph matched by the output tuples. 49 | * @param inSubgraph The subgraph matched by the input tuples. 
50 | */ 51 | protected Operator(QueryGraph outSubgraph, QueryGraph inSubgraph) { 52 | this.outSubgraph = outSubgraph; 53 | this.inSubgraph = inSubgraph; 54 | this.outTupleLen = outSubgraph.getNumVertices(); 55 | } 56 | 57 | public Set getOutQVertices() { 58 | return outQVertexToIdxMap.keySet(); 59 | } 60 | 61 | /** 62 | * Constructs an {@link Operator} object. 63 | */ 64 | protected Operator() {} 65 | 66 | /** 67 | * Initialize the operator e.g. memory allocation. 68 | * 69 | * @param probeTuple is the tuple processed throughout the query plan. 70 | * @param graph is the input data graph. 71 | * @param store is the labels and types key store. 72 | */ 73 | public abstract void init(int[] probeTuple, Graph graph, KeyStore store); 74 | 75 | /** 76 | * Checks if the two plans, the one with this operator as root and the one with root 77 | * as passed operator are the same plans. The function relies in its checks on a set of 78 | * invariants across the code base for each operator. 79 | * 80 | * @param operator The other operator to compare against. 81 | * @return True, if the plans with these operators as root are the same. False, otherwise. 82 | */ 83 | public boolean isSameAs(Operator operator) { 84 | throw new UnsupportedOperationException(); 85 | } 86 | 87 | /** 88 | * @param index The index of the next operator to return. 89 | * @return The {@link Operator} at the given index. 90 | */ 91 | public Operator getNext(int index) { 92 | return next[index]; 93 | } 94 | 95 | /** 96 | * @param operator The next operator to append prefixes to. 97 | */ 98 | public void setNext(Operator operator) { 99 | next = new Operator[] { operator }; 100 | } 101 | 102 | /** 103 | * @param operators The next operator to append prefixes to. 104 | */ 105 | public void setNext(Operator[] operators) { 106 | next = operators; 107 | } 108 | 109 | /** 110 | * Process a new tuple and push the produced tuples to the next operator. 
111 | */ 112 | public abstract void processNewTuple() throws LimitExceededException; 113 | 114 | /** 115 | * Executes the operator. 116 | */ 117 | public void execute() throws LimitExceededException { 118 | if (null != prev) { 119 | prev.execute(); 120 | } 121 | } 122 | 123 | public String getALDsAsString() { 124 | return ""; 125 | } 126 | 127 | public void updateOperatorName(Map queryVertexToIndexMap) { 128 | throw new UnsupportedOperationException(this.getClass().getSimpleName() + 129 | " does not support updateOperatorName(Map queryVertexToIndexMap)."); 130 | } 131 | 132 | /** 133 | * Fills the operator metrics and recursively calls its prev operators to do the same. 134 | * 135 | * @param operatorMetrics The List of triple {@code String} operator name, {@code Long} 136 | * intersection cost, and {@code Long} probeTuple output size. 137 | */ 138 | public void getOperatorMetricsNextOperators(List> operatorMetrics) { 139 | operatorMetrics.add(new Triple<>(name, icost, numOutTuples)); 140 | if (null != next) { 141 | for (Operator nextOperator : next) { 142 | if (!(nextOperator instanceof Sink)) { 143 | nextOperator.getOperatorMetricsNextOperators(operatorMetrics); 144 | } 145 | } 146 | } 147 | } 148 | 149 | public void getLastOperators(List lastOperators) { 150 | if (next != null) { 151 | for (var nextOperator : next) { 152 | nextOperator.getLastOperators(lastOperators); 153 | } 154 | } else { 155 | lastOperators.add(this); 156 | } 157 | } 158 | 159 | /** 160 | * @return The number of intersect operators before and including this operator in the query 161 | * transform. 162 | */ 163 | public boolean hasMultiEdgeExtends() { 164 | if (null != prev) { 165 | return prev.hasMultiEdgeExtends(); 166 | } 167 | return false; 168 | } 169 | 170 | /** 171 | * Creates a copy of the operator and same recursively of the prev operators referenced 172 | * for single or multi-threaded execution. 
173 | * 174 | * @param isThreadSafe specifies whether to copy each operator as blocking operator or not. 175 | * @return The copy of the operator. 176 | */ 177 | public Operator copy(boolean isThreadSafe) { 178 | throw new UnsupportedOperationException(); 179 | } 180 | 181 | /** 182 | * Creates a copy of the operator and same recursively of the prev operators referenced 183 | * for single-threaded execution. 184 | */ 185 | public Operator copy() { 186 | return copy(false); 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/extend/Extend.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.extend; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.AdjListDescriptor; 4 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 5 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 6 | import ca.waterloo.dsg.graphflow.storage.Graph; 7 | import ca.waterloo.dsg.graphflow.storage.Graph.Direction; 8 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 9 | import ca.waterloo.dsg.graphflow.storage.SortedAdjList; 10 | 11 | import java.io.Serializable; 12 | import java.util.List; 13 | import java.util.Map; 14 | 15 | /** 16 | * Given a set of input tuples from the prev {@link Operator}, E/I extends the tuples by one query 17 | * vertex. 
18 | */ 19 | public class Extend extends EI implements Serializable { 20 | 21 | private int vertexIndex; 22 | private short labelOrToType; 23 | private Direction dir; 24 | private SortedAdjList[] adjList; 25 | 26 | /** 27 | * @see EI#make(String, short, List, QueryGraph, QueryGraph, Map) 28 | */ 29 | Extend(String toQVertex, short toType, List ALDs, 30 | QueryGraph outSubgraph, QueryGraph inSubgraph, Map outQVertexToIdxMap) { 31 | super(toQVertex, toType, ALDs, outSubgraph, inSubgraph); 32 | var ALD = ALDs.get(0); 33 | this.vertexIndex = ALD.getVertexIdx(); 34 | this.dir = ALD.getDirection(); 35 | this.labelOrToType = ALD.getLabel(); 36 | this.lastRepeatedVertexIdx = outTupleLen - 2; 37 | this.outQVertexToIdxMap = outQVertexToIdxMap; 38 | this.outIdx = outQVertexToIdxMap.get(toQVertex); 39 | } 40 | 41 | /** 42 | * @see Operator#init(int[], Graph, KeyStore) 43 | */ 44 | @Override 45 | public void init(int[] probeTuple, Graph graph, KeyStore store) { 46 | this.outNeighbours = new Neighbours(); 47 | this.probeTuple = probeTuple; 48 | this.vertexTypes = graph.getVertexTypes(); 49 | this.adjList = dir == Direction.Fwd ? 
graph.getFwdAdjLists() : graph.getBwdAdjLists(); 50 | if (graph.isAdjListSortedByType()) { 51 | labelOrToType = toType; 52 | toType = KeyStore.ANY; 53 | } 54 | for (var nextOperator : next) { 55 | nextOperator.init(probeTuple, graph, store); 56 | } 57 | } 58 | 59 | /** 60 | * @see Operator#processNewTuple() 61 | */ 62 | @Override 63 | @SuppressWarnings("fallthrough") 64 | public void processNewTuple() throws LimitExceededException { 65 | adjList[probeTuple[vertexIndex]].setNeighbourIds(labelOrToType, outNeighbours); 66 | icost += outNeighbours.endIdx - outNeighbours.startIdx; 67 | for (var idx = outNeighbours.startIdx; idx < outNeighbours.endIdx; idx++) { 68 | if (toType == KeyStore.ANY || toType == vertexTypes[outNeighbours.Ids[idx]]) { 69 | numOutTuples++; 70 | probeTuple[outIdx] = outNeighbours.Ids[idx]; 71 | next[0].processNewTuple(); 72 | } 73 | } 74 | } 75 | 76 | /** 77 | * @see Operator#isSameAs(Operator) 78 | */ 79 | @Override 80 | public boolean isSameAs(Operator operator) { 81 | if (!(operator instanceof Extend)) { 82 | return false; 83 | } 84 | var extend = (Extend) operator; 85 | return 86 | this == extend || ( 87 | (!DIFFERENTIATE_FWD_BWD_SINGLE_ALD || dir == extend.dir) && 88 | labelOrToType == extend.labelOrToType && 89 | toType == extend.toType && 90 | inSubgraph.isIsomorphicTo(operator.getInSubgraph()) && 91 | outSubgraph.isIsomorphicTo(operator.getOutSubgraph()) && 92 | prev.isSameAs(operator.getPrev()) 93 | ); 94 | } 95 | 96 | /** 97 | * @see Operator#copy(boolean) 98 | */ 99 | @Override 100 | public Extend copy(boolean isThreadSafe) { 101 | var extend = new Extend(toQueryVertex, toType, ALDs, outSubgraph, inSubgraph, 102 | outQVertexToIdxMap); 103 | extend.prev = prev.copy(isThreadSafe); 104 | extend.prev.setNext(extend); 105 | extend.initCaching(extend.prev.getLastRepeatedVertexIdx()); 106 | return extend; 107 | } 108 | } 109 | -------------------------------------------------------------------------------- 
/src/main/java/ca/waterloo/dsg/graphflow/plan/operator/extend/Intersect.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.extend; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.AdjListDescriptor; 4 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 5 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 6 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 7 | 8 | import java.io.Serializable; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | /** 13 | * Given a set of input tuples from the prev {@link Operator}, E/I extends the tuples by one query 14 | * vertex. 15 | */ 16 | public class Intersect extends EI implements Serializable { 17 | 18 | /** 19 | * @see EI#make(String, short, List, QueryGraph, QueryGraph, Map) 20 | */ 21 | protected Intersect(String toQVertex, short toType, List ALDs, 22 | QueryGraph outSubgraph, QueryGraph inSubgraph, Map outQVertexToIdxMap) { 23 | super(toQVertex, toType, ALDs, outSubgraph, inSubgraph); 24 | this.lastRepeatedVertexIdx = outTupleLen - 2; 25 | this.outQVertexToIdxMap = outQVertexToIdxMap; 26 | this.outIdx = this.outQVertexToIdxMap.get(toQVertex); 27 | } 28 | 29 | /** 30 | * @see Operator#processNewTuple() 31 | */ 32 | @Override 33 | public void processNewTuple() throws LimitExceededException { 34 | Neighbours temp; 35 | if (cachingType == CachingType.NONE || !isIntersectionCached()) { 36 | adjListsToCache[0][probeTuple[vertexIdxToCache[0]]].setNeighbourIds( 37 | labelsOrToTypesToCache[0], initNeighbours); 38 | icost += (initNeighbours.endIdx - initNeighbours.startIdx); 39 | icost += adjListsToCache[1][probeTuple[vertexIdxToCache[1]]].intersect( 40 | labelsOrToTypesToCache[1], initNeighbours, cachedNeighbours); 41 | if (toType != KeyStore.ANY) { 42 | var currEndIdx = 0; 43 | for (var i = cachedNeighbours.startIdx; i < cachedNeighbours.endIdx; i++) { 44 | if (vertexTypes[cachedNeighbours.Ids[i]] == toType) { 45 
| cachedNeighbours.Ids[currEndIdx++] = cachedNeighbours.Ids[i]; 46 | } 47 | } 48 | cachedNeighbours.endIdx = currEndIdx; 49 | } 50 | for (var i = 2; i < adjListsToCache.length; i++) { 51 | temp = cachedNeighbours; 52 | cachedNeighbours = tempNeighbours; 53 | tempNeighbours = temp; 54 | icost += adjListsToCache[i][probeTuple[vertexIdxToCache[i]]].intersect( 55 | labelsOrToTypesToCache[i], tempNeighbours, cachedNeighbours); 56 | } 57 | } 58 | switch (cachingType) { 59 | case NONE: 60 | case FULL_CACHING: 61 | outNeighbours = cachedNeighbours; 62 | break; 63 | case PARTIAL_CACHING: 64 | icost += adjLists[0][probeTuple[vertexIdx[0]]].intersect( 65 | labelsOrToTypes[0], cachedNeighbours, outNeighbours); 66 | for (int i = 1; i < adjLists.length; i++) { 67 | temp = outNeighbours; 68 | outNeighbours = tempNeighbours; 69 | tempNeighbours = temp; 70 | icost += adjLists[i][probeTuple[vertexIdx[i]]].intersect( 71 | labelsOrToTypes[i], tempNeighbours, outNeighbours); 72 | } 73 | break; 74 | } 75 | // setAdjListSortOrder the initNeighbours ids in the output tuple. 
76 | numOutTuples += (outNeighbours.endIdx - outNeighbours.startIdx); 77 | for (var idx = outNeighbours.startIdx; idx < outNeighbours.endIdx; idx++) { 78 | probeTuple[outIdx] = outNeighbours.Ids[idx]; 79 | next[0].processNewTuple(); 80 | } 81 | } 82 | 83 | /** 84 | * @see Operator#isSameAs(Operator) 85 | */ 86 | @Override 87 | public boolean isSameAs(Operator operator) { 88 | if (!(operator instanceof Intersect)) { 89 | return false; 90 | } 91 | var intersect = (Intersect) operator; 92 | return 93 | this == intersect || ( 94 | cachingType == intersect.getCachingType() && 95 | getALDsAsString().equals(intersect.getALDsAsString()) && 96 | inSubgraph.isIsomorphicTo(intersect.getInSubgraph()) && 97 | outSubgraph.isIsomorphicTo(intersect.getOutSubgraph()) && 98 | prev.isSameAs(intersect.getPrev()) 99 | ); 100 | } 101 | 102 | /** 103 | * @see Operator#copy(boolean) 104 | */ 105 | @Override 106 | public Intersect copy(boolean isThreadSafe) { 107 | var intersect = new Intersect(toQueryVertex, toType, ALDs, outSubgraph, inSubgraph, 108 | outQVertexToIdxMap); 109 | intersect.prev = prev.copy(isThreadSafe); 110 | intersect.prev.setNext(intersect); 111 | intersect.initCaching(intersect.prev.getLastRepeatedVertexIdx()); 112 | return intersect; 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/hashjoin/Build.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.hashjoin; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 4 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 5 | import ca.waterloo.dsg.graphflow.storage.Graph; 6 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 7 | import lombok.Getter; 8 | import lombok.Setter; 9 | 10 | import java.io.Serializable; 11 | 12 | /** 13 | * An operator building a hash table by hashing incoming tuples on a single attribute. 
*/
public class Build extends Operator implements Serializable {

    @Getter @Setter HashTable hashTable;

    @Getter @Setter int ID;

    @Getter @Setter private QueryGraph probingSubgraph;
    private String queryVertexToHash;
    @Getter private int buildHashIdx;
    @Getter private int hashedTupleLen;

    /**
     * Constructs a {@link Build} object.
     *
     * @param inSubgraph is the subgraph matched by the input tuples.
     * @param queryVertexToHash is the query vertex to hash on.
     * @param buildHashIdx is the index of the query vertex in the build tuple to hash on.
     */
    Build(QueryGraph inSubgraph, String queryVertexToHash, int buildHashIdx) {
        this.inSubgraph = inSubgraph;
        // The hashed vertex itself is not stored with the tuple, hence one value less.
        this.hashedTupleLen = inSubgraph.getNumVertices() - 1;
        this.outTupleLen = inSubgraph.getNumVertices();
        this.queryVertexToHash = queryVertexToHash;
        this.buildHashIdx = buildHashIdx;
        this.name = "HASH ON (" + queryVertexToHash + ")";
    }

    /**
     * Binds the shared tuple and allocates the hash table memory. Only the first call has an
     * effect; later calls are ignored.
     *
     * @see Operator#init(int[], Graph, KeyStore)
     */
    @Override
    public void init(int[] probeTuple, Graph graph, KeyStore store) {
        if (null == this.probeTuple) {
            this.probeTuple = probeTuple;
            this.hashTable.allocateInitialMemory(graph.getHighestVertexId());
        }
    }

    /**
     * Inserts the current tuple in the hash table.
     *
     * @see Operator#processNewTuple()
     */
    @Override
    public void processNewTuple() {
        hashTable.insertTuple(probeTuple);
    }

    /**
     * @see Operator#isSameAs(Operator)
     */
    @Override
    public boolean isSameAs(Operator operator) {
        if (!(operator instanceof Build)) {
            return false;
        }
        var build = (Build) operator;
        return this == operator ||
            (inSubgraph.isIsomorphicTo(build.getInSubgraph()) && prev.isSameAs(build.prev));
    }

    /**
     * Copies this operator and its chain of previous operators. The hash table is not copied.
     *
     * @see Operator#copy(boolean)
     */
    @Override
    public Build copy(boolean isThreadSafe) {
        var build = new Build(inSubgraph, queryVertexToHash, buildHashIdx);
        build.prev = prev.copy(isThreadSafe);
        build.prev.setNext(build);
        build.probingSubgraph = probingSubgraph;
        build.setID(ID);
        return build;
    }
}

--------------------------------------------------------------------------------
/src/main/java/ca/waterloo/dsg/graphflow/plan/operator/hashjoin/HashJoin.java:
--------------------------------------------------------------------------------
package ca.waterloo.dsg.graphflow.plan.operator.hashjoin;

import ca.waterloo.dsg.graphflow.plan.Plan;
import ca.waterloo.dsg.graphflow.plan.operator.Operator;
import ca.waterloo.dsg.graphflow.query.QueryGraph;
import ca.waterloo.dsg.graphflow.util.collection.SetUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Factory wiring a {@link Build} operator and a {@link Probe} operator (or one of its variants)
 * on top of a build plan and a probe plan.
 */
public class HashJoin {

    /**
     * Joins shallow copies of the two given plans and wraps the resulting subplans in a new
     * {@link Plan}.
     *
     * @param outSubgraph is the subgraph matched by the output tuples of the join.
     * @param buildPlan is the plan whose output is hashed.
     * @param probePlan is the plan whose output probes the hash table.
     * @param nextHashJoinID is the ID given to both the build and the probe operator.
     * @param numThreads is the number of threads the plan is meant to run with.
     * @return the plan made of the build subplans followed by the probe subplan(s).
     */
    public static Plan make(QueryGraph outSubgraph, Plan buildPlan, Plan probePlan,
        int nextHashJoinID, int numThreads) {
        return new Plan(make(outSubgraph, buildPlan.shallowCopy().getSubplans(),
            probePlan.shallowCopy().getSubplans(), nextHashJoinID, numThreads));
    }

    /**
     * Appends a {@link Build} to the last build subplan and a probe operator to the last probe
     * subplan. Both passed lists are modified in place: their last element is replaced by the
     * newly appended operator.
     *
     * @param outSubgraph is the subgraph matched by the output tuples of the join.
     * @param buildSubplans are the last operators of the subplans on the build side.
     * @param probeSubplans are the last operators of the subplans on the probe side.
     * @param nextHashJoinID is the ID given to both the build and the probe operator.
     * @param numThreads is the number of threads the plan is meant to run with.
     * @return the subplans of the join, or an empty list if both sides share no query vertex.
     */
    public static List<Operator> make(QueryGraph outSubgraph, List<Operator> buildSubplans,
        List<Operator> probeSubplans, int nextHashJoinID, int numThreads) {
        var preBuild = buildSubplans.get(buildSubplans.size() - 1);
        var preProbe = probeSubplans.get(probeSubplans.size() - 1);
        var joinQVertices = SetUtils.intersect(preBuild.getOutQVertices(),
            preProbe.getOutQVertices());

        if (joinQVertices.isEmpty()) {
            return new ArrayList<>();
        }

        var buildQVertexToIdxMap = preBuild.getOutQVertexToIdxMap();
        var queryVertexToHash = joinQVertices.get(0);
        var buildHashIdx = buildQVertexToIdxMap.get(queryVertexToHash);
        var build = new Build(preBuild.getOutSubgraph(), queryVertexToHash, buildHashIdx);
        build.setID(nextHashJoinID);
        build.setPrev(preBuild);
        preBuild.setNext(build);
        buildSubplans.set(buildSubplans.size() - 1, build);

        var mapping = preBuild.getOutSubgraph().getIsomorphicMappingIfAny(
            preProbe.getOutSubgraph());
        // When both sides match isomorphic subgraphs and a single thread is used, the probe side
        // is not executed: a cartesian probe replays the tuples stored in the hash table instead.
        // The same condition must drive the probe layout, the probe operator type and the final
        // list of subplans; testing only "null != mapping" for the latter dropped the earlier
        // probe-side subplans in the multi-threaded case.
        final boolean probeReplaysHashTable = null != mapping && numThreads == 1;
        Map<String, Integer> probeQVertexToIdxMap;
        if (probeReplaysHashTable) {
            // The replayed tuple holds the hashed values first (build order, hash vertex
            // removed) and the hash vertex last.
            probeQVertexToIdxMap = new HashMap<>();
            for (var queryVertex : buildQVertexToIdxMap.keySet()) {
                var idx = buildQVertexToIdxMap.get(queryVertex);
                if (idx < buildHashIdx) {
                    probeQVertexToIdxMap.put(mapping.get(queryVertex), idx);
                } else if (idx > buildHashIdx) {
                    probeQVertexToIdxMap.put(mapping.get(queryVertex), idx - 1);
                }
            }
            probeQVertexToIdxMap.put(mapping.get(joinQVertices.get(0)),
                buildQVertexToIdxMap.size() - 1);
        } else {
            probeQVertexToIdxMap = preProbe.getOutQVertexToIdxMap();
        }
        var probeHashIdx = probeQVertexToIdxMap.get(queryVertexToHash);
        var outQVertexToIdxMap = computeOutVertexToIdxMap(joinQVertices, buildQVertexToIdxMap,
            probeQVertexToIdxMap);
        var hashedTupleLen = buildQVertexToIdxMap.size() - 1;
        // Indices of the join vertices other than the hashed one, on both sides. Build indices
        // are shifted because the hashed vertex is not stored in the hashed tuple.
        var probeIndices = new int[joinQVertices.size() - 1];
        var buildIndices = new int[joinQVertices.size() - 1];
        for (var i = 1; i < joinQVertices.size(); i++) {
            probeIndices[i - 1] = probeQVertexToIdxMap.get(joinQVertices.get(i));
            var otherBuildIdx = buildQVertexToIdxMap.get(joinQVertices.get(i));
            if (buildHashIdx < otherBuildIdx) {
                otherBuildIdx -= 1;
            }
            buildIndices[i - 1] = otherBuildIdx;
        }

        Probe probe;
        var inSubgraph = preProbe.getOutSubgraph();
        if (probeReplaysHashTable) {
            if (probeIndices.length == 0) {
                probe = new ProbeCartesian(outSubgraph, inSubgraph, joinQVertices, probeHashIdx,
                    hashedTupleLen, preProbe.getOutTupleLen(), outQVertexToIdxMap);
            } else {
                probe = new ProbeMultiVerticesCartesian(outSubgraph, inSubgraph, joinQVertices,
                    probeHashIdx, probeIndices, buildIndices, hashedTupleLen, preProbe.
                    getOutTupleLen(), outQVertexToIdxMap);
            }
        } else {
            if (probeIndices.length == 0) {
                probe = new Probe(outSubgraph, inSubgraph, joinQVertices, probeHashIdx,
                    hashedTupleLen, preProbe.getOutTupleLen(), outQVertexToIdxMap);
            } else {
                probe = new ProbeMultiVertices(outSubgraph, inSubgraph, joinQVertices, probeHashIdx,
                    probeIndices, buildIndices, hashedTupleLen, preProbe.getOutTupleLen(),
                    outQVertexToIdxMap);
            }
            probe.setPrev(preProbe);
            preProbe.setNext(probe);
            probeSubplans.set(probeSubplans.size() - 1, probe);
        }
        probe.setID(nextHashJoinID);
        probe.setLastRepeatedVertexIdx(probeQVertexToIdxMap.size() - 2);
        build.setProbingSubgraph(probe.getInSubgraph());

        var subplans = new ArrayList<>(buildSubplans);
        if (probeReplaysHashTable) {
            subplans.add(probe);
        } else {
            subplans.addAll(probeSubplans);
        }
        return subplans;
    }

    /**
     * Computes the layout of the output tuple: the probe layout is kept as is and the build
     * vertices that are not join vertices are appended in the order of their build index.
     */
    private static Map<String, Integer> computeOutVertexToIdxMap(List<String> joinVertices,
        Map<String, Integer> buildVertexToIdxMap, Map<String, Integer> probeVertexToIdxMap) {
        var outVerticesToIdxMap = new HashMap<>(probeVertexToIdxMap);
        // Order the build vertices by their index in the build tuple.
        var buildVertices = new String[buildVertexToIdxMap.size()];
        for (var buildVertex : buildVertexToIdxMap.keySet()) {
            buildVertices[buildVertexToIdxMap.get(buildVertex)] = buildVertex;
        }
        for (var buildQVertex : buildVertices) {
            if (joinVertices.contains(buildQVertex)) {
                continue;
            }
            outVerticesToIdxMap.put(buildQVertex, outVerticesToIdxMap.size());
        }
        return outVerticesToIdxMap;
    }
}

--------------------------------------------------------------------------------
/src/main/java/ca/waterloo/dsg/graphflow/plan/operator/hashjoin/HashTable.java:
--------------------------------------------------------------------------------
package ca.waterloo.dsg.graphflow.plan.operator.hashjoin;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * A hash table keyed by vertex id. The tuples hashed on a vertex are stored, without the hash
 * vertex itself, in fixed-size chunks carved out of larger integer blocks. For each vertex a
 * flat array holds one (block id, start offset, end offset) triple per chunk.
 */
public class HashTable implements Serializable {

    /** Holder filled by {@link #getBlockAndOffsets(int, int, BlockInfo)}. */
    static class BlockInfo implements Serializable {
        int[] block;
        int startOffset;
        int endOffset;
    }

    transient private int[][] blocks;
    transient private List<int[]> extraBlocks;
    transient private int[][] blockIdsAndChunkOffsets;
    transient int[] numChunks;
    private int INITIAL_NUM_BLOCKS = 10;//1000;
    private int NUM_TUPLES_PER_CHUNK = 32;//64;
    private int NUM_CHUNKS_PER_BLOCK = 80;//8000;
    private int INITIAL_NUM_CHUNKS_PER_VERTEX = 6;

    private int BLOCK_SZ;
    private int CHUNK_SZ;

    private int nextGlobalBlockId = 0;
    private int nextGlobalChunkOffset = 0;

    private int buildHashIdx;
    private int buildTupleLen;
    private int hashedTupleLen;

    /**
     * Constructs a {@link HashTable} object.
     *
     * @param buildHashIdx is the index of the vertex value in the tuples being hashed.
     * @param hashedTupleLen is the size of the tuple being hashed minus 1.
     */
    public HashTable(int buildHashIdx, int hashedTupleLen) {
        this.buildHashIdx = buildHashIdx;
        this.buildTupleLen = hashedTupleLen + 1;
        this.hashedTupleLen = hashedTupleLen;
        recomputeSizes();
    }

    /**
     * Sets the number of blocks allocated upfront. Call it before
     * {@link #allocateInitialMemory(int)}: block ids handed out earlier are interpreted
     * relative to this value.
     *
     * @param initialNumBlocks is the number of blocks to preallocate, at least 1.
     * @throws IllegalArgumentException if the value is not positive.
     */
    void setInitialNumBlocks(int initialNumBlocks) {
        INITIAL_NUM_BLOCKS = requirePositive(initialNumBlocks, "initialNumBlocks");
    }

    /**
     * Sets the number of tuple slots per chunk and updates the derived chunk and block sizes.
     * Call it before {@link #allocateInitialMemory(int)}.
     *
     * @param numTuplesPerChunk is the number of tuple slots per chunk, at least 1.
     * @throws IllegalArgumentException if the value is not positive.
     */
    void setNumTuplesPerChunk(int numTuplesPerChunk) {
        NUM_TUPLES_PER_CHUNK = requirePositive(numTuplesPerChunk, "numTuplesPerChunk");
        recomputeSizes();
    }

    /**
     * Sets the number of chunks per block and updates the derived block size. Call it before
     * {@link #allocateInitialMemory(int)}.
     *
     * @param numChunksPerBlock is the number of chunks per block, at least 1.
     * @throws IllegalArgumentException if the value is not positive.
     */
    void setNumChunksPerBlock(int numChunksPerBlock) {
        NUM_CHUNKS_PER_BLOCK = requirePositive(numChunksPerBlock, "numChunksPerBlock");
        recomputeSizes();
    }

    /**
     * Allocates the initial memory required by the {@link HashTable} and resets the position
     * at which the next chunk is handed out.
     *
     * @param highestVertexId is the highest vertex id in the input data graph.
     */
    void allocateInitialMemory(int highestVertexId) {
        blocks = new int[INITIAL_NUM_BLOCKS][BLOCK_SZ];
        extraBlocks = new ArrayList<>(INITIAL_NUM_BLOCKS);
        blockIdsAndChunkOffsets = new int[highestVertexId + 1][INITIAL_NUM_CHUNKS_PER_VERTEX * 3];
        numChunks = new int[highestVertexId + 1];
        // Start handing out chunks from the beginning of the freshly allocated memory.
        nextGlobalBlockId = 0;
        nextGlobalChunkOffset = 0;
    }

    /**
     * Inserts a tuple in the {@link HashTable}. Every value but the one at the hash index is
     * appended to the last chunk of the hash vertex.
     *
     * @param buildTuple is the tuple to hash.
     */
    void insertTuple(int[] buildTuple) {
        int hashVertex = buildTuple[buildHashIdx];
        var lastChunkIdx = this.numChunks[hashVertex];
        if (0 == lastChunkIdx) {
            // First tuple for this vertex: reserve its first chunk.
            this.numChunks[hashVertex]++;
            updateBlockIdsAndGlobalAndChunkOffset(hashVertex);
        }
        lastChunkIdx = 3 * (this.numChunks[hashVertex] - 1);
        var blockId = blockIdsAndChunkOffsets[hashVertex][lastChunkIdx];
        var startOffset = blockIdsAndChunkOffsets[hashVertex][lastChunkIdx + 1];
        var endOffset = blockIdsAndChunkOffsets[hashVertex][lastChunkIdx + 2];
        var block = blockId < INITIAL_NUM_BLOCKS ? blocks[blockId] :
            extraBlocks.get(blockId - INITIAL_NUM_BLOCKS);
        for (var i = 0; i < buildTupleLen; i++) {
            if (i != buildHashIdx) {
                block[endOffset++] = buildTuple[i];
            }
        }
        blockIdsAndChunkOffsets[hashVertex][lastChunkIdx + 2] = endOffset;
        // Reserve the next chunk as soon as at most one more tuple would fit in this one, so
        // the following insertion always finds a chunk with room. As a consequence a chunk
        // receives at most NUM_TUPLES_PER_CHUNK - 1 tuples.
        if (CHUNK_SZ <= (endOffset - startOffset + hashedTupleLen)) {
            this.numChunks[hashVertex]++;
            resizeBlockIdsAndGlobalAndChunkOffset(hashVertex);
            updateBlockIdsAndGlobalAndChunkOffset(hashVertex);
        }
    }

    /**
     * Sets the block, the start and the end offsets in the passed {@link BlockInfo}.
     *
     * @param hashVertex is the value of the hashed vertex.
     * @param chunkIdx is the index of the chunk.
     * @param blockInfo is the object to set.
     */
    void getBlockAndOffsets(int hashVertex, int chunkIdx, BlockInfo blockInfo) {
        var blockId = blockIdsAndChunkOffsets[hashVertex][chunkIdx * 3];
        blockInfo.startOffset = blockIdsAndChunkOffsets[hashVertex][chunkIdx * 3 + 1];
        blockInfo.endOffset = blockIdsAndChunkOffsets[hashVertex][chunkIdx * 3 + 2];
        blockInfo.block = blockId < INITIAL_NUM_BLOCKS ? blocks[blockId] :
            extraBlocks.get(blockId - INITIAL_NUM_BLOCKS);
    }

    /** Grows the per-vertex triple array when it cannot hold one more chunk entry. */
    private void resizeBlockIdsAndGlobalAndChunkOffset(int hashVertex) {
        if (this.numChunks[hashVertex] + 1 > (blockIdsAndChunkOffsets[hashVertex].length / 3)) {
            var newChunkBlockIdOffsetArray = new int[(this.numChunks[hashVertex] + 2) * 3];
            System.arraycopy(blockIdsAndChunkOffsets[hashVertex], 0, newChunkBlockIdOffsetArray, 0,
                blockIdsAndChunkOffsets[hashVertex].length);
            blockIdsAndChunkOffsets[hashVertex] = newChunkBlockIdOffsetArray;
        }
    }

    /**
     * Assigns the next free chunk to the last chunk entry of the given vertex and advances the
     * global position, allocating an extra block once the preallocated ones are used up.
     */
    private void updateBlockIdsAndGlobalAndChunkOffset(int hashVertex) {
        var lastChunkIdx = (this.numChunks[hashVertex] - 1) * 3;
        blockIdsAndChunkOffsets[hashVertex][lastChunkIdx] = nextGlobalBlockId;
        blockIdsAndChunkOffsets[hashVertex][lastChunkIdx + 1] = nextGlobalChunkOffset;
        blockIdsAndChunkOffsets[hashVertex][lastChunkIdx + 2] = nextGlobalChunkOffset;
        nextGlobalChunkOffset += CHUNK_SZ;
        if (nextGlobalChunkOffset == BLOCK_SZ) {
            nextGlobalBlockId++;
            if (nextGlobalBlockId >= INITIAL_NUM_BLOCKS) {
                extraBlocks.add(new int[BLOCK_SZ]);
            }
            nextGlobalChunkOffset = 0;
        }
    }

    /** Derives the chunk and block sizes (in ints) from the current tuning parameters. */
    private void recomputeSizes() {
        CHUNK_SZ = NUM_TUPLES_PER_CHUNK * hashedTupleLen;
        BLOCK_SZ = CHUNK_SZ * NUM_CHUNKS_PER_BLOCK;
    }

    /** Returns the value if it is strictly positive and throws otherwise. */
    private static int requirePositive(int value, String name) {
        if (value < 1) {
            throw new IllegalArgumentException(name + " must be positive but was " + value);
        }
        return value;
    }
}

--------------------------------------------------------------------------------
/src/main/java/ca/waterloo/dsg/graphflow/plan/operator/hashjoin/Probe.java:
--------------------------------------------------------------------------------
package ca.waterloo.dsg.graphflow.plan.operator.hashjoin;

import ca.waterloo.dsg.graphflow.plan.operator.Operator;
| import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.HashTable.BlockInfo; 5 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 6 | import ca.waterloo.dsg.graphflow.storage.Graph; 7 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 8 | import lombok.Getter; 9 | import lombok.Setter; 10 | 11 | import java.io.Serializable; 12 | import java.util.List; 13 | import java.util.Map; 14 | 15 | /** 16 | * An operator matching incoming tuples by probing a hash table on multiple attributes. 17 | */ 18 | public class Probe extends Operator implements Serializable { 19 | 20 | @Getter @Setter HashTable[] hashTables; 21 | 22 | @Getter @Setter int ID; 23 | 24 | List joinQVertices; 25 | int probeHashIdx; 26 | int hashedTupleLen; 27 | int probeTupleLen; 28 | 29 | transient BlockInfo blockInfo; 30 | 31 | /** 32 | * Constructs a {@link Probe} operator. 33 | * 34 | * @param outSubgraph is the subgraph matched by the output tuples. 35 | * @param inSubgraph is the subgraph matched by the input tuples. 36 | * @param joinQVertices is the list of query vertices to probe the hash table tuples on. 37 | * @param probeHashIdx is the index to probe the hash table on. 38 | * @param hashedTupleLen is the length of the hashed tuple to copy. 39 | * @param probeTupleLen is the length of the previous operator output. 40 | * @param outQVertexToIdxMap The output query vertex to tuple index map. 
41 | */ 42 | Probe(QueryGraph outSubgraph, QueryGraph inSubgraph, List joinQVertices, 43 | int probeHashIdx, int hashedTupleLen, int probeTupleLen, 44 | Map outQVertexToIdxMap) { 45 | this.outSubgraph = outSubgraph; 46 | this.inSubgraph = inSubgraph; 47 | this.joinQVertices = joinQVertices; 48 | this.probeHashIdx = probeHashIdx; 49 | this.hashedTupleLen = hashedTupleLen; 50 | this.probeTupleLen = probeTupleLen; 51 | this.outQVertexToIdxMap = outQVertexToIdxMap; 52 | this.outTupleLen = outQVertexToIdxMap.size(); 53 | name = "PROBE ON (" + joinQVertices.get(0) + ")"; 54 | } 55 | 56 | /** 57 | * @see Operator#init(int[], Graph, KeyStore) 58 | */ 59 | @Override 60 | public void init(int[] probeTuple, Graph graph, KeyStore store) { 61 | if (null == this.probeTuple) { 62 | this.probeTuple = probeTuple; 63 | this.blockInfo = new BlockInfo(); 64 | for (var nextOperator : next) { 65 | nextOperator.init(probeTuple, graph, store); 66 | } 67 | } 68 | } 69 | 70 | /** 71 | * @see Operator#processNewTuple() 72 | */ 73 | @Override 74 | public void processNewTuple() throws LimitExceededException { 75 | var hashVertex = probeTuple[probeHashIdx]; 76 | for (var hashTable : hashTables) { 77 | var lastChunkIdx = hashTable.numChunks[hashVertex]; 78 | var prevFirstItem = -1; 79 | for (var chunkIdx = 0; chunkIdx < lastChunkIdx; chunkIdx++) { 80 | hashTable.getBlockAndOffsets(hashVertex, chunkIdx, blockInfo); 81 | for (var offset = blockInfo.startOffset; offset < blockInfo.endOffset;) { 82 | numOutTuples++; 83 | if (hashedTupleLen == 2) { 84 | var firstItem = blockInfo.block[offset++]; 85 | if (prevFirstItem != firstItem) { 86 | probeTuple[probeTupleLen] = firstItem; 87 | prevFirstItem = firstItem; 88 | } 89 | probeTuple[probeTupleLen + 1] = blockInfo.block[offset++]; 90 | } else { 91 | for (var k = 0; k < hashedTupleLen; k++) { 92 | probeTuple[probeTupleLen + k] = blockInfo.block[offset++]; 93 | } 94 | } 95 | next[0].processNewTuple(); 96 | } 97 | } 98 | } 99 | } 100 | 101 | /** 102 | * 
@see Operator#isSameAs(Operator) 103 | */ 104 | public boolean isSameAs(Operator operator) { 105 | return this == operator || (operator instanceof Probe && 106 | inSubgraph.isIsomorphicTo(operator.getInSubgraph()) && 107 | outSubgraph.isIsomorphicTo(operator.getOutSubgraph()) && 108 | prev.isSameAs(operator.getPrev()) 109 | ); 110 | } 111 | 112 | /** 113 | * @see Operator#copy(boolean) 114 | */ 115 | public Probe copy(boolean isThreadSafe) { 116 | var probe = new Probe(outSubgraph, inSubgraph, joinQVertices, probeHashIdx, hashedTupleLen, 117 | probeTupleLen, outQVertexToIdxMap); 118 | probe.prev = prev.copy(isThreadSafe); 119 | probe.prev.setNext(probe); 120 | probe.setID(ID); 121 | return probe; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/hashjoin/ProbeCartesian.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.hashjoin; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 4 | import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.HashTable.BlockInfo; 5 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 6 | import ca.waterloo.dsg.graphflow.storage.Graph; 7 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 8 | 9 | import java.io.Serializable; 10 | import java.util.List; 11 | import java.util.Map; 12 | 13 | public class ProbeCartesian extends Probe implements Serializable { 14 | 15 | private BlockInfo otherBlockInfo; 16 | private int highestVertexId; 17 | 18 | /** 19 | * @see Probe#Probe(QueryGraph, QueryGraph, List, int, int, int, Map) 20 | */ 21 | ProbeCartesian(QueryGraph outSubgraph, QueryGraph inSubgraph, List joinQVertices, 22 | int probeHashIdx, int hashedTupleLen, int probeTupleLen, 23 | Map outQVertexToIdxMap) { 24 | super(outSubgraph, inSubgraph, joinQVertices, probeHashIdx, hashedTupleLen, probeTupleLen, 25 | outQVertexToIdxMap); 26 | 
this.name = "CARTESIAN " + this.name; 27 | } 28 | 29 | /** 30 | * @see Operator#init(int[], Graph, KeyStore) 31 | */ 32 | @Override 33 | public void init(int[] probeTuple, Graph graph, KeyStore store) { 34 | if (null == this.probeTuple) { 35 | highestVertexId = graph.getHighestVertexId(); 36 | otherBlockInfo = new BlockInfo(); 37 | } 38 | super.init(probeTuple, graph, store); 39 | } 40 | 41 | /** 42 | * @see Operator#execute() 43 | */ 44 | @Override 45 | public void execute() throws LimitExceededException { 46 | for (var aHashVertex = 0; aHashVertex <= highestVertexId; aHashVertex++) { 47 | probeTuple[hashedTupleLen] = aHashVertex; 48 | for (var hashTable : hashTables) { 49 | var aLastChunkIdx = hashTable.numChunks[aHashVertex]; 50 | var aPrevFirstVertex = -1; 51 | for (var aChunkIdx = 0; aChunkIdx < aLastChunkIdx; aChunkIdx++) { 52 | hashTable.getBlockAndOffsets(aHashVertex, aChunkIdx, otherBlockInfo); 53 | for (var anOffset = otherBlockInfo.startOffset; 54 | anOffset < otherBlockInfo.endOffset ;) { 55 | if (hashedTupleLen == 2) { 56 | var firstVertex = otherBlockInfo.block[anOffset++]; 57 | if (aPrevFirstVertex != firstVertex) { 58 | probeTuple[0] = firstVertex; 59 | aPrevFirstVertex = firstVertex; 60 | } 61 | probeTuple[1] = otherBlockInfo.block[anOffset++]; 62 | } else { 63 | for (int k = 0; k < hashedTupleLen; k++) { 64 | probeTuple[k] = otherBlockInfo.block[anOffset++]; 65 | } 66 | } 67 | super/* Probe */.processNewTuple(); 68 | } 69 | } 70 | } 71 | } 72 | } 73 | 74 | /** 75 | * @see Operator#isSameAs(Operator) 76 | */ 77 | public boolean isSameAs(Operator operator) { 78 | return this == operator || (operator instanceof ProbeCartesian && 79 | inSubgraph.isIsomorphicTo(operator.getInSubgraph()) && 80 | outSubgraph.isIsomorphicTo(operator.getOutSubgraph()) 81 | ); 82 | } 83 | 84 | /** 85 | * @see Operator#copy(boolean) 86 | */ 87 | public ProbeCartesian copy(boolean isThreadSafe) { 88 | var probe = new ProbeCartesian(outSubgraph, inSubgraph, joinQVertices, 
probeHashIdx, 89 | hashedTupleLen, probeTupleLen, outQVertexToIdxMap); 90 | probe.setID(ID); 91 | return probe; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/hashjoin/ProbeMultiVertices.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.hashjoin; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 4 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 5 | 6 | import java.io.Serializable; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | /** 11 | * An operator matching incoming tuples by probing a hash table on multiple attributes. 12 | */ 13 | public class ProbeMultiVertices extends Probe implements Serializable { 14 | 15 | int[] probeIndices; 16 | int[] buildIndices; 17 | 18 | /** 19 | * Constructs a {@link ProbeMultiVertices} operator. 20 | * 21 | * @param outSubgraph is the subgraph matched by the output tuples. 22 | * @param inSubgraph is the subgraph matched by the input tuples. 23 | * @param joinQVertices is the list of variables to probe the hash table on. 24 | * @param probeHashIdx is the index to probe the hash table on. 25 | * @param probeIndices is the setAdjListSortOrder of extra indices to check for equality on. 26 | * @param buildIndices is the setAdjListSortOrder of indices to match on with the hashTable. 27 | * @param hashedTupleLen is the length of the hashed tuple to copy. 28 | * @param probeTupleLen is the length of the previous operator output. 29 | * @param outQVertexToIdxMap The output query vertex to tuple index map. 
30 | */ 31 | ProbeMultiVertices(QueryGraph outSubgraph, QueryGraph inSubgraph, List joinQVertices, 32 | int probeHashIdx, int[] probeIndices, int[] buildIndices, int hashedTupleLen, 33 | int probeTupleLen, Map outQVertexToIdxMap) { 34 | super(outSubgraph, inSubgraph, joinQVertices, probeHashIdx, hashedTupleLen, probeTupleLen, 35 | outQVertexToIdxMap); 36 | this.probeIndices = probeIndices; 37 | this.buildIndices = buildIndices; 38 | var strBuilder = new StringBuilder(); 39 | strBuilder.append("PROBE ON "); 40 | if (1 == joinQVertices.size()) { 41 | strBuilder.append("(").append(joinQVertices.get(0)).append(")"); 42 | } else { 43 | for (var i = 0; i < joinQVertices.size(); i++) { 44 | strBuilder 45 | .append(i > 0 && i < joinQVertices.size() - 1 ? ", " : "") 46 | .append(i == joinQVertices.size() - 1 ? " & " : "") 47 | .append("(") 48 | .append(joinQVertices.get(i)) 49 | .append(")"); 50 | } 51 | } 52 | name = strBuilder.toString(); 53 | } 54 | 55 | /** 56 | * @see Operator#processNewTuple() 57 | */ 58 | @Override 59 | public void processNewTuple() throws LimitExceededException { 60 | var hashVertex = probeTuple[probeHashIdx]; 61 | for (var hashTable : hashTables) { 62 | var lastChunkIdx = hashTable.numChunks[hashVertex]; 63 | for (var chunkIdx = 0; chunkIdx < lastChunkIdx; chunkIdx++) { 64 | hashTable.getBlockAndOffsets(hashVertex, chunkIdx, blockInfo); 65 | offsetLoop: for (var offset = blockInfo.startOffset; 66 | offset < blockInfo.endOffset ; offset += hashedTupleLen) { 67 | for (var i = 0; i < probeIndices.length; i++) { 68 | if (probeTuple[probeIndices[i]] != 69 | blockInfo.block[offset + buildIndices[i]]) { 70 | continue offsetLoop; 71 | } 72 | } 73 | numOutTuples++; 74 | var out = 0; 75 | for (var k = 0; k < hashedTupleLen; k++) { 76 | var copy = true; 77 | for (var buildIdx : buildIndices) { 78 | if (k == buildIdx) { 79 | copy = false; 80 | break; 81 | } 82 | } 83 | if (copy) { 84 | probeTuple[probeTupleLen + out++] = blockInfo.block[offset + k]; 85 | } 86 
| } 87 | next[0].processNewTuple(); 88 | } 89 | } 90 | } 91 | } 92 | 93 | /** 94 | * @see Operator#isSameAs(Operator) 95 | */ 96 | public boolean isSameAs(Operator operator) { 97 | return this == operator || (operator instanceof ProbeMultiVertices && 98 | inSubgraph.isIsomorphicTo(operator.getInSubgraph()) && 99 | outSubgraph.isIsomorphicTo(operator.getOutSubgraph()) && 100 | prev.isSameAs(operator.getPrev()) 101 | ); 102 | } 103 | 104 | /** 105 | * @see Operator#copy(boolean) 106 | */ 107 | public ProbeMultiVertices copy(boolean isThreadSafe) { 108 | var probe = new ProbeMultiVertices(outSubgraph, inSubgraph, joinQVertices, probeHashIdx, 109 | probeIndices, buildIndices, hashedTupleLen, probeTupleLen, outQVertexToIdxMap); 110 | probe.prev = prev.copy(isThreadSafe); 111 | probe.prev.setNext(probe); 112 | probe.setID(ID); 113 | return probe; 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/hashjoin/ProbeMultiVerticesCartesian.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.hashjoin; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 4 | import ca.waterloo.dsg.graphflow.plan.operator.hashjoin.HashTable.BlockInfo; 5 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 6 | import ca.waterloo.dsg.graphflow.storage.Graph; 7 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 8 | 9 | import java.io.Serializable; 10 | import java.util.List; 11 | import java.util.Map; 12 | 13 | public class ProbeMultiVerticesCartesian extends ProbeMultiVertices implements Serializable { 14 | 15 | private BlockInfo otherBlockInfo; 16 | private int highestVertexId; 17 | 18 | /** 19 | * @see ProbeMultiVertices#ProbeMultiVertices(QueryGraph, QueryGraph, List, int, int[], int[], 20 | * int, int, Map) 21 | */ 22 | ProbeMultiVerticesCartesian(QueryGraph outSubgraph, QueryGraph 
inSubgraph, 23 | List joinQVertices, int probeHashIdx, int[] probeIndices, int[] buildIndices, 24 | int hashedTupleLen, int probeTupleLen, Map outQVertexToIdxMap) { 25 | super(outSubgraph, inSubgraph, joinQVertices, probeHashIdx, probeIndices, buildIndices, 26 | hashedTupleLen, probeTupleLen, outQVertexToIdxMap); 27 | this.name = "CARTESIAN " + this.name; 28 | } 29 | 30 | /** 31 | * @see Operator#init(int[], Graph, KeyStore) 32 | */ 33 | @Override 34 | public void init(int[] probeTuple, Graph graph, KeyStore store) { 35 | if (null == this.probeTuple) { 36 | highestVertexId = graph.getHighestVertexId(); 37 | otherBlockInfo = new BlockInfo(); 38 | } 39 | super.init(probeTuple, graph, store); 40 | } 41 | 42 | /** 43 | * @see Operator#execute() 44 | */ 45 | @Override 46 | public void execute() throws LimitExceededException { 47 | for (var aHashVertex = 0; aHashVertex <= highestVertexId; aHashVertex++) { 48 | probeTuple[hashedTupleLen] = aHashVertex; 49 | for (var hashTable : hashTables) { 50 | var aLastChunkIdx = hashTable.numChunks[aHashVertex]; 51 | var aPrevFirstVertex = -1; 52 | for (var aChunkIdx = 0; aChunkIdx < aLastChunkIdx; aChunkIdx++) { 53 | hashTable.getBlockAndOffsets(aHashVertex, aChunkIdx, otherBlockInfo); 54 | for (var anOffset = otherBlockInfo.startOffset; 55 | anOffset < otherBlockInfo.endOffset ;) { 56 | if (hashedTupleLen == 2) { 57 | var firstVertex = otherBlockInfo.block[anOffset++]; 58 | if (aPrevFirstVertex != firstVertex) { 59 | probeTuple[0] = firstVertex; 60 | aPrevFirstVertex = firstVertex; 61 | } 62 | probeTuple[1] = otherBlockInfo.block[anOffset++]; 63 | } else { 64 | for (int k = 0; k < hashedTupleLen; k++) { 65 | probeTuple[k] = otherBlockInfo.block[anOffset++]; 66 | } 67 | } 68 | super/* ProbeMultiVertices */.processNewTuple(); 69 | } 70 | } 71 | } 72 | } 73 | } 74 | 75 | /** 76 | * @see Operator#isSameAs(Operator) 77 | */ 78 | public boolean isSameAs(Operator operator) { 79 | return this == operator || (operator instanceof 
ProbeMultiVerticesCartesian && 80 | inSubgraph.isIsomorphicTo(operator.getInSubgraph()) && 81 | outSubgraph.isIsomorphicTo(operator.getOutSubgraph()) 82 | ); 83 | } 84 | 85 | /** 86 | * @see Operator#copy(boolean) 87 | */ 88 | public ProbeMultiVerticesCartesian copy(boolean isThreadSafe) { 89 | var probe = new ProbeMultiVerticesCartesian(outSubgraph, inSubgraph, joinQVertices, 90 | probeHashIdx, probeIndices, buildIndices, hashedTupleLen, probeTupleLen, 91 | outQVertexToIdxMap); 92 | probe.setID(ID); 93 | return probe; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/scan/Scan.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.scan; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 4 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 5 | import ca.waterloo.dsg.graphflow.storage.Graph; 6 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 7 | import ca.waterloo.dsg.graphflow.storage.SortedAdjList; 8 | import ca.waterloo.dsg.graphflow.util.collection.MapUtils; 9 | import lombok.Getter; 10 | 11 | import java.io.Serializable; 12 | import java.util.HashMap; 13 | import java.util.Map; 14 | 15 | /** 16 | * Scans all edges in the forward adjacency list given an edge label, a source vertex type, and a 17 | * destination source type. Scanned edge are pushed to the next operators one at a time. 18 | */ 19 | public class Scan extends Operator implements Serializable { 20 | 21 | @Getter protected String fromQueryVertex, toQueryVertex; 22 | @Getter short fromType, toType, labelOrToType; 23 | 24 | SortedAdjList[] fwdAdjList; 25 | private int fromVertexStartIdx, fromVertexEndIdx; 26 | int[] vertexIds; 27 | short[] vertexTypes; 28 | 29 | /** 30 | * Constructs a {@link Scan} operator. 31 | * 32 | * @param outSubgraph is the subgraph matched by the scanned output tuples. 
33 | */ 34 | public Scan(QueryGraph outSubgraph) { 35 | super(outSubgraph, null /* no inSubgraph */); 36 | if (outSubgraph.getEdges().size() > 1) { 37 | throw new IllegalArgumentException(); 38 | } 39 | var queryEdge = outSubgraph.getEdges().get(0); 40 | fromType = queryEdge.getFromType(); 41 | toType = queryEdge.getToType(); 42 | labelOrToType = queryEdge.getLabel(); 43 | lastRepeatedVertexIdx = 0; 44 | fromQueryVertex = queryEdge.getFromVertex(); 45 | toQueryVertex = queryEdge.getToVertex(); 46 | outQVertexToIdxMap = new HashMap<>(); 47 | outQVertexToIdxMap.put(fromQueryVertex, 0); 48 | outQVertexToIdxMap.put(toQueryVertex, 1); 49 | name = "SCAN (" + fromQueryVertex + ")->(" + toQueryVertex + ")"; 50 | } 51 | 52 | /** 53 | * @see Operator#init(int[], Graph, KeyStore) 54 | */ 55 | @Override 56 | public void init(int[] probeTuple, Graph graph, KeyStore store) { 57 | this.probeTuple = probeTuple; 58 | this.vertexIds = graph.getVertexIds(); 59 | this.vertexTypes = graph.getVertexTypes(); 60 | if (KeyStore.ANY != fromType) { 61 | this.fromVertexStartIdx = graph.getVertexTypeOffsets()[fromType]; 62 | this.fromVertexEndIdx = graph.getVertexTypeOffsets()[fromType + 1]; 63 | } else { 64 | this.fromVertexStartIdx = 0; 65 | this.fromVertexEndIdx = graph.getHighestVertexId() + 1; 66 | } 67 | this.fwdAdjList = graph.getFwdAdjLists(); 68 | if (graph.isAdjListSortedByType()) { 69 | labelOrToType = toType; 70 | toType = KeyStore.ANY; 71 | } 72 | for (var nextOperator : next) { 73 | nextOperator.init(probeTuple, graph, store); 74 | } 75 | } 76 | 77 | /** 78 | * @see Operator#execute() 79 | */ 80 | @Override 81 | public void execute() throws LimitExceededException { 82 | int fromVertex, toVertexStartIdx, toVertexEndIdx; 83 | for (var fromIdx = fromVertexStartIdx; fromIdx < fromVertexEndIdx; fromIdx++) { 84 | fromVertex = vertexIds[fromIdx]; 85 | probeTuple[0] = fromVertex; 86 | toVertexStartIdx = fwdAdjList[fromVertex].getLabelOrTypeOffsets()[labelOrToType]; 87 | toVertexEndIdx = 
fwdAdjList[fromVertex].getLabelOrTypeOffsets()[labelOrToType + 1]; 88 | for (var toIdx = toVertexStartIdx; toIdx < toVertexEndIdx; toIdx++) { 89 | probeTuple[1] = fwdAdjList[fromVertex].getNeighbourId(toIdx); 90 | if (toType == KeyStore.ANY || vertexTypes[probeTuple[1]] == toType) { 91 | numOutTuples++; 92 | next[0].processNewTuple(); 93 | } 94 | } 95 | } 96 | } 97 | 98 | /** 99 | * @see Operator#updateOperatorName(Map) 100 | */ 101 | @Override 102 | public void updateOperatorName(Map queryVertexToIndexMap) { 103 | queryVertexToIndexMap = new HashMap<>(); 104 | queryVertexToIndexMap.put(fromQueryVertex, 0); 105 | queryVertexToIndexMap.put(toQueryVertex, 1); 106 | if (null != next) { 107 | for (var nextOperator : next) { 108 | nextOperator.updateOperatorName(MapUtils.copy(queryVertexToIndexMap)); 109 | } 110 | } 111 | } 112 | 113 | /** 114 | * @see Operator#processNewTuple() 115 | */ 116 | @Override 117 | public void processNewTuple() { 118 | throw new UnsupportedOperationException( 119 | this.getClass().getSimpleName() + " does not support execute()."); 120 | } 121 | 122 | /** 123 | * @see Operator#copy(boolean) 124 | */ 125 | @Override 126 | public Scan copy(boolean isThreadSafe) { 127 | if (isThreadSafe) { 128 | return new ScanBlocking(outSubgraph); 129 | } 130 | return new Scan(outSubgraph); 131 | } 132 | 133 | /** 134 | * @see Operator#isSameAs(Operator) 135 | */ 136 | public boolean isSameAs(Operator operator) { 137 | return operator instanceof Scan && 138 | fromType == ((Scan) operator).fromType && 139 | toType == ((Scan) operator).toType && 140 | labelOrToType == ((Scan) operator).labelOrToType; 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/scan/ScanBlocking.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.scan; 2 | 3 | import 
/**
 * A multi-threaded implementation of the {@link Scan} operator. Every instance repeatedly
 * claims the next partition of edges from a shared {@link VertexIdxLimits} cursor (guarded by
 * its lock) and produces only the edges of the partition it claimed.
 */
public class ScanBlocking extends Scan {

    protected static final Logger logger = LogManager.getLogger(ScanBlocking.class);

    // Number of edges a single updateIndicesLimits() call tries to claim.
    public static int PARTITION_SIZE = 100;

    // Start (inclusive) of the partition currently owned by this instance.
    private int currFromIdx, currToIdx;
    // End of the partition currently owned by this instance (toIdxLimit is exclusive).
    private int fromIdxLimit, toIdxLimit;
    // End of the whole scan: one past the last source index, and the end offset in the
    // adjacency list of the last source vertex.
    private int highestFromIdx, highestToIdx;

    // Cursor handed out to claim partitions; must be set before init() is called.
    @Setter private VertexIdxLimits globalVerticesIdxLimits;

    /**
     * The scan cursor (source index, adjacency-list offset) and the lock protecting it.
     */
    public static class VertexIdxLimits {
        int fromVariableIndexLimit;
        int toVariableIndexLimit;
        ReentrantLock lock = new ReentrantLock();
    }

    /**
     * Constructs a {@link ScanBlocking} operator.
     *
     * @param outputSubgraph The subgraph, with one query relation, matched by the output tuples.
     */
    ScanBlocking(QueryGraph outputSubgraph) {
        super(outputSubgraph);
    }

    /**
     * Binds the operator to the graph, computes the bounds of the whole scan, resets the
     * cursor in {@code globalVerticesIdxLimits} to the scan start, and initializes the next
     * operators.
     *
     * @see Operator#init(int[], Graph, KeyStore)
     */
    @Override
    public void init(int[] probeTuple, Graph graph, KeyStore store) {
        this.probeTuple = probeTuple;
        this.vertexIds = graph.getVertexIds();
        this.vertexTypes = graph.getVertexTypes();
        this.fwdAdjList = graph.getFwdAdjLists();
        if (graph.isAdjListSortedByType()) {
            // Lists are partitioned by destination type: select the partition by type and
            // drop the per-neighbour type check.
            labelOrToType = toType;
            toType = KeyStore.ANY;
        }
        if (KeyStore.ANY != fromType) {
            currFromIdx = graph.getVertexTypeOffsets()[fromType];
            highestFromIdx = graph.getVertexTypeOffsets()[fromType + 1];
        } else {
            currFromIdx = 0;
            highestFromIdx = graph.getHighestVertexId() + 1;
        }
        currToIdx = fwdAdjList[vertexIds[currFromIdx]].getLabelOrTypeOffsets()[labelOrToType];
        highestToIdx = fwdAdjList[vertexIds[highestFromIdx - 1]].getLabelOrTypeOffsets()[
            labelOrToType + 1];
        // NOTE(review): the cursor is written here without taking its lock - presumably
        // init() completes on every instance before any execute() starts; confirm.
        globalVerticesIdxLimits.fromVariableIndexLimit = currFromIdx;
        globalVerticesIdxLimits.toVariableIndexLimit = currToIdx;
        for (var nextOperator : next) {
            nextOperator.init(probeTuple, graph, store);
        }
    }

    /**
     * Claims partitions one after the other and produces their edges until the claimed start
     * position reaches the end of the scan.
     *
     * @see Operator#execute()
     */
    @Override
    public void execute() throws LimitExceededException {
        updateIndicesLimits();
        while (currFromIdx < highestFromIdx - 1 ||
            (currFromIdx == highestFromIdx - 1 && currToIdx < highestToIdx)) {
            if (currFromIdx == fromIdxLimit) {
                // The partition lies within a single adjacency list.
                produceNewEdges(currFromIdx, currToIdx, toIdxLimit);
            } else if (currFromIdx < fromIdxLimit) {
                // The partition spans several source vertices: the tail of the first list,
                produceNewEdges(currFromIdx, currToIdx, fwdAdjList[vertexIds[currFromIdx]].
                    getLabelOrTypeOffsets()[labelOrToType + 1]);
                // every list strictly in between in full,
                for (var fromIdx = currFromIdx + 1; fromIdx < fromIdxLimit; fromIdx++) {
                    var adjList = fwdAdjList[vertexIds[fromIdx]];
                    produceNewEdges(fromIdx, adjList.getLabelOrTypeOffsets()[labelOrToType],
                        adjList.getLabelOrTypeOffsets()[labelOrToType + 1]);
                }
                // and the head of the last list up to toIdxLimit.
                produceNewEdges(fromIdxLimit, fwdAdjList[vertexIds[fromIdxLimit]].
                    getLabelOrTypeOffsets()[labelOrToType], toIdxLimit);
            }
            updateIndicesLimits();
        }
    }

    /**
     * Pushes the edges of one source vertex, for adjacency-list offsets in
     * {@code [startToIdx, endToIdx)}, to the first next operator.
     *
     * @param fromIdx is the index of the source vertex in {@code vertexIds}.
     * @param startToIdx is the first adjacency-list offset to produce (inclusive).
     * @param endToIdx is the last adjacency-list offset to produce (exclusive).
     */
    private void produceNewEdges(int fromIdx, int startToIdx, int endToIdx)
        throws LimitExceededException {
        probeTuple[0] = vertexIds[fromIdx];
        for (var toIdx = startToIdx; toIdx < endToIdx; toIdx++) {
            probeTuple[1] = fwdAdjList[probeTuple[0]].getNeighbourId(toIdx);
            if (toType == KeyStore.ANY || vertexTypes[probeTuple[1]] == toType) {
                numOutTuples++;
                next[0].processNewTuple();
            }
        }
    }

    /**
     * Claims the next partition under the cursor lock: reads the cursor as the partition start,
     * advances it by up to {@link #PARTITION_SIZE} edges, and writes the new position back.
     */
    private void updateIndicesLimits() {
        globalVerticesIdxLimits.lock.lock();
        try {
            fromIdxLimit = currFromIdx = globalVerticesIdxLimits.fromVariableIndexLimit;
            toIdxLimit = currToIdx = globalVerticesIdxLimits.toVariableIndexLimit;
            var numEdgesLeft = PARTITION_SIZE;
            // NOTE(review): execute() tests "< highestFromIdx - 1" where this loop tests
            // "< highestFromIdx"; the loop actually ends through numEdgesLeft or the break
            // below - confirm the looser bound is intended.
            while (numEdgesLeft > 0 && (fromIdxLimit < highestFromIdx ||
                (fromIdxLimit == highestFromIdx - 1 && toIdxLimit < highestToIdx))) {
                var toLimit = fwdAdjList[vertexIds[fromIdxLimit]].getLabelOrTypeOffsets()[
                    labelOrToType + 1];
                if (toIdxLimit + numEdgesLeft < toLimit) {
                    // The rest of the budget fits inside the current adjacency list.
                    toIdxLimit += numEdgesLeft;
                    numEdgesLeft = 0;
                } else { // toIdxLimit + numEdgesLeft >= toLimit
                    // Consume the rest of this list; the extra 1 also charges the budget for
                    // moving to the next source vertex.
                    numEdgesLeft -= (toLimit - toIdxLimit + 1);
                    toIdxLimit = toLimit;
                    if (fromIdxLimit == highestFromIdx - 1) {
                        // Last source vertex reached: nothing left to claim.
                        break;
                    }
                    fromIdxLimit += 1;
                    toIdxLimit = fwdAdjList[vertexIds[fromIdxLimit]].getLabelOrTypeOffsets()[
                        labelOrToType];
                }
            }
            globalVerticesIdxLimits.fromVariableIndexLimit = fromIdxLimit;
            globalVerticesIdxLimits.toVariableIndexLimit = toIdxLimit;
        } finally {
            globalVerticesIdxLimits.lock.unlock();
        }
    }
}
51 | probeTuple[0] = edge[0]; 52 | probeTuple[1] = edge[1]; 53 | numOutTuples++; 54 | for (var nextOperator : next) { 55 | nextOperator.processNewTuple(); 56 | } 57 | } 58 | } catch (NoSuchElementException e) { 59 | // queue is empty. 60 | } 61 | } 62 | 63 | /** 64 | * @param edges is a list of edges to sample from. 65 | * @param numEdgesToSample is the number of edges to sample. 66 | */ 67 | public void setEdgeIndicesToSample(int[] edges, int numEdgesToSample) { 68 | var randomNumGen = new Random(0 /*Always same seed for reproducibility*/); 69 | var numEdges = edges.length / 2; 70 | edgesQueue = new LinkedBlockingQueue<>(numEdgesToSample); 71 | while (edgesQueue.size() < numEdgesToSample) { 72 | var edgeIdx = randomNumGen.nextInt(numEdges); 73 | edgesQueue.add(new int[] { 74 | edges[edgeIdx * 2] /* fromVertex */, 75 | edges[edgeIdx * 2 + 1] /* toVertex */ 76 | }); 77 | } 78 | } 79 | 80 | /** 81 | * @param edges is a list of edges to sample from. 82 | * @param numEdgesToSample is the number of edges to sample. 
83 | */ 84 | public void setEdgeIndicesToSample(List edges, int numEdgesToSample) { 85 | var randomNumGen = new Random(0 /*Always same seed for reproducibility*/); 86 | edgesQueue = new LinkedBlockingQueue<>(numEdgesToSample); 87 | while (edgesQueue.size() < numEdgesToSample) { 88 | var edgeIdx = randomNumGen.nextInt(edges.size()); 89 | edgesQueue.add(edges.get(edgeIdx)); 90 | } 91 | } 92 | 93 | /** 94 | * @see Operator#copy() 95 | */ 96 | @Override 97 | public ScanSampling copy() { 98 | var copy = new ScanSampling(outSubgraph); 99 | if (null != next) { 100 | var nextCopy = new Operator[next.length]; 101 | for (var i = 0; i < next.length; i++) { 102 | nextCopy[i] = next[i].copy(); 103 | } 104 | copy.setNext(nextCopy); 105 | for (var nextOp : nextCopy) { 106 | nextOp.setPrev(copy); 107 | } 108 | } 109 | copy.edgesQueue = edgesQueue; 110 | return copy; 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/sink/Sink.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.sink; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 4 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 5 | import ca.waterloo.dsg.graphflow.storage.Graph; 6 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 7 | 8 | /** 9 | * An base sink collecting the output results from the dataflow acting as a count(*). 10 | */ 11 | public class Sink extends Operator { 12 | 13 | /** 14 | * The different types of sink operators. 15 | */ 16 | public enum SinkType { 17 | LIMIT, 18 | COUNTER /* default */ 19 | } 20 | 21 | public Operator[] previous; 22 | 23 | /** 24 | * Constructs a {@link Sink} object. 25 | * 26 | * @param queryGraph is the {@link QueryGraph}, the tuples in the sink match. 
27 | */ 28 | public Sink(QueryGraph queryGraph) { 29 | super(queryGraph, queryGraph); 30 | } 31 | 32 | /** 33 | * @see Operator#init(int[], Graph, KeyStore) 34 | */ 35 | @Override 36 | public void init(int[] probeTuple, Graph graph, KeyStore store) { 37 | if (null == this.probeTuple) { 38 | this.probeTuple = probeTuple; 39 | } 40 | } 41 | 42 | /** 43 | * Executes the operator. 44 | */ 45 | public void execute() throws LimitExceededException { 46 | if (null != previous) { 47 | previous[0].execute(); 48 | } else { 49 | prev.execute(); 50 | } 51 | } 52 | 53 | /** 54 | * @see Operator#processNewTuple() 55 | */ 56 | @Override 57 | public void processNewTuple() throws LimitExceededException {} 58 | 59 | @Override 60 | public long getNumOutTuples() { 61 | if (null != previous) { 62 | var numOutTuples = 0; 63 | for (var previousOperator : previous) { 64 | numOutTuples += previousOperator.getNumOutTuples(); 65 | } 66 | return numOutTuples; 67 | } 68 | return prev.getNumOutTuples(); 69 | } 70 | 71 | /** 72 | * @see Sink#isSameAs(Operator) 73 | */ 74 | @Override 75 | public boolean isSameAs(Operator operator) { 76 | return operator instanceof Sink && 77 | this.getPrev().isSameAs(operator.getPrev()); 78 | } 79 | 80 | /** 81 | * @see Operator#copy(boolean) 82 | */ 83 | @Override 84 | public Sink copy(boolean isThreadSafe) { 85 | var sink = new Sink(outSubgraph); 86 | sink.prev = this.prev.copy(isThreadSafe); 87 | return sink; 88 | } 89 | 90 | /** 91 | * @see Operator#copy(boolean) 92 | */ 93 | @Override 94 | public Sink copy() { 95 | return new Sink(outSubgraph); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/plan/operator/sink/SinkLimit.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.plan.operator.sink; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.Operator; 4 | import 
ca.waterloo.dsg.graphflow.query.QueryGraph; 5 | import ca.waterloo.dsg.graphflow.util.IOUtils; 6 | import lombok.Getter; 7 | import lombok.Setter; 8 | 9 | /** 10 | * A sink operator stopping the query plan execution once a number of output tuples is reached. 11 | */ 12 | public class SinkLimit extends Sink { 13 | 14 | @Setter public long startTime; 15 | @Getter public double elapsedTime; 16 | @Setter public int outTuplesLimit; 17 | 18 | /** 19 | * Constructs a {@link SinkLimit} object. 20 | * 21 | * @param queryGraph is the {@link QueryGraph}, the tuples in the sink match. 22 | * @param outTuplesLimit is the number of output tuples the query is limited to. 23 | */ 24 | public SinkLimit(QueryGraph queryGraph, int outTuplesLimit) { 25 | super(queryGraph); 26 | this.outTuplesLimit = outTuplesLimit; 27 | } 28 | 29 | /** 30 | * @see Operator#processNewTuple() 31 | */ 32 | @Override 33 | public void processNewTuple() throws LimitExceededException { 34 | if (prev.getNumOutTuples() >= outTuplesLimit) { 35 | elapsedTime = IOUtils.getElapsedTimeInMillis(startTime); 36 | throw new LimitExceededException(); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/planner/QueryPlannerBig.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.planner; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.Plan; 4 | import ca.waterloo.dsg.graphflow.plan.operator.scan.Scan; 5 | import ca.waterloo.dsg.graphflow.plan.operator.sink.Sink.SinkType; 6 | import ca.waterloo.dsg.graphflow.planner.catalog.Catalog; 7 | import ca.waterloo.dsg.graphflow.query.QueryEdge; 8 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 9 | import ca.waterloo.dsg.graphflow.storage.Graph; 10 | 11 | import java.util.ArrayList; 12 | import java.util.HashMap; 13 | import java.util.HashSet; 14 | import java.util.List; 15 | import java.util.Map; 16 | import 
java.util.Set; 17 | import java.util.stream.Collectors; 18 | 19 | /** 20 | * Generates a {@link Plan}. The intersection cost (ICost) is used as a metric of the 21 | * optimization and multiple heuristics are used to reduce the search space: 22 | * (1) Does not consider hash joins. 23 | * (2) Considered a sample equal to 10 of the edges to scan. The ones with least selectivity. 24 | * (3) Considers the next possible extension for query vertices with highest number of ALDs. 25 | * (4) Does not consider 'interesting orders' for better possible caching. 26 | */ 27 | public class QueryPlannerBig extends QueryPlanner { 28 | 29 | public static int NUM_TOP_PLANS_KEPT = 5; 30 | 31 | private Map> subgraphPlans; 33 | 34 | /** 35 | * @see QueryPlanner#QueryPlanner(QueryGraph, Catalog, Graph) 36 | */ 37 | public QueryPlannerBig(QueryGraph queryGraph, Catalog catalog, Graph graph) { 38 | super(queryGraph, catalog, graph); 39 | subgraphPlans = new HashMap<>(); 40 | if (numVertices >= 15) { 41 | NUM_TOP_PLANS_KEPT = 3; 42 | } 43 | } 44 | 45 | /** 46 | * Returns based on the optimizer the 'best' {@link Plan} to evaluate a given 47 | * {@link QueryGraph}. 48 | * 49 | * @return The generated {@link Plan} to evaluate the input query graph. 50 | */ 51 | @Override 52 | public Plan plan() { 53 | considerLeastSelectiveScans(); 54 | while (nextNumQVertices <= numVertices) { 55 | considerNextQueryExtensions(); 56 | nextNumQVertices++; 57 | } 58 | var bestPlan = subgraphPlans.get(numVertices).get(0); 59 | for (var i = 1; i < subgraphPlans.get(numVertices).size(); i++) { 60 | var plan = subgraphPlans.get(numVertices).get(1); 61 | if (bestPlan.getEstimatedICost() > plan.getEstimatedICost()) { 62 | bestPlan = plan; 63 | } 64 | } 65 | // each operator added only sets its prev pointer (to reuse operator objects). 66 | // the picked plan needs to set the next pointer for each operator in the linear subplans. 
67 | setNextPointers(bestPlan); 68 | if (queryGraph.getLimit() > 0) { 69 | bestPlan.setSinkType(SinkType.LIMIT); 70 | bestPlan.setOutTuplesLimit(queryGraph.getLimit()); 71 | } 72 | return bestPlan; 73 | } 74 | 75 | private void considerLeastSelectiveScans() { 76 | nextNumQVertices = 2; /* level = 2 for edge scan */ 77 | subgraphPlans.putIfAbsent(nextNumQVertices, new ArrayList<>(NUM_TOP_PLANS_KEPT)); 78 | var edgesToScan = new QueryEdge[NUM_TOP_PLANS_KEPT]; 79 | var numEdgesToScan = new int[NUM_TOP_PLANS_KEPT]; 80 | var qEdges = queryGraph.getEdges(); 81 | for (var i = 0; i < NUM_TOP_PLANS_KEPT; i++) { 82 | 83 | edgesToScan[i] = qEdges.get(i); 84 | numEdgesToScan[i] = getNumEdges(qEdges.get(i)); 85 | } 86 | outer: for (var i = NUM_TOP_PLANS_KEPT; i < qEdges.size(); i++) { 87 | var numEdges = getNumEdges(qEdges.get(i)); 88 | for (var j = 0; j < NUM_TOP_PLANS_KEPT; j++) { 89 | if (numEdges < numEdgesToScan[j]) { 90 | edgesToScan[j] = qEdges.get(i); 91 | numEdgesToScan[j] = numEdges; 92 | continue outer; 93 | } 94 | } 95 | } 96 | for (var i = 0; i < NUM_TOP_PLANS_KEPT; i++) { 97 | var outputSubgraph = new QueryGraph(); 98 | outputSubgraph.addEdge(edgesToScan[i]); 99 | var scan = new Scan(outputSubgraph); 100 | var queryPlan = new Plan(scan, numEdgesToScan[i]); 101 | subgraphPlans.get(nextNumQVertices).add(queryPlan); 102 | } 103 | nextNumQVertices = 3; 104 | } 105 | 106 | private void considerNextQueryExtensions() { 107 | var prevNumQVertices = nextNumQVertices - 1; 108 | var newQueryPlans = new ArrayList(NUM_TOP_PLANS_KEPT); 109 | for (var prevQueryPlan : subgraphPlans.get(prevNumQVertices)) { 110 | var prevQVertices = prevQueryPlan.getLastOperator().getOutSubgraph().getQVertices(); 111 | var toQVertices = queryGraph.getNeighbors(new HashSet<>(prevQVertices)); 112 | var inSubgraph = prevQueryPlan.getLastOperator().getOutSubgraph(); 113 | var nextToQVertices = filterToQVerticesByMaxNumALDs(toQVertices, inSubgraph); 114 | for (var toQVertex : nextToQVertices) { 115 | 
var keyAndPlan = getPlanWithNextExtend(prevQueryPlan, toQVertex); 116 | var icost = keyAndPlan.b.getEstimatedICost(); 117 | if (newQueryPlans.size() < NUM_TOP_PLANS_KEPT) { 118 | newQueryPlans.add(keyAndPlan.b); 119 | } else { 120 | for (int i = 0; i < NUM_TOP_PLANS_KEPT; i++) { 121 | if (newQueryPlans.get(i).getEstimatedICost() > icost) { 122 | newQueryPlans.set(i, keyAndPlan.b); 123 | } 124 | } 125 | } 126 | } 127 | } 128 | subgraphPlans.put(nextNumQVertices, newQueryPlans); 129 | if (!hasLimit && nextNumQVertices >= 4) { 130 | // TODO: is this necessary?! 131 | for (var queryPlans : subgraphPlans.get(nextNumQVertices)) { 132 | var outSubgraph = queryPlans.getLastOperator().getOutSubgraph(); 133 | // considerAllNextHashJoinOperators(outSubgraph); 134 | } 135 | } 136 | } 137 | 138 | private Set filterToQVerticesByMaxNumALDs(Set toQVertices, 139 | QueryGraph inSubgraph) { 140 | var maxNumALDs = Integer.MIN_VALUE; 141 | Map toQVertexToNumALDsMap = new HashMap<>(); 142 | for (var toQVertex : toQVertices) { 143 | var numALDs = 0; 144 | for (var fromQVertex : inSubgraph.getQVertices()) { 145 | if (queryGraph.containsQueryEdge(fromQVertex, toQVertex)) { 146 | numALDs++; 147 | } 148 | } 149 | if (maxNumALDs < numALDs) { 150 | maxNumALDs = numALDs; 151 | } 152 | toQVertexToNumALDsMap.put(toQVertex, numALDs); 153 | } 154 | var finalMaxNumALDs = maxNumALDs; 155 | return toQVertices. 156 | stream(). 157 | filter(toQVertex -> toQVertexToNumALDsMap.get(toQVertex) == finalMaxNumALDs). 
158 | collect(Collectors.toSet()); 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/planner/catalog/CatalogFactory.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.planner.catalog; 2 | 3 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 4 | import ca.waterloo.dsg.graphflow.storage.Graph; 5 | import ca.waterloo.dsg.graphflow.util.IOUtils; 6 | 7 | import java.io.IOException; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | /** 12 | * Constructs a {@link Graph} object from CSV and binary data. 13 | */ 14 | public class CatalogFactory { 15 | 16 | /** 17 | * Constructs a {@link Catalog} object from binary serialized data. 18 | * 19 | * @param directory is the directory to deserialize binary data from. 20 | * @return the constructed {@link Catalog} object. 21 | * @throws IOException if stream to file cannot be written to or closed. 22 | * @throws ClassNotFoundException if the object read is from input stream is not found. 23 | */ 24 | @SuppressWarnings("unchecked") // casting. 
public Catalog make(String directory) throws IOException, ClassNotFoundException {
    // `directory` is used as a plain string prefix: file names are appended to it directly,
    // so it has to end with a path separator.
    var numSampledEdges = (int) IOUtils.deserializeObj(directory + "numSampledEdges");
    // The icost and selectivity files carry the number of sampled edges in their names.
    // NOTE(review): the generic type arguments of the two Map casts below look truncated in
    // this copy of the file ("Map>") - restore them from version control.
    var icost = (Map>) IOUtils.deserializeObj(
        directory + "icost_" + numSampledEdges);
    // The cardinality map is read from the file named "selectivity_<numSampledEdges>".
    var cardinality = (Map>) IOUtils.deserializeObj(
        directory + "selectivity_" + numSampledEdges);
    var inSubgraphs = (List) IOUtils.deserializeObj(directory + "inSubgraphs");
    var catalog = new Catalog(icost, cardinality, inSubgraphs);
    // The remaining settings are stored one value per file.
    catalog.setAdjListSortedByType((boolean) IOUtils.deserializeObj(directory +
        "isAdjListSortedByType"));
    catalog.setNumSampledEdges(numSampledEdges);
    catalog.setMaxInputNumVertices((int) IOUtils.deserializeObj(directory +
        "maxInputNumVertices"));
    return catalog;
}
/**
 * An {@link Intersect} variant that extends each input tuple by one query vertex, accumulates
 * the intersection cost in {@code icost}, and routes each output tuple to a next operator
 * chosen by the type of the new vertex unless adjacency lists are sorted by type.
 */
public class IntersectCatalog extends Intersect implements Serializable {

    // When true, every output tuple goes to next[0]; otherwise to next[<type of new vertex>].
    private boolean isAdjListSortedByType;
    // Cost of the most recent computation of the cached intersection.
    private long lastIcost = 0;

    /**
     * @param isAdjListSortedByType selects how output tuples are routed to the next operators.
     * @see EI#make(String, short, List, QueryGraph, QueryGraph, Map)
     */
    public IntersectCatalog(String toQVertex, short toType, List ALDs,
        QueryGraph outSubgraph, QueryGraph inSubgraph, Map outQVertexToIdxMap,
        boolean isAdjListSortedByType) {
        super(toQVertex, toType, ALDs, outSubgraph, inSubgraph, outQVertexToIdxMap);
        this.isAdjListSortedByType = isAdjListSortedByType;
    }

    /**
     * Initializes like the parent but restores {@code toType} to its value from before the
     * call.
     *
     * @see Operator#init(int[], Graph, KeyStore)
     */
    @Override
    public void init(int[] probeTuple, Graph graph, KeyStore store) {
        // NOTE(review): presumably super.init() may overwrite toType - confirm in Intersect/EI.
        var type = toType;
        super.init(probeTuple, graph, store);
        toType = type;
    }

    /**
     * Computes the extensions of the current tuple, adds their cost to {@code icost}, and
     * pushes one output tuple per resulting vertex.
     *
     * @see Operator#processNewTuple()
     */
    @Override
    public void processNewTuple() throws LimitExceededException {
        if (1 == ALDs.size()) {
            // single adjacency list: its neighbours are the output vertex values.
            adjListsToCache[0][probeTuple[vertexIdxToCache[0]]].setNeighbourIds(
                labelsOrToTypesToCache[0], outNeighbours);
            icost += outNeighbours.endIdx - outNeighbours.startIdx;
        } else {
            // intersect the adjacency lists and set the output vertex values.
            Neighbours temp;
            if (cachingType == CachingType.NONE || !isIntersectionCached()) {
                // (Re)compute the cacheable intersection, tracking its cost in lastIcost.
                adjListsToCache[0][probeTuple[vertexIdxToCache[0]]].setNeighbourIds(
                    labelsOrToTypesToCache[0], initNeighbours);
                lastIcost = initNeighbours.endIdx - initNeighbours.startIdx;
                lastIcost += adjListsToCache[1][probeTuple[vertexIdxToCache[1]]].intersect(
                    labelsOrToTypesToCache[1], initNeighbours, cachedNeighbours);
                if (toType != KeyStore.ANY) {
                    // Compact in place, keeping only neighbours of the requested type.
                    // NOTE(review): entries are written from index 0 while the range read
                    // starts at startIdx - presumably intersect() leaves startIdx at 0; confirm.
                    var currEndIdx = 0;
                    for (var i = cachedNeighbours.startIdx; i < cachedNeighbours.endIdx; i++) {
                        if (vertexTypes[cachedNeighbours.Ids[i]] == toType) {
                            cachedNeighbours.Ids[currEndIdx++] = cachedNeighbours.Ids[i];
                        }
                    }
                    cachedNeighbours.endIdx = currEndIdx;
                }
                for (var i = 2; i < adjListsToCache.length; i++) {
                    // Swap buffers so the previous result becomes the input of the next
                    // intersection.
                    temp = cachedNeighbours;
                    cachedNeighbours = tempNeighbours;
                    tempNeighbours = temp;
                    lastIcost += adjListsToCache[i][probeTuple[vertexIdxToCache[i]]].intersect(
                        labelsOrToTypesToCache[i], tempNeighbours, cachedNeighbours);
                }
            }
            switch (cachingType) {
                case NONE:
                case FULL_CACHING:
                    // lastIcost is added even when the cached result was reused above.
                    icost += lastIcost;
                    outNeighbours = cachedNeighbours;
                    break;
                case PARTIAL_CACHING:
                    // Intersect the cached part with the remaining, non-cached lists.
                    icost += adjLists[0][probeTuple[vertexIdx[0]]].intersect(
                        labelsOrToTypes[0], cachedNeighbours, outNeighbours);
                    for (int i = 1; i < adjLists.length; i++) {
                        temp = outNeighbours;
                        outNeighbours = tempNeighbours;
                        tempNeighbours = temp;
                        icost += adjLists[i][probeTuple[vertexIdx[i]]].intersect(
                            labelsOrToTypes[i], tempNeighbours, outNeighbours);
                    }
                    break;
            }
        }

        for (var idx = outNeighbours.startIdx; idx < outNeighbours.endIdx; idx++) {
            probeTuple[outIdx] = outNeighbours.Ids[idx];
            numOutTuples++;
            if (isAdjListSortedByType) {
                next[0].processNewTuple();
            } else {
                // One next operator per vertex type, indexed by the type of the new vertex.
                next[vertexTypes[probeTuple[outIdx]]].processNewTuple();
            }
        }
    }

    /**
     * Copies this operator and, recursively, its next operators, re-linking each copied next
     * operator back to the copy.
     *
     * @see Operator#copy()
     */
    @Override
    public IntersectCatalog copy() {
        var copy = new IntersectCatalog(toQueryVertex, toType, ALDs, outSubgraph, inSubgraph,
            outQVertexToIdxMap, isAdjListSortedByType);
        if (null != next) {
            var nextCopy = new Operator[next.length];
            for (var i = 0; i < next.length; i++) {
                nextCopy[i] = next[i].copy();
            }
            copy.setNext(nextCopy);
            for (var nextOp : nextCopy) {
                nextOp.setPrev(copy);
            }
        }
        return copy;
    }
}
12 | */ 13 | public Noop(QueryGraph queryGraph) { 14 | super(queryGraph, queryGraph); 15 | } 16 | 17 | /** 18 | * @see Operator#init(int[], Graph, KeyStore) 19 | */ 20 | @Override 21 | public void init(int[] probeTuple, Graph graph, KeyStore store) { 22 | this.probeTuple = probeTuple; 23 | for (var nextOperator : next) { 24 | nextOperator.init(probeTuple, graph, store); 25 | } 26 | } 27 | 28 | /** 29 | * @see Operator#processNewTuple() 30 | */ 31 | @Override 32 | public void processNewTuple() throws LimitExceededException { 33 | numOutTuples++; 34 | for (var nextOperator : next) { 35 | nextOperator.processNewTuple(); 36 | } 37 | } 38 | 39 | /** 40 | * @see Operator#copy() 41 | */ 42 | @Override 43 | public Noop copy() { 44 | var copy = new Noop(outSubgraph); 45 | if (null != next) { 46 | var nextCopy = new Operator[next.length]; 47 | for (var i = 0; i < next.length; i++) { 48 | nextCopy[i] = next[i].copy(); 49 | } 50 | copy.setNext(nextCopy); 51 | for (var nextOp : nextCopy) { 52 | nextOp.setPrev(copy); 53 | } 54 | } 55 | return copy; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/query/QueryEdge.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.query; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.io.Serializable; 7 | 8 | /** 9 | * Represents a query edge. 10 | */ 11 | public class QueryEdge implements Serializable { 12 | 13 | @Getter private String fromVertex; 14 | @Getter private String toVertex; 15 | @Getter @Setter private short fromType; 16 | @Getter @Setter private short toType; 17 | @Getter @Setter private short label; 18 | 19 | /** 20 | * Constructs a {@link QueryEdge} object. 21 | * 22 | * @param fromVertex is the from query vertex of the query edge. 23 | * @param toVertex is the to query vertex of the query edge. 
24 | * @param fromType is the from query vertex type. 25 | * @param toType is the to query vertex type. 26 | * @param label is the query edge label. 27 | */ 28 | public QueryEdge(String fromVertex, String toVertex, short fromType, short toType, 29 | short label) { 30 | this.fromVertex = fromVertex; 31 | this.toVertex = toVertex; 32 | this.fromType = fromType; 33 | this.toType = toType; 34 | this.label = label; 35 | } 36 | 37 | /** 38 | * Constructs a {@link QueryEdge} object. 39 | * 40 | * @param fromQVertex is the from query vertex of the query edge. 41 | * @param toQVertex is the to query vertex of the query edge. 42 | */ 43 | public QueryEdge(String fromQVertex, String toQVertex) { 44 | this(fromQVertex, toQVertex, (short) 0 /* fromType */, (short) 0 /* toType */, 45 | (short) 0 /* label */); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/query/QueryGraphSet.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.query; 2 | 3 | import java.util.ArrayList; 4 | import java.util.LinkedHashMap; 5 | import java.util.LinkedHashSet; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * A setAdjListSortOrder of {@link QueryGraph}s. 12 | */ 13 | public class QueryGraphSet { 14 | 15 | private Map> queryGraphs = new LinkedHashMap<>(); 16 | 17 | /** 18 | * @param queryGraph The {@link QueryGraph} to append to the set. 19 | */ 20 | public void add(QueryGraph queryGraph) { 21 | var encoding = queryGraph.getEncoding(); 22 | queryGraphs.putIfAbsent(encoding, new ArrayList<>()); 23 | queryGraphs.get(encoding).add(queryGraph); 24 | } 25 | 26 | /** 27 | * Checks if an isomorphic query graph is in the setAdjListSortOrder. 28 | * 29 | * @param queryGraph The {@link QueryGraph} to check. 30 | * @return True if an isomorphic {@link QueryGraph} is in the set. False, otherwise. 
31 | */ 32 | public boolean contains(QueryGraph queryGraph) { 33 | var queryGraphs = this.queryGraphs.get(queryGraph.getEncoding()); 34 | if (null == queryGraphs) { 35 | return false; 36 | } 37 | for (var otherQueryGraph : queryGraphs) { 38 | if (queryGraph.isIsomorphicTo(otherQueryGraph)) { 39 | return true; 40 | } 41 | } 42 | return false; 43 | } 44 | 45 | /** 46 | * @return The sorted {@link QueryGraph} setAdjListSortOrder. 47 | */ 48 | public Set getQueryGraphSet() { 49 | var queryGraphSet = new LinkedHashSet(); 50 | for (var encoding : queryGraphs.keySet()) { 51 | queryGraphSet.addAll(queryGraphs.get(encoding)); 52 | } 53 | return queryGraphSet; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/query/parser/AntlrErrorListener.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.query.parser; 2 | 3 | import org.antlr.v4.runtime.BaseErrorListener; 4 | import org.antlr.v4.runtime.Parser; 5 | import org.antlr.v4.runtime.RecognitionException; 6 | import org.antlr.v4.runtime.Recognizer; 7 | import org.antlr.v4.runtime.atn.ATNConfigSet; 8 | import org.antlr.v4.runtime.dfa.DFA; 9 | import org.antlr.v4.runtime.misc.ParseCancellationException; 10 | 11 | import java.util.BitSet; 12 | 13 | /** 14 | * This class is used to throw parseAntlr exceptions. 
15 | */ 16 | public class AntlrErrorListener extends BaseErrorListener { 17 | 18 | static final AntlrErrorListener INSTANCE = new AntlrErrorListener(); 19 | 20 | @Override 21 | public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int 22 | charPositionInLine, String msg, RecognitionException e) throws ParseCancellationException { 23 | throw new ParseCancellationException("line " + line + ":" + charPositionInLine + " " + msg); 24 | } 25 | 26 | @Override 27 | public void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, int stopIndex, 28 | boolean exact, BitSet ambigAlts, ATNConfigSet configs) throws ParseCancellationException { 29 | throw new ParseCancellationException("Ambiguity Exception startIdx:stopIndex=" + 30 | startIndex + ":" + stopIndex); 31 | } 32 | 33 | @Override 34 | public void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, int 35 | stopIndex, BitSet conflictingAlts, ATNConfigSet configs) throws ParseCancellationException { 36 | throw new ParseCancellationException("AttemptingFullContext Exception " + 37 | "startIdx:stopIndex=" + startIndex + ":" + stopIndex); 38 | } 39 | 40 | @Override 41 | public void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, int 42 | stopIndex, int prediction, ATNConfigSet configs) throws ParseCancellationException { 43 | throw new ParseCancellationException("ContextSensitivity Exception startIdx:stopIndex=" 44 | + startIndex + ":" + stopIndex); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/query/parser/ParseTreeVisitor.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.query.parser; 2 | 3 | import ca.waterloo.dsg.graphflow.grammar.GraphflowBaseVisitor; 4 | import ca.waterloo.dsg.graphflow.grammar.GraphflowParser.EdgeContext; 5 | import 
ca.waterloo.dsg.graphflow.grammar.GraphflowParser.GraphflowContext; 6 | import ca.waterloo.dsg.graphflow.grammar.GraphflowParser.MatchPatternContext; 7 | import ca.waterloo.dsg.graphflow.query.QueryEdge; 8 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 9 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 10 | 11 | /** 12 | * This class implements the ANTLR4 methods used to traverse the parseAntlr tree and return a 13 | * {@link QueryGraph} object. 14 | */ 15 | public class ParseTreeVisitor extends GraphflowBaseVisitor { 16 | 17 | private KeyStore store; 18 | 19 | /** 20 | * Constructs a {@link ParseTreeVisitor} object. 21 | * 22 | * @param store The type and label key store. 23 | */ 24 | ParseTreeVisitor(KeyStore store) { 25 | this.store = store; 26 | } 27 | 28 | @Override 29 | public QueryGraph visitGraphflow(GraphflowContext ctx) { 30 | var queryGraph = visitMatchPattern(ctx.matchPattern()); 31 | if (null != ctx.LIMIT()) { 32 | queryGraph.setLimit(Integer.parseInt(ctx.Digits().getText())); 33 | } 34 | return queryGraph; 35 | } 36 | 37 | @Override 38 | public QueryGraph visitMatchPattern(MatchPatternContext ctx) { 39 | var queryGraph = new QueryGraph(); 40 | for (int i = 0; i < ctx.edge().size(); i++) { 41 | visitEdge(ctx.edge(i), queryGraph); 42 | } 43 | for (var queryEdge : queryGraph.getEdges()) { 44 | queryEdge.setFromType(queryGraph.getVertexType(queryEdge.getFromVertex())); 45 | queryEdge.setToType(queryGraph.getVertexType(queryEdge.getToVertex())); 46 | } 47 | return queryGraph; 48 | } 49 | 50 | private void visitEdge(EdgeContext ctx, QueryGraph queryGraph) { 51 | var fromQVertex = ctx.vertex(0).variable().getText(); 52 | var toQVertex = ctx.vertex(1).variable().getText(); 53 | var queryEdge = new QueryEdge(fromQVertex, toQVertex); 54 | if (null != ctx.vertex(0).type()) { 55 | var fromType = ctx.vertex(0).type().variable().getText(); 56 | queryEdge.setFromType(store.getTypeKeyAsShort(fromType)); 57 | } else if 
(queryGraph.getVertexToTypeMap().containsKey(fromQVertex)) { 58 | queryEdge.setFromType(queryGraph.getVertexToTypeMap().get(fromQVertex)); 59 | } 60 | if (null != ctx.vertex(1).type()) { 61 | var toType = ctx.vertex(1).type().variable().getText(); 62 | queryEdge.setToType(store.getTypeKeyAsShort(toType)); 63 | } else if (queryGraph.getVertexToTypeMap().containsKey(toQVertex)) { 64 | queryEdge.setToType(queryGraph.getVertexToTypeMap().get(toQVertex)); 65 | } 66 | if (null != ctx.label()) { 67 | var label = ctx.label().variable().getText(); 68 | queryEdge.setLabel(store.getLabelKeyAsShort(label)); 69 | } 70 | queryGraph.addEdge(queryEdge); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/query/parser/QueryParser.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.query.parser; 2 | 3 | import ca.waterloo.dsg.graphflow.grammar.GraphflowLexer; 4 | import ca.waterloo.dsg.graphflow.grammar.GraphflowParser; 5 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 6 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 7 | import org.antlr.v4.runtime.CharStreams; 8 | import org.antlr.v4.runtime.CommonTokenStream; 9 | import org.antlr.v4.runtime.misc.ParseCancellationException; 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | /** 14 | * Converts a raw query string into a {@code ParsedQuery} object. 15 | */ 16 | public class QueryParser { 17 | 18 | private static final Logger logger = LogManager.getLogger(QueryParser.class); 19 | 20 | /** 21 | * Parses the user query to obtain a {@link QueryGraph} object. 22 | * 23 | * @param query is the user query. 24 | * @return The parsed {@link QueryGraph}. 
25 | */ 26 | public static QueryGraph parse(String query, KeyStore store) { 27 | QueryGraph queryGraph; 28 | try { 29 | queryGraph = parseAntlr(query + ";", store); 30 | } catch (ParseCancellationException e) { 31 | logger.debug("ERROR parsing: " + e.getMessage()); 32 | return null; 33 | } 34 | if (queryGraph == null) { 35 | logger.debug("queryGraph not parsed properly."); 36 | return null; 37 | } 38 | return queryGraph; 39 | } 40 | 41 | private static QueryGraph parseAntlr(String query, KeyStore store) 42 | throws ParseCancellationException { 43 | var lexer = new GraphflowLexer(CharStreams.fromString(query)); 44 | lexer.removeErrorListeners(); // Remove default listeners first. 45 | lexer.addErrorListener(AntlrErrorListener.INSTANCE); 46 | 47 | var parser = new GraphflowParser(new CommonTokenStream(lexer)); 48 | parser.removeErrorListeners(); // Remove default listeners first. 49 | parser.addErrorListener(AntlrErrorListener.INSTANCE); 50 | 51 | try { 52 | var visitor = new ParseTreeVisitor(store); 53 | return visitor.visit(parser.graphflow() /* parseTree */); 54 | } catch (Exception e) { 55 | throw new ParseCancellationException(e.getMessage()); 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/runner/AbstractRunner.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.runner; 2 | 3 | import ca.waterloo.dsg.graphflow.util.IOUtils; 4 | import org.apache.commons.cli.CommandLine; 5 | import org.apache.commons.cli.CommandLineParser; 6 | import org.apache.commons.cli.DefaultParser; 7 | import org.apache.commons.cli.HelpFormatter; 8 | import org.apache.commons.cli.Option; 9 | import org.apache.commons.cli.Options; 10 | import org.apache.commons.cli.ParseException; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | import java.io.IOException; 15 | 16 | 
/** 17 | * The base class for shared functionality between Runner classes. 18 | */ 19 | public abstract class AbstractRunner { 20 | 21 | private static final Logger logger = LogManager.getLogger(AbstractRunner.class); 22 | 23 | /** 24 | * sanitizes the string directory from user input and ensures it ends with character '/'. 25 | * Creates the directory if not already created in the file system. 26 | * 27 | * @param userDefinedDirectory The output directory from command line. 28 | */ 29 | protected static String sanitizeDirStrAndMkdirIfNeeded(String userDefinedDirectory) 30 | throws IOException { 31 | String outputDirectory = sanitizeDirStr(userDefinedDirectory); 32 | IOUtils.mkdirs(outputDirectory); 33 | return outputDirectory; 34 | } 35 | 36 | protected static String sanitizeDirStr(String userDefinedDirectory) { 37 | String outputDirectory = userDefinedDirectory; 38 | if (!outputDirectory.equals("") && !outputDirectory.endsWith("/")) { 39 | outputDirectory += "/"; 40 | } 41 | return outputDirectory; 42 | } 43 | 44 | /** 45 | * @param args The supplied command-line arguments. 46 | * @param options The setAdjListSortOrder of {@link Options} to parse the args. 47 | * 48 | * @return A {@link CommandLine} object providing access to the parsed args given the options. 49 | */ 50 | protected static CommandLine parseCmdLine(String[] args, Options options) { 51 | var cmdLineParser = new DefaultParser(); 52 | CommandLine cmdLine = null; 53 | try { 54 | cmdLine = cmdLineParser.parse(options, args); 55 | } catch (ParseException e) { 56 | logger.error("Error parsing cmd line - " + e.getMessage()); 57 | } 58 | return cmdLine; 59 | } 60 | 61 | protected static boolean isAskingHelp(String[] args, Options options) { 62 | try { 63 | if (isAskingHelp(args)) { 64 | HelpFormatter fmt = new HelpFormatter(); 65 | fmt.printHelp("Help", options); 66 | return true; 67 | } 68 | } catch (ParseException e) { 69 | // ignore the parsing error as it is due to not using the proper options. 
70 | } 71 | return false; 72 | } 73 | 74 | private static boolean isAskingHelp(String[] args) throws ParseException { 75 | Option helpOption = ArgsFactory.getHelpOption(); 76 | Options options = new Options(); 77 | options.addOption(helpOption); 78 | CommandLineParser parser = new DefaultParser(); 79 | CommandLine cmd = parser.parse(options, args); 80 | return cmd.hasOption(helpOption.getOpt()); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/runner/ArgsFactory.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.runner; 2 | 3 | import org.apache.commons.cli.Option; 4 | 5 | /** 6 | * The class containing all Command Line Options options needed by the runners. 7 | */ 8 | public class ArgsFactory { 9 | 10 | public static Option getHelpOption() { 11 | return new Option("h" /* HELP */, "help", false, "Print this message."); 12 | } 13 | 14 | /* 15 | * Serialize DataSet Runner: 16 | * ~~~~~~~~~~~~~~~~~~~~~~~~~ 17 | * INPUT_FILE_EDGES -e 18 | * EDGES_FILE_SEPARATOR -m 19 | * INPUT_FILE_VERTICES -v 20 | * VERTICES_FILE_SEPARATOR -n 21 | * SERIALIZE_OUTPUT -o 22 | * UNDIRECTED -u 23 | */ 24 | public static String INPUT_FILE_VERTICES = "v"; 25 | public static String INPUT_FILE_EDGES = "e"; 26 | public static String EDGES_FILE_SEPARATOR = "m"; 27 | public static String VERTICES_FILE_SEPARATOR = "n"; 28 | public static String SERIALIZE_OUTPUT = "o"; 29 | public static String UNDIRECTED = "u"; 30 | 31 | public static Option getInputFileEdges() { 32 | var option = new Option(INPUT_FILE_EDGES, "input_file_edges", true /* hasArg */, 33 | "The separator between columns in the input CSV file."); 34 | option.setRequired(true); 35 | return option; 36 | } 37 | 38 | public static Option getOutputDirOption() { 39 | var option = new Option(SERIALIZE_OUTPUT, "output", true, 40 | "Absolute path to serialize the input graph."); 
41 | option.setRequired(true); 42 | return option; 43 | } 44 | 45 | public static Option getEdgesFileSeparator() { 46 | return new Option(EDGES_FILE_SEPARATOR, "edge_separator", true /* hasArg */, 47 | "The separator between columns in the input CSV file. The default is set to ','."); 48 | } 49 | 50 | public static Option getInputFileVertices() { 51 | return new Option(INPUT_FILE_VERTICES, "input_file_vertices", true /* hasArg */, 52 | "The absolute path to the vertices csv file."); 53 | } 54 | 55 | public static Option getVerticesFileSeparator() { 56 | return new Option(VERTICES_FILE_SEPARATOR, "vertices_separator", true /* hasArg */, 57 | "The separator between columns in the input CSV file. The default is set to ','."); 58 | } 59 | 60 | /* 61 | * Query Plans Generator: 62 | * ~~~~~~~~~~~~~~~~~~~~~~ 63 | * QUERY -q 64 | * PLAN_CATEGORY -p 65 | * SERIALIZE_OUTPUT -o (same as 'Serialize DataSet Runner') 66 | * SERIALIZE_PLANS -s 67 | * INPUT_FILE_STORE -i 68 | * DIFFERENTIATE_SINGLE_ALD -k 69 | */ 70 | public static String QUERY = "q"; 71 | public static String DISABLE_CACHE = "c"; 72 | public static String PLAN_CATEGORY = "p"; 73 | public static String SERIALIZE_PLANS = "s"; 74 | public static String INPUT_FILE_STORE = "i"; 75 | public static String ENABLE_ADAPTIVITY = "a"; 76 | public static String DIFFERENTIATE_SINGLE_ALD = "k"; 77 | 78 | public static Option getQueryOption() { 79 | var option = new Option(QUERY, "query", true, 80 | "Query graph to evaluate e.g. 
'(a)->(b)' and '(a)->(b), (b)->(c)'"); 81 | option.setRequired(true); 82 | return option; 83 | } 84 | 85 | public static Option getDisableCachingOption() { 86 | return new Option(DISABLE_CACHE, "cache", false, "Disable caching in intersect operators."); 87 | } 88 | 89 | public static Option getPlanCategoryOption() { 90 | var option = new Option(PLAN_CATEGORY, "plan_category", true, "category of plans to run"); 91 | option.setRequired(true); 92 | return option; 93 | } 94 | 95 | public static Option getSerializeQueryPlansOption() { 96 | return new Option(SERIALIZE_PLANS, "serialize_plans", false, "serialize query plans."); 97 | } 98 | 99 | public static Option getInputFileStoreOption() { 100 | return new Option(INPUT_FILE_STORE, "input_file_store", true /* hasArg */, 101 | "The absolute path to the type and label store serialized file."); 102 | } 103 | 104 | public static Option getEnableAdaptivityOption() { 105 | return new Option(ENABLE_ADAPTIVITY, "enable_adaptivity", false, 106 | "Enable adaptivity in the query plans."); 107 | } 108 | 109 | public static Option getDifferentiateSingleALD() { 110 | return new Option(DIFFERENTIATE_SINGLE_ALD, "differentiate_single_ald", false, 111 | "Enable differentiation between single forward and backward ALD."); 112 | } 113 | 114 | /* 115 | * Query Plan Executor: 116 | * ~~~~~~~~~~~~~~~~~~~~ 117 | * INPUT_GRAPH_DIR -i 118 | * INPUT_SER_PLAN -p 119 | * OUTPUT_FILE -o 120 | * NUM_THREADS -t 121 | * PARTITION_SIZE -s 122 | * DISABLE_FLATTENING -f 123 | * DISABLE_CACHE -c 124 | * ENABLE_ADAPTIVITY -a 125 | * 126 | * Query Plan JSON Executor: 127 | * ~~~~~~~~~~~~~~~~~~~~~~~~~ 128 | * INPUT_GRAPH_DIR -i 129 | * INPUT_JSON_PLAN -j 130 | * NUM_THREADS -t 131 | * PARTITION_SIZE -s 132 | * ENABLE_ADAPTIVITY -a (same as 'Query Plan Executor') 133 | */ 134 | public static String INPUT_GRAPH_DIR = "i"; 135 | public static String INPUT_JSON_PLAN = "j"; 136 | public static String INPUT_SER_PLAN = "p"; 137 | public static String OUTPUT_FILE = 
"o"; 138 | public static String NUM_THREADS = "t"; 139 | public static String PARTITION_SIZE = "s"; 140 | 141 | public static Option getInputGraphDirectoryOption() { 142 | var option = new Option(INPUT_GRAPH_DIR, "input_graph_dir", true, 143 | "Absolute path to the directory of the serialized input graph."); 144 | option.setRequired(true); 145 | return option; 146 | } 147 | 148 | public static Option getInputSerializedPlanOption() { 149 | var option = new Option(INPUT_SER_PLAN, "input_ser_plan", true, 150 | "Query graph to evaluate e.g. '(a)->(b)' and '(a)->(b), (b)->(c)'"); 151 | option.setRequired(true); 152 | return option; 153 | } 154 | 155 | public static Option getInputJSONPlanOption() { 156 | var option = new Option(INPUT_JSON_PLAN, "input_json_plan", true, 157 | "Query graph to evaluate in the JSON format.'"); 158 | option.setRequired(true); 159 | return option; 160 | } 161 | 162 | public static Option getOutputFileOption() { 163 | var option = new Option(OUTPUT_FILE, "output_file", true, 164 | "Absolute path to the output log file."); 165 | option.setRequired(true); 166 | return option; 167 | } 168 | 169 | public static Option getNotRequiredOutputFileOption() { 170 | return new Option(OUTPUT_FILE, "output_file", true, 171 | "Absolute path to the output log file."); 172 | } 173 | 174 | public static Option getNumberThreadsOption() { 175 | return new Option(NUM_THREADS, "number_threads", true, 176 | "Number of threads used to parallelize the computation."); 177 | } 178 | 179 | public static Option getPartitionSizeOption() { 180 | return new Option(PARTITION_SIZE, "partition_size", true, 181 | "Size of each edge partition obtained at once when scanning."); 182 | } 183 | 184 | /* 185 | * Serialize Catalog Runner: 186 | * ~~~~~~~~~~~~~~~~~~~~~~~~~ 187 | * INPUT_GRAPH -i (same as 'Query Plan Executor') 188 | * NUM_SAMPLED_EDGES -n 189 | * NUM_MAX_INPUT_VERTICES -v 190 | * NUM_THREADS -t (same as 'Query Plan Executor') 191 | */ 192 | public static String 
NUM_SAMPLED_EDGES = "n"; 193 | public static String NUM_MAX_INPUT_VERTICES = "v"; 194 | 195 | public static Option getNumberEdgesToSampleOption() { 196 | return new Option(NUM_SAMPLED_EDGES, "number_edges_to_sample", true, 197 | "The number of edges to sample when scanning for the catalog query transform."); 198 | } 199 | 200 | public static Option getMaxInputNumVerticesOption() { 201 | return new Option(NUM_MAX_INPUT_VERTICES, "vertices", true, 202 | "The max number of vertices for input subgraphs when collecting catalog stats."); 203 | } 204 | 205 | public static Option getIsGraphUndirected() { 206 | return new Option(UNDIRECTED, "undirected", false, "hint: the input graph is undirected."); 207 | } 208 | 209 | /* 210 | * Optimizer Executor: 211 | * ~~~~~~~~~~~~~~~~~~~ 212 | * INPUT_GRAPH_DIR -i (same as 'Query Plan Executor') 213 | * QUERY -q (same as 'Query Plans Generator') 214 | * EXECUTE_PLAN -e 215 | * DISABLE_FLATTENING -f (same as 'Query Plan Executor') 216 | * ENABLE_ADAPTIVITY -a (same as 'Query Plan Executor') 217 | */ 218 | public static String EXECUTE_PLAN = "e"; 219 | 220 | public static Option getExecuteOption() { 221 | return new Option(EXECUTE_PLAN, "execute", false, "Execute the optimizer's picked plan."); 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/runner/dataset/CatalogSerializer.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.runner.dataset; 2 | 3 | import ca.waterloo.dsg.graphflow.planner.catalog.Catalog; 4 | import ca.waterloo.dsg.graphflow.planner.catalog.CatalogPlans; 5 | import ca.waterloo.dsg.graphflow.runner.AbstractRunner; 6 | import ca.waterloo.dsg.graphflow.runner.ArgsFactory; 7 | import ca.waterloo.dsg.graphflow.storage.Graph; 8 | import ca.waterloo.dsg.graphflow.storage.GraphFactory; 9 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 10 | import 
ca.waterloo.dsg.graphflow.storage.KeyStoreFactory; 11 | import org.apache.commons.cli.Options; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | 15 | import java.io.IOException; 16 | 17 | /** 18 | * Loads the given dataset, resets the catalog and saves the catalog in a serialized format in the 19 | * given output directory. 20 | */ 21 | public class CatalogSerializer extends AbstractRunner { 22 | 23 | protected static final Logger logger = LogManager.getLogger(CatalogSerializer.class); 24 | 25 | public static void main(String[] args) throws InterruptedException { 26 | // If the user asks for help, enforce it over the required options. 27 | if (isAskingHelp(args, getCommandLineOptions())) { 28 | return; 29 | } 30 | 31 | var cmdLine = parseCmdLine(args, getCommandLineOptions()); 32 | if (null == cmdLine) { 33 | logger.info("could not parse all the program arguments"); 34 | return; 35 | } 36 | 37 | var maxInputNumVertices = cmdLine.hasOption(ArgsFactory.NUM_MAX_INPUT_VERTICES) ? 38 | Integer.parseInt(cmdLine.getOptionValue(ArgsFactory.NUM_MAX_INPUT_VERTICES)) : 39 | CatalogPlans.DEF_MAX_INPUT_NUM_VERTICES; 40 | var numSampledEdges = cmdLine.hasOption(ArgsFactory.NUM_SAMPLED_EDGES) ? 41 | Integer.parseInt(cmdLine.getOptionValue(ArgsFactory.NUM_SAMPLED_EDGES)) : 42 | CatalogPlans.DEF_NUM_EDGES_TO_SAMPLE; 43 | 44 | // Run the plans and collect sampled estimates for i-cost and cardinality. 45 | var numThreads = cmdLine.hasOption(ArgsFactory.NUM_THREADS) ? 46 | Integer.parseInt(cmdLine.getOptionValue(ArgsFactory.NUM_THREADS)) : 1 /* default */; 47 | 48 | // Load the data from the given binary directory. 
49 | var inputDirectory = sanitizeDirStr(cmdLine.getOptionValue(ArgsFactory.INPUT_GRAPH_DIR)); 50 | Graph graph; 51 | KeyStore store; 52 | try { 53 | graph = new GraphFactory().make(inputDirectory); 54 | store = new KeyStoreFactory().make(inputDirectory); 55 | } catch (IOException | ClassNotFoundException e) { 56 | logger.error("Error in deserialization: " + e.getMessage()); 57 | return; 58 | } 59 | 60 | Catalog catalog = new Catalog(numSampledEdges, maxInputNumVertices); 61 | try { 62 | catalog.populate(graph, store, numThreads, inputDirectory + "/catalog.txt"); 63 | } catch (IOException e) { 64 | logger.error("Error logging catalog in human readable format: " + e.getMessage()); 65 | } 66 | try { 67 | catalog.serialize(inputDirectory); 68 | } catch (IOException e) { 69 | logger.error("Error in serializing the catalog: " + e.getMessage()); 70 | } 71 | } 72 | 73 | /** 74 | * @return The {@link Options} required by the {@link CatalogSerializer}. 75 | */ 76 | private static Options getCommandLineOptions() { 77 | var options = new Options(); // ArgsFactory. 
78 | options.addOption(ArgsFactory.getInputGraphDirectoryOption()); // INPUT_GRAPH_DIR -i 79 | options.addOption(ArgsFactory.getNumberEdgesToSampleOption()); // NUM_SAMPLED_EDGES -n 80 | options.addOption(ArgsFactory.getMaxInputNumVerticesOption()); // NUM_MAX_INPUT_VERTICES -v 81 | options.addOption(ArgsFactory.getNumberThreadsOption()); // NUM_THREADS -t 82 | return options; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/runner/dataset/DatasetSerializer.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.runner.dataset; 2 | 3 | import ca.waterloo.dsg.graphflow.runner.AbstractRunner; 4 | import ca.waterloo.dsg.graphflow.runner.ArgsFactory; 5 | import ca.waterloo.dsg.graphflow.storage.Graph; 6 | import ca.waterloo.dsg.graphflow.storage.GraphFactory; 7 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 8 | import org.apache.commons.cli.Options; 9 | import org.apache.logging.log4j.LogManager; 10 | import org.apache.logging.log4j.Logger; 11 | 12 | import java.io.IOException; 13 | 14 | /** 15 | * Loads the given .csv file and saves it in a serialized format in the given directory. 16 | */ 17 | public class DatasetSerializer extends AbstractRunner { 18 | 19 | private static final Logger logger = LogManager.getLogger(DatasetSerializer.class); 20 | 21 | public static void main(String[] args) throws IOException { 22 | // If the user asks for help, enforce it over the required options. 
23 | if (isAskingHelp(args, getCommandLineOptions())) { 24 | return; 25 | } 26 | 27 | var cmdLine = parseCmdLine(args, getCommandLineOptions()); 28 | if (null == cmdLine) { 29 | logger.info("could not parse all the program arguments."); 30 | return; 31 | } 32 | 33 | Graph graph; 34 | var store = new KeyStore(); 35 | try { 36 | var csvEdgesFile = cmdLine.getOptionValue(ArgsFactory.INPUT_FILE_EDGES); 37 | var edgesSeparator = ","; 38 | if (cmdLine.hasOption(ArgsFactory.EDGES_FILE_SEPARATOR)) { 39 | edgesSeparator = cmdLine.getOptionValue(ArgsFactory.EDGES_FILE_SEPARATOR); 40 | } 41 | if (cmdLine.hasOption(ArgsFactory.INPUT_FILE_VERTICES)) { 42 | var csvVerticesFile = cmdLine.getOptionValue(ArgsFactory.INPUT_FILE_VERTICES); 43 | String verticesSeparator = ","; 44 | if (cmdLine.hasOption(ArgsFactory.VERTICES_FILE_SEPARATOR)) { 45 | verticesSeparator = cmdLine.getOptionValue(ArgsFactory. 46 | VERTICES_FILE_SEPARATOR); 47 | } 48 | graph = new GraphFactory().make(csvVerticesFile, verticesSeparator, 49 | csvEdgesFile, edgesSeparator, store); 50 | } else { 51 | graph = new GraphFactory().make(csvEdgesFile, edgesSeparator, store); 52 | } 53 | } catch (IOException e) { 54 | logger.info("Could not load the csv input graph data."); 55 | return; 56 | } 57 | 58 | graph.setUndirected(cmdLine.hasOption(ArgsFactory.UNDIRECTED)); 59 | 60 | // Serialize the data and save the files in the given output directory. 61 | var outputDirectory = sanitizeDirStrAndMkdirIfNeeded(cmdLine.getOptionValue( 62 | ArgsFactory.SERIALIZE_OUTPUT)); 63 | try { 64 | store.serialize(outputDirectory); 65 | graph.serialize(outputDirectory); 66 | } catch (IOException e) { 67 | logger.error("Error in serialization: ", e.getMessage()); 68 | } 69 | } 70 | 71 | /** 72 | * @return The {@link Options} required by the {@link DatasetSerializer}. 73 | */ 74 | private static Options getCommandLineOptions() { 75 | var options = new Options(); // ArgsFactory. 
76 | options.addOption(ArgsFactory.getInputFileEdges()); // INPUT_FILE_EDGES -e 77 | options.addOption(ArgsFactory.getOutputDirOption()); // SERIALIZE_OUTPUT -o 78 | options.addOption(ArgsFactory.getEdgesFileSeparator()); // EDGES_FILE_SEPARATOR -m 79 | options.addOption(ArgsFactory.getInputFileVertices()); // INPUT_FILE_VERTICES -v 80 | options.addOption(ArgsFactory.getVerticesFileSeparator()); // VERTICES_FILE_SEPARATOR -n 81 | options.addOption(ArgsFactory.getIsGraphUndirected()); // UNDIRECTED -u 82 | return options; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/runner/plan/OptimizerExecutor.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.runner.plan; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.Workers; 4 | import ca.waterloo.dsg.graphflow.planner.QueryPlanner; 5 | import ca.waterloo.dsg.graphflow.planner.QueryPlannerBig; 6 | import ca.waterloo.dsg.graphflow.planner.catalog.Catalog; 7 | import ca.waterloo.dsg.graphflow.planner.catalog.CatalogFactory; 8 | import ca.waterloo.dsg.graphflow.query.QueryGraph; 9 | import ca.waterloo.dsg.graphflow.query.parser.QueryParser; 10 | import ca.waterloo.dsg.graphflow.runner.AbstractRunner; 11 | import ca.waterloo.dsg.graphflow.runner.ArgsFactory; 12 | import ca.waterloo.dsg.graphflow.storage.Graph; 13 | import ca.waterloo.dsg.graphflow.storage.GraphFactory; 14 | import ca.waterloo.dsg.graphflow.storage.KeyStore; 15 | import ca.waterloo.dsg.graphflow.storage.KeyStoreFactory; 16 | import ca.waterloo.dsg.graphflow.util.IOUtils; 17 | import org.apache.commons.cli.Options; 18 | import org.apache.logging.log4j.LogManager; 19 | import org.apache.logging.log4j.Logger; 20 | 21 | import java.io.IOException; 22 | 23 | /** 24 | * Runs a specific transform for a {@link QueryGraph} and logs the transform. 
25 | */ 26 | public class OptimizerExecutor extends AbstractRunner { 27 | 28 | protected static final Logger logger = LogManager.getLogger(OptimizerExecutor.class); 29 | 30 | public static void main(String[] args) throws InterruptedException, IOException { 31 | var startTimeLoading = System.nanoTime(); 32 | var cmdLine = parseCmdLine(args, getCommandLineOptions()); 33 | if (null == cmdLine) { 34 | return; 35 | } 36 | 37 | var inputDirectory = sanitizeDirStr(cmdLine.getOptionValue(ArgsFactory.INPUT_GRAPH_DIR)); 38 | 39 | Graph graph; 40 | Catalog catalog; 41 | KeyStore store; 42 | try { 43 | graph = new GraphFactory().make(inputDirectory); 44 | catalog = new CatalogFactory().make(inputDirectory); 45 | store = new KeyStoreFactory().make(inputDirectory); 46 | } catch (IOException | ClassNotFoundException e) { 47 | logger.error("Error in deserialization: " + e.getMessage()); 48 | return; 49 | } 50 | 51 | var queryGraph = QueryParser.parse(cmdLine.getOptionValue(ArgsFactory.QUERY), store); 52 | if (null == queryGraph) { 53 | logger.error("An error occurred parsing the query graph."); 54 | return; 55 | } 56 | 57 | var numQVertices = queryGraph.getNumVertices(); 58 | if (numQVertices >= 15 && numQVertices <= 25) { 59 | QueryPlannerBig.NUM_TOP_PLANS_KEPT = 5; 60 | } else if (numQVertices > 25) { 61 | QueryPlannerBig.NUM_TOP_PLANS_KEPT = 1; 62 | } 63 | var elapsedTimeLoading = IOUtils.getElapsedTimeInMillis(startTimeLoading); 64 | logger.info("Dataset loading run time: " + elapsedTimeLoading + " (ms)"); 65 | 66 | var planner = queryGraph.getNumVertices() <= 8 ? 67 | new QueryPlanner(queryGraph, catalog, graph) : 68 | new QueryPlannerBig(queryGraph, catalog, graph); 69 | var numThreads = !cmdLine.hasOption(ArgsFactory.NUM_THREADS) ? 
1 /* single thread */ : 70 | Integer.parseInt(cmdLine.getOptionValue(ArgsFactory.NUM_THREADS)); 71 | planner.setNumThreads(numThreads); 72 | var beginTime = System.nanoTime(); 73 | var queryPlan = planner.plan(); 74 | var elapsedTime = IOUtils.getElapsedTimeInMillis(beginTime); 75 | if (!cmdLine.hasOption(ArgsFactory.OUTPUT_FILE)) { 76 | logger.info("Optimizer run time: " + elapsedTime + " (ms)"); 77 | } 78 | if (cmdLine.hasOption(ArgsFactory.EXECUTE_PLAN)) { 79 | beginTime = System.nanoTime(); 80 | // initialize and execute the query transform, get the output metrics and log it. 81 | var workers = new Workers(queryPlan, numThreads); 82 | logger.info("Plan initialization before exec run time: " + elapsedTime + " (ms)"); 83 | workers.init(graph, store); 84 | elapsedTime = IOUtils.getElapsedTimeInMillis(beginTime); 85 | workers.execute(); 86 | if (cmdLine.hasOption(ArgsFactory.OUTPUT_FILE)) { 87 | IOUtils.log(cmdLine.getOptionValue(ArgsFactory.OUTPUT_FILE), 88 | elapsedTime + "," + workers.getElapsedTime() + "\n"); 89 | } else { 90 | var output = workers.getOutputLog(); 91 | var outputSplit = output.split("Plan output:")[0].split(","); 92 | logger.info("Query execution run time: " + outputSplit[0] + " (ms)"); 93 | logger.info("Number output tuples: " + outputSplit[1]); 94 | logger.info("Number intermediate tuples: " + outputSplit[2]); 95 | var line = ""; 96 | for (var i = 4; i < outputSplit.length - 1; i++) { 97 | line += outputSplit[i] + ", "; 98 | } 99 | line += outputSplit[outputSplit.length - 1].split("\n")[0]; 100 | logger.info("Plan: " + line); 101 | } 102 | } else { 103 | logger.info("Plan output:" + queryPlan.getOutputLog()); 104 | } 105 | } 106 | 107 | /** 108 | * @return The {@link Options} required by the {@link OptimizerExecutor}. 109 | */ 110 | private static Options getCommandLineOptions() { 111 | var options = new Options(); // ArgsFactory. 
112 | options.addOption(ArgsFactory.getInputGraphDirectoryOption()); // INPUT_GRAPH_DIR -i 113 | options.addOption(ArgsFactory.getNotRequiredOutputFileOption()); // OUTPUT_FILE -o 114 | options.addOption(ArgsFactory.getQueryOption()); // QUERY -q 115 | options.addOption(ArgsFactory.getNumberThreadsOption()); // NUM_THREADS -t 116 | options.addOption(ArgsFactory.getExecuteOption()); // EXECUTE_PLAN -e 117 | options.addOption(ArgsFactory.getEnableAdaptivityOption()); // ENABLE_ADAPTIVITY -a 118 | return options; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/storage/Graph.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.storage; 2 | 3 | import ca.waterloo.dsg.graphflow.util.IOUtils; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | import org.apache.logging.log4j.LogManager; 7 | import org.apache.logging.log4j.Logger; 8 | 9 | import java.io.IOException; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | /** 14 | * The input graph data stored. 15 | */ 16 | public class Graph { 17 | 18 | private static final Logger logger = LogManager.getLogger(Graph.class); 19 | 20 | /** 21 | * Used to identify the edge direction in the graph representation. 22 | */ 23 | public enum Direction { 24 | Fwd /* forward */, 25 | Bwd /* backward */ 26 | } 27 | 28 | // vertex Ids indexed by type and random access to vertex types. 29 | @Getter @Setter private int[] vertexIds; 30 | @Getter @Setter private short[] vertexTypes; 31 | @Getter @Setter private int[] vertexTypeOffsets; 32 | // Adjacency lists containing the neighbour vertex IDs sorted by ID. 33 | @Getter @Setter private SortedAdjList[] fwdAdjLists; 34 | @Getter @Setter private SortedAdjList[] bwdAdjLists; 35 | // Graph metadata. 
36 | @Getter @Setter private int highestVertexId = -1; 37 | @Getter @Setter private int numEdges; 38 | @Setter private int[] labelOrToTypeToNumEdges; 39 | @Setter private int[] labelOrTypeToLargestFwdAdjListSize; 40 | @Setter private int[] labelOrTypeToLargestBwdAdjListSize; 41 | @Getter @Setter private Map edgeKeyToNumEdgesMap; 42 | @Getter @Setter private Map labelAndToTypeToPercentageMap; 43 | @Getter @Setter private Map fromTypeAndLabelToPercentageMap; 44 | @Getter @Setter private boolean isUndirected; 45 | @Getter @Setter private boolean isAdjListSortedByType; 46 | 47 | /** 48 | * Constructs a {@link Graph} object. 49 | */ 50 | public Graph() {} 51 | 52 | /** 53 | * Constructs a {@link Graph} object. 54 | * 55 | * @param fwdAdjLists are the forward adjacency lists. 56 | * @param bwdAdjLists are the backward adjacency lists. 57 | * @param highestVertexId is the highest vertex ID. 58 | */ 59 | public Graph(SortedAdjList[] fwdAdjLists, SortedAdjList[] bwdAdjLists, int highestVertexId) { 60 | this.fwdAdjLists = fwdAdjLists; 61 | this.bwdAdjLists = bwdAdjLists; 62 | this.highestVertexId = highestVertexId; 63 | } 64 | 65 | /** 66 | * @param fromType is the from query vertex type. 67 | * @param toType is the to query vertex type. 68 | * @param label is the edge label. 69 | * @return The number of edges. 70 | */ 71 | public int getNumEdges(short fromType, short toType, short label) { 72 | if (fromType == KeyStore.ANY && toType == KeyStore.ANY) { 73 | return labelOrToTypeToNumEdges[label]; 74 | } else if (fromType != KeyStore.ANY && toType != KeyStore.ANY) { 75 | return edgeKeyToNumEdgesMap.get(getEdgeKey(fromType, toType, label)); 76 | } else if (fromType != KeyStore.ANY) { 77 | return fromTypeAndLabelToPercentageMap.get(getEdgeKey(fromType, label)); 78 | } 79 | return labelAndToTypeToPercentageMap.get(getEdgeKey(label, toType)); 80 | } 81 | 82 | /** 83 | * @param labelOrToType is the edge label. 
84 | * @param direction is the direction of extension as forward or backward. 85 | * @return The largest adjacency list size. 86 | */ 87 | public int getLargestAdjListSize(short labelOrToType, Direction direction) { 88 | if (Direction.Fwd == direction) { 89 | return labelOrTypeToLargestFwdAdjListSize[labelOrToType]; 90 | } else { 91 | return labelOrTypeToLargestBwdAdjListSize[labelOrToType]; 92 | } 93 | } 94 | 95 | /** 96 | * @param store is the vertex types and edge labelsOrToTypes key store. 97 | */ 98 | void setEdgeCountsAndLargestAdjListSizes(KeyStore store) { 99 | // set the largest adjacency list sizes for forward and backward directions per label. 100 | isAdjListSortedByType = store.getNextLabelKey() == 1 /*key 0 only used -> single label.*/ 101 | && store.getNextTypeKey() > 1; /*at least 2 vertex key types used.*/ 102 | var numLabelsOrToTypes = isAdjListSortedByType ? 103 | store.getNextTypeKey() : store.getNextLabelKey(); 104 | labelOrToTypeToNumEdges = new int[numLabelsOrToTypes]; 105 | labelOrTypeToLargestFwdAdjListSize = new int[numLabelsOrToTypes]; 106 | labelOrTypeToLargestBwdAdjListSize = new int[numLabelsOrToTypes]; 107 | for (var vertexId = 0; vertexId <= highestVertexId; vertexId++) { 108 | numEdges += fwdAdjLists[vertexId].size(); 109 | for (short labelOrToType = 0; labelOrToType < numLabelsOrToTypes; labelOrToType++) { 110 | var adjListSize = fwdAdjLists[vertexId].size(labelOrToType); 111 | labelOrToTypeToNumEdges[labelOrToType] += adjListSize; 112 | if (adjListSize > labelOrTypeToLargestFwdAdjListSize[labelOrToType]) { 113 | labelOrTypeToLargestFwdAdjListSize[labelOrToType] = adjListSize; 114 | } 115 | } 116 | for (short labelOrToType = 0; labelOrToType < numLabelsOrToTypes; labelOrToType++) { 117 | var adjListSize = bwdAdjLists[vertexId].size(labelOrToType); 118 | if (adjListSize > labelOrTypeToLargestBwdAdjListSize[labelOrToType]) { 119 | labelOrTypeToLargestBwdAdjListSize[labelOrToType] = adjListSize; 120 | } 121 | } 122 | } 123 | // Set the 
edge keys (fromType-label-toType), (fromType-label), and (label-toType) to 124 | // the percentage of number of edges. 125 | edgeKeyToNumEdgesMap = new HashMap<>(); 126 | labelAndToTypeToPercentageMap = new HashMap<>(); 127 | fromTypeAndLabelToPercentageMap = new HashMap<>(); 128 | var numVertices = highestVertexId + 1; 129 | for (short fromType = 0; fromType < store.getNextTypeKey(); fromType++) { 130 | for (short toType = 0; toType < store.getNextTypeKey(); toType++) { 131 | for (short label = 0; label < store.getNextLabelKey(); label++) { 132 | var edge = getEdgeKey(fromType, toType, label); 133 | edgeKeyToNumEdgesMap.putIfAbsent(edge, 0); 134 | var labelAndToType = getEdgeKey(label, toType); 135 | labelAndToTypeToPercentageMap.putIfAbsent(labelAndToType, 0); 136 | var fromTypeAndLabel = getEdgeKey(fromType, label); 137 | fromTypeAndLabelToPercentageMap.putIfAbsent(fromTypeAndLabel, 0); 138 | } 139 | } 140 | } 141 | for (var fromVertex = 0; fromVertex < numVertices; fromVertex++) { 142 | var fromType = vertexTypes[fromVertex]; 143 | var offsets = fwdAdjLists[fromVertex].getLabelOrTypeOffsets(); 144 | if (isAdjListSortedByType) { 145 | short label = 0; 146 | for (short toType = 0; toType < offsets.length - 1; toType++) { 147 | var numEdges = offsets[toType + 1] - offsets[toType]; 148 | addEdgeCount(fromType, toType, label, numEdges); 149 | } 150 | } else { 151 | var neighbours = fwdAdjLists[fromVertex].getNeighbourIds(); 152 | for (short label = 0; label < offsets.length - 1; label++) { 153 | for (var toIdx = offsets[label]; toIdx < offsets[label + 1]; toIdx++) { 154 | var toType = vertexTypes[neighbours[toIdx]]; 155 | addEdgeCount(fromType, toType, label, 1); 156 | } 157 | } 158 | } 159 | } 160 | } 161 | 162 | private void addEdgeCount(short fromType, short toType, short label, int numEdges) { 163 | var edge = getEdgeKey(fromType, toType, label); 164 | edgeKeyToNumEdgesMap.put(edge, edgeKeyToNumEdgesMap.get(edge) + numEdges); 165 | var labelAndToType = 
getEdgeKey(label, toType); 166 | labelAndToTypeToPercentageMap.put(labelAndToType, 167 | labelAndToTypeToPercentageMap.get(labelAndToType) + numEdges); 168 | var fromTypeAndLabel = getEdgeKey(fromType, label); 169 | fromTypeAndLabelToPercentageMap.put(fromTypeAndLabel, 170 | fromTypeAndLabelToPercentageMap.get(fromTypeAndLabel) + numEdges); 171 | } 172 | 173 | /** 174 | * @param fromType is the from query vertex type. 175 | * @param toType is the to query vertex type. 176 | * @param label is the query edge label. 177 | * @return the edge key. 178 | */ 179 | public static long getEdgeKey(short fromType, short toType, short label) { 180 | return ((long) (fromType & 0xFFFF ) << 48) | 181 | ((long) (label & 0x0000FFFF) << 16) | 182 | ((long) (toType & 0xFFFF ) ) ; 183 | } 184 | 185 | /** 186 | * @param typeOrLabel is the from query vertex type. 187 | * @param typeOrLabel2 is the to query vertex type. 188 | * @return the edge key. 189 | */ 190 | private int getEdgeKey(short typeOrLabel, short typeOrLabel2) { 191 | return ((typeOrLabel & 0x0000FFFF) << 16) | 192 | ((typeOrLabel2 & 0xFFFF ) ) ; 193 | } 194 | 195 | /** 196 | * Serializes the graph by persisting different fields into different files. 197 | * 198 | * @param directoryPath is the directory to which the graph's serialized objects are persisted. 199 | * @throws IOException if stream to file cannot be written to or closed. 
200 | */ 201 | public void serialize(String directoryPath) throws IOException { 202 | logger.info("Serializing the data graph."); 203 | IOUtils.serializeObjs(directoryPath, new Object[] { 204 | /* pair */ 205 | "vertexIds", vertexIds, 206 | "vertexTypes", vertexTypes, 207 | "vertexTypeOffsets", vertexTypeOffsets, 208 | "highestVertexId", highestVertexId, 209 | "fwdAdjLists", fwdAdjLists, 210 | "bwdAdjLists", bwdAdjLists, 211 | "numEdges", numEdges, 212 | "isAdjListSortedByType", isAdjListSortedByType, 213 | "labelOrToTypeToNumEdges", labelOrToTypeToNumEdges, 214 | "labelOrTypeToLargestFwdAdjListSize", labelOrTypeToLargestFwdAdjListSize, 215 | "labelOrTypeToLargestBwdAdjListSize", labelOrTypeToLargestBwdAdjListSize, 216 | "edgeKeyToNumEdgesMap", edgeKeyToNumEdgesMap, 217 | "labelAndToTypeToPercentageMap", labelAndToTypeToPercentageMap, 218 | "fromTypeAndLabelToPercentageMap", fromTypeAndLabelToPercentageMap, 219 | "isUndirected", isUndirected 220 | }); 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/storage/KeyStore.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.storage; 2 | 3 | import ca.waterloo.dsg.graphflow.util.IOUtils; 4 | import lombok.Getter; 5 | import org.apache.logging.log4j.LogManager; 6 | import org.apache.logging.log4j.Logger; 7 | 8 | import java.io.IOException; 9 | import java.util.HashMap; 10 | import java.util.Map; 11 | 12 | /** 13 | * Stores a mapping of {@code String} keys to {@code short} keys and vice versa. Each new 14 | * {@code String} key inserted gets a consecutively increasing short key starting from 0. 15 | * If more then {@link Short#MAX_VALUE} keys are inserted, an error is thrown. 
16 | */ 17 | public class KeyStore { 18 | 19 | private static final Logger logger = LogManager.getLogger(KeyStore.class); 20 | 21 | public static short ANY = -1; 22 | 23 | @Getter private short nextTypeKey = 0; 24 | @Getter private short nextLabelKey = 0; 25 | private Map stringToShortTypeKeyMap = new HashMap<>(); 26 | private Map stringToShortLabelKeyMap = new HashMap<>(); 27 | 28 | /** 29 | * Constructs a {@link KeyStore} object. 30 | */ 31 | public KeyStore() {} 32 | 33 | /** 34 | * Constructs a {@link KeyStore} object. 35 | * 36 | * @param stringToShortTypeKeyMap is the user defined to internal type key map. 37 | * @param stringToShortLabelKeyMap is the user defined to internal label key map. 38 | * @param nextTypeKey is the next internal type key to assign. 39 | * @param nextLabelKey is the next internal label key to assign. 40 | */ 41 | public KeyStore(Map stringToShortTypeKeyMap, 42 | Map stringToShortLabelKeyMap, short nextTypeKey, short nextLabelKey) { 43 | this.stringToShortTypeKeyMap = stringToShortTypeKeyMap; 44 | this.stringToShortLabelKeyMap = stringToShortLabelKeyMap; 45 | this.nextTypeKey = nextTypeKey; 46 | this.nextLabelKey = nextLabelKey; 47 | } 48 | 49 | /** 50 | * Get the type key as short. 51 | * 52 | * @param key is the {@code String} key to get a mapping of. 53 | */ 54 | public short getTypeKeyAsShort(String key) { 55 | if (null == stringToShortTypeKeyMap.get(key)) { 56 | throw new IllegalArgumentException("Type " + key + " does not exist in the database."); 57 | } 58 | return stringToShortTypeKeyMap.get(key); 59 | } 60 | 61 | /** 62 | * Get the label key as short. 63 | * 64 | * @param key is the {@code String} key to get a mapping of. 
65 | */ 66 | public short getLabelKeyAsShort(String key) { 67 | if (null == stringToShortLabelKeyMap.get(key)) { 68 | throw new IllegalArgumentException("Label " + key + " does not exist in the database."); 69 | } 70 | return stringToShortLabelKeyMap.get(key); 71 | } 72 | 73 | /** 74 | * Insert the type key if it has not been inserted before. 75 | * 76 | * @param key is the {@code String} type key to insert. 77 | */ 78 | short insertTypeKeyIfNeeded(String key) { 79 | return insertKey(key, nextTypeKey, stringToShortTypeKeyMap); 80 | } 81 | 82 | /** 83 | * Insert the label key if it has not been inserted before. 84 | * 85 | * @param key is the {@code String} label key to insert. 86 | */ 87 | short insertLabelKeyIfNeeded(String key) { 88 | return insertKey(key, nextLabelKey, stringToShortLabelKeyMap); 89 | } 90 | 91 | private short insertKey(String key, short nextKey, Map stringToShortKeyMap) { 92 | if (stringToShortKeyMap.containsKey(key)) { 93 | // logger.info("The key " + key + " has already been inserted."); 94 | return stringToShortKeyMap.get(key); 95 | } 96 | if (nextKey < 0) { 97 | logger.error("Max number of keys inserted."); 98 | throw new IllegalArgumentException("Max number of keys inserted."); 99 | } 100 | // logger.info("Inserting key '" + key + "' as " + nextKey + " in KeyStore."); 101 | stringToShortKeyMap.put(key, nextKey); 102 | if (stringToShortKeyMap.equals(stringToShortTypeKeyMap)) { 103 | return nextTypeKey++; 104 | } else { // .equals(stringToShortLabelKeyMap) 105 | return nextLabelKey++; 106 | } 107 | } 108 | 109 | /** 110 | * Serializes the store by persisting different fields into different files. 111 | * 112 | * @param directoryPath is the directory to which the store's serialized objects are persisted. 113 | * @throws IOException if stream to file cannot be written to or closed. 
114 | */ 115 | public void serialize(String directoryPath) throws IOException { 116 | logger.info("Serializing the types and labels key store."); 117 | IOUtils.serializeObjs(directoryPath, new Object[] { 118 | /* pair */ 119 | "TypesKeyMap", stringToShortTypeKeyMap, 120 | "LabelsKeyMap", stringToShortLabelKeyMap, 121 | "nextTypeKey", nextTypeKey, 122 | "nextLabelKey", nextLabelKey 123 | }); 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/storage/KeyStoreFactory.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.storage; 2 | 3 | import ca.waterloo.dsg.graphflow.util.IOUtils; 4 | 5 | import java.io.IOException; 6 | import java.util.HashMap; 7 | 8 | public class KeyStoreFactory { 9 | 10 | /** 11 | * Constructs a {@link KeyStore} object from binary serialized data. 12 | * 13 | * @param directoryPath is the directory to deserialize binary data. 14 | * @return the constructed graph object. 15 | * @throws IOException if stream to file cannot be written to or closed. 16 | * @throws ClassNotFoundException if the object read is from input stream is not found. 17 | */ 18 | @SuppressWarnings("unchecked") // casting. 
19 | public KeyStore make(String directoryPath) throws IOException, ClassNotFoundException { 20 | var stringToShortTypeKeyMap = (HashMap) IOUtils.deserializeObj( 21 | directoryPath + "TypesKeyMap"); 22 | var stringToShortLabelKeyMap = (HashMap) IOUtils.deserializeObj( 23 | directoryPath + "LabelsKeyMap"); 24 | var nextTypeKey = (short) IOUtils.deserializeObj(directoryPath + "nextTypeKey"); 25 | var nextLabelKey = (short) IOUtils.deserializeObj(directoryPath + "nextLabelKey"); 26 | return new KeyStore(stringToShortTypeKeyMap, stringToShortLabelKeyMap, 27 | nextTypeKey, nextLabelKey); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/storage/SortedAdjList.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.storage; 2 | 3 | import ca.waterloo.dsg.graphflow.plan.operator.extend.EI.Neighbours; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | import java.io.Serializable; 8 | import java.util.Arrays; 9 | 10 | /** 11 | * Represents the adjacency list of a vertex. Stores the IDs of the vertex's initNeighbours, the 12 | * types, and the IDs of edges that the vertex has to these initNeighbours in sorted arrays. Arrays 13 | * are sorted first by neighbour IDs and then by edge {@code short} type values. 14 | */ 15 | public class SortedAdjList implements Serializable { 16 | 17 | @Getter private int[] labelOrTypeOffsets; 18 | @Getter @Setter private int[] neighbourIds; 19 | 20 | /** 21 | * Constructs a {@link SortedAdjList} object. 22 | */ 23 | public SortedAdjList(int[] offsets) { 24 | this.labelOrTypeOffsets = offsets; 25 | this.neighbourIds = new int[offsets[offsets.length - 1]]; 26 | } 27 | 28 | /** 29 | * 30 | * 31 | * @param idx is the index of the neighbour id to return. 32 | * @return the ... 
33 | */ 34 | public int getNeighbourId(int idx) { 35 | return neighbourIds[idx]; 36 | } 37 | 38 | /** 39 | * Sets a new neighbour with the given Id at a given index. 40 | * 41 | * @param neighbourId is the Id of the neighbour. 42 | * @param idx is the index of the neighbour in the internal array. 43 | */ 44 | public void setNeighbourId(int neighbourId, int idx) { 45 | neighbourIds[idx] = neighbourId; 46 | } 47 | 48 | /** 49 | * @param labelOrType . 50 | * @param neighbours . 51 | */ 52 | public void setNeighbourIds(short labelOrType, Neighbours neighbours) { 53 | neighbours.Ids = neighbourIds; 54 | neighbours.startIdx = labelOrTypeOffsets[labelOrType]; 55 | neighbours.endIdx = labelOrTypeOffsets[labelOrType + 1]; 56 | } 57 | 58 | /** 59 | * voila. 60 | * 61 | * @param labelOrType 62 | * @param someNeighbours 63 | * @param neighbours 64 | * @return 65 | */ 66 | public int intersect(short labelOrType, Neighbours someNeighbours, Neighbours neighbours) { 67 | intersect(someNeighbours, neighbours, neighbourIds, 68 | labelOrTypeOffsets[labelOrType], labelOrTypeOffsets[labelOrType + 1]); 69 | return labelOrTypeOffsets[labelOrType + 1] - labelOrTypeOffsets[labelOrType]; 70 | } 71 | 72 | private void intersect(Neighbours someNeighbours, Neighbours neighbours, int[] neighbourIds, 73 | int thisIdx, int thisIdxEnd) { 74 | neighbours.reset(); 75 | var someNeighbourIds = someNeighbours.Ids; 76 | var someIdx = someNeighbours.startIdx; 77 | var someEndIdx = someNeighbours.endIdx; 78 | while (thisIdx < thisIdxEnd && someIdx < someEndIdx) { 79 | if (neighbourIds[thisIdx] < someNeighbourIds[someIdx]) { 80 | thisIdx++; 81 | while (thisIdx < thisIdxEnd && 82 | neighbourIds[thisIdx] < someNeighbourIds[someIdx]) { 83 | thisIdx++; 84 | } 85 | } else if (neighbourIds[thisIdx] > someNeighbourIds[someIdx]) { 86 | someIdx++; 87 | while (someIdx < someEndIdx && 88 | neighbourIds[thisIdx] > someNeighbourIds[someIdx]) { 89 | someIdx++; 90 | } 91 | } else { 92 | neighbours.Ids[neighbours.endIdx] 
= neighbourIds[thisIdx]; 93 | neighbours.endIdx++; 94 | thisIdx++; 95 | someIdx++; 96 | } 97 | } 98 | } 99 | 100 | /** 101 | * Sorts each list of neighbour Ids of a particular label. 102 | */ 103 | public void sort() { 104 | for (int i = 0; i < labelOrTypeOffsets.length - 1; i++) { 105 | Arrays.sort(neighbourIds, labelOrTypeOffsets[i], labelOrTypeOffsets[i + 1]); 106 | } 107 | } 108 | 109 | /** 110 | * @return the size of the adjacency list. 111 | */ 112 | public int size() { 113 | return neighbourIds.length; 114 | } 115 | 116 | /** 117 | * @param labelOrType is the edge label or toVertex type. 118 | * @return the size of the adjacency list. 119 | */ 120 | public int size(short labelOrType) { 121 | return labelOrTypeOffsets[labelOrType + 1] + labelOrTypeOffsets[labelOrType]; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/util/IOUtils.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.util; 2 | 3 | import java.io.BufferedInputStream; 4 | import java.io.BufferedOutputStream; 5 | import java.io.File; 6 | import java.io.FileInputStream; 7 | import java.io.FileOutputStream; 8 | import java.io.FileWriter; 9 | import java.io.IOException; 10 | import java.io.ObjectInputStream; 11 | import java.io.ObjectOutputStream; 12 | 13 | /** 14 | * Utils for Input, Output (I/O) and time operations. 15 | */ 16 | public class IOUtils { 17 | 18 | /** 19 | * Calculates the time difference between now and the given {@code beginTime}. 20 | * 21 | * @param beginTime The being time in nanoseconds. 22 | * @return Time difference in milliseconds. 
23 | */ 24 | public static double getElapsedTimeInMillis(long beginTime) { 25 | return (System.nanoTime() - beginTime) / 1000000.0; 26 | } 27 | 28 | /** 29 | * 30 | * 31 | * @param filename 32 | * @param output 33 | * @throws IOException 34 | */ 35 | public static void log(String filename, String output) throws IOException { 36 | IOUtils.mkdirForFile(filename); 37 | IOUtils.createNewFile(filename); 38 | var writer = new FileWriter(filename, true /* append to the file */); 39 | writer.write(output); 40 | writer.flush(); 41 | } 42 | 43 | /** 44 | * @see File#mkdir() 45 | */ 46 | public static void mkdirs(String directoryPath) throws IOException { 47 | var file = new File(directoryPath); 48 | if (!file.exists()) { 49 | if (!file.mkdirs()) { 50 | throw new IOException("The directory " + directoryPath + " was not created."); 51 | } 52 | } 53 | } 54 | 55 | /** 56 | * @see File#createNewFile() 57 | */ 58 | public static void createNewFile(String filePath) throws IOException { 59 | var file = new File(filePath); 60 | if (!file.exists()) { 61 | if (!file.createNewFile()) { 62 | throw new IOException("The file " + filePath + " was not created."); 63 | } 64 | } 65 | } 66 | 67 | /** 68 | * Creates an {@link ObjectOutputStream} object from the given {@code outputPath}. 69 | * 70 | * @param outputPath The {@link String} path to the output file. 71 | * @return an {@link ObjectOutputStream} object. 72 | */ 73 | public static ObjectOutputStream makeObjectOutputStream(String outputPath) 74 | throws IOException { 75 | return new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(outputPath))); 76 | } 77 | 78 | /** 79 | * Creates an {@link ObjectInputStream} object from the given {@code inputFilePath}. 80 | * 81 | * @param inputFilePath The {@link String} path to the input file. 82 | * @return an {@link ObjectInputStream} object. 
83 | */ 84 | public static ObjectInputStream makeObjInputStream(String inputFilePath) throws IOException { 85 | return new ObjectInputStream(new BufferedInputStream(new FileInputStream(inputFilePath))); 86 | } 87 | 88 | public static void mkdirForFile(String outputFilename) throws IOException { 89 | String[] output_split = outputFilename.split("/"); 90 | StringBuilder outputDirBuilder = new StringBuilder(); 91 | for (int i = 0; i < output_split.length - 1; i++) { 92 | outputDirBuilder.append(output_split[i]); 93 | outputDirBuilder.append("/"); 94 | } 95 | IOUtils.mkdirs(outputDirBuilder.toString()); 96 | } 97 | 98 | /** 99 | * @param file 100 | * @param object 101 | * @throws IOException if stream to file cannot be written to or closed. 102 | */ 103 | public static void serializeObj(String file, Object object) throws IOException { 104 | var outputStream = IOUtils.makeObjectOutputStream(file); 105 | outputStream.writeObject(object); 106 | outputStream.close(); 107 | } 108 | 109 | /** 110 | * @param directory 111 | * @param filenameObjectPairs 112 | * @throws IOException if stream to file cannot be written to or closed. 113 | */ 114 | public static void serializeObjs(String directory, Object[] filenameObjectPairs) 115 | throws IOException { 116 | for (int i = 0; i < filenameObjectPairs.length; i += 2) { 117 | serializeObj(directory + filenameObjectPairs[i], filenameObjectPairs[i + 1]); 118 | } 119 | } 120 | 121 | /** 122 | * @param file 123 | * @return 124 | * @throws IOException if stream to file cannot be written to or closed. 125 | * @throws ClassNotFoundException if the object read from input stream is not found. 
126 | */ 127 | public static Object deserializeObj(String file) throws IOException, 128 | ClassNotFoundException { 129 | var inputStream = IOUtils.makeObjInputStream(file); 130 | Object object = inputStream.readObject(); 131 | inputStream.close(); 132 | return object; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/util/collection/MapUtils.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.util.collection; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | /** 8 | * Utilities for generic map data structures. 9 | */ 10 | public class MapUtils { 11 | 12 | /** 13 | * @param map The map to check if each key, element pair are equal. 14 | * @return True if each, key, element pair are equal. False, otherwise. 15 | */ 16 | public static boolean isEachKeyElementPairEqual(Map map) { 17 | for (var key : map.keySet()) { 18 | if (!key.equals(map.get(key))) { 19 | return false; 20 | } 21 | } 22 | return true; 23 | } 24 | 25 | /** 26 | * @param elements The list of elements to turn into an element to position map. 27 | * @param The type of the elements. 28 | * @return The element to position map. 29 | */ 30 | public static Map getValueToIdxMap(List elements) { 31 | Map map = new HashMap<>(); 32 | int i = 0; 33 | for (var element : elements) { 34 | map.put(element, i++); 35 | } 36 | return map; 37 | } 38 | 39 | /** 40 | * @param map The map to copy. 41 | * @param The type of the keys. 42 | * @param The type of the values. 43 | * @return a copy of the map. 
44 | */ 45 | public static Map copy(Map map) { 46 | Map mapCopy = new HashMap<>(); 47 | for (Map.Entry entry : map.entrySet()) { 48 | mapCopy.put(entry.getKey(), entry.getValue()); 49 | } 50 | return mapCopy; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/util/collection/SetUtils.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.util.collection; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.List; 6 | import java.util.Set; 7 | import java.util.stream.Collectors; 8 | 9 | /** 10 | * Utilities for generic setAdjListSortOrder data structures. 11 | */ 12 | public class SetUtils { 13 | 14 | /** 15 | * Subtracts all of the element in the given collection to subtract from the input collection. 16 | * 17 | * @param input The input collection to subtract from. 18 | * @param toSubtract The collection to subtract. 19 | * @return The list of elements in the input collection but not in the toSubtract collection. 20 | */ 21 | public static List subtract(Collection input, Collection toSubtract) { 22 | var result = new ArrayList(); 23 | for (T value : input) { 24 | if (!toSubtract.contains(value)) { 25 | result.add(value); 26 | } 27 | } 28 | return result; 29 | } 30 | 31 | /** 32 | * Intersects two sets and returns the result in a list. 33 | * 34 | * @param set One of the sets to intersect. 35 | * @param otherSet The other setAdjListSortOrder to intersect. 36 | * @return The list of values in both sets. 37 | */ 38 | public static boolean equals(Set set, Set otherSet) { 39 | if (set.size() == otherSet.size()) { 40 | return false; 41 | } 42 | for (T value : set) { 43 | if (!otherSet.contains(value)) { 44 | return false; 45 | } 46 | } 47 | return true; 48 | } 49 | 50 | /** 51 | * Checks if a setAdjListSortOrder is a subset of another. 
52 | * 53 | * @param superset The super setAdjListSortOrder. 54 | * @param subset The subset. 55 | * @return True if the {@code superset} is a super setAdjListSortOrder of {@code subset}. Else, True. 56 | */ 57 | public static boolean isSubset(List superset, Set subset) { 58 | for (T value : subset) { 59 | if (!superset.contains(value)) { 60 | return false; 61 | } 62 | } 63 | return true; 64 | } 65 | 66 | /** 67 | * Intersects two collections and returns the result in a list. 68 | * 69 | * @param set One of the collections to intersect. 70 | * @param otherSet The other collection to intersect. 71 | * @return The list of values in both sets. 72 | */ 73 | public static List intersect(Collection set, Collection otherSet) { 74 | var result = new ArrayList(); 75 | for (T value : set) { 76 | if (otherSet.contains(value)) { 77 | result.add(value); 78 | } 79 | } 80 | return result; 81 | } 82 | 83 | /** 84 | * returns a copy of the passed collection. 85 | * 86 | * @param list The collection to copy. 87 | * @return A copy of the collection as a list. 
88 | */ 89 | public static List copyAndExclude(Collection list, T valueToExclude) { 90 | var listCopy = new ArrayList(); 91 | for (T value : list) { 92 | if (!value.equals(valueToExclude)) { 93 | listCopy.add(value); 94 | } 95 | } 96 | return listCopy; 97 | } 98 | 99 | public static List> generatePermutations(List set, int len) { 100 | List> permutations = new ArrayList<>(); 101 | getPermutationsGivenLen(set, len, 0, new ArrayList<>(), permutations); 102 | return permutations; 103 | } 104 | 105 | private static void getPermutationsGivenLen(List set, int len, int pos, List temp, 106 | List> permutation) { 107 | if (len == 0) { 108 | permutation.add(new ArrayList<>(temp)); 109 | return; 110 | } 111 | 112 | for (int i = 0; i < set.size(); ++i) { 113 | if (temp.size() < pos + 1) { 114 | temp.add(set.get(i)); 115 | } else { 116 | temp.set(pos, set.get(i)); 117 | } 118 | getPermutationsGivenLen(set, len - 1, pos + 1, temp, permutation); 119 | } 120 | } 121 | 122 | /** 123 | * Generates a power setAdjListSortOrder of as a {@code List>} given set excluding the 124 | * empty set. 125 | * 126 | * @param set The original setAdjListSortOrder as a {@code List} to generate a power set for. 127 | * @return The power setAdjListSortOrder of the given setAdjListSortOrder. 
128 | */ 129 | public static List> getPowerSetExcludingEmptySet(List set) { 130 | return generatePowerSet(set) 131 | .stream() 132 | .filter(subset -> subset.size() >= 1 && subset.size() <= set.size()) 133 | .collect(Collectors.toList()); 134 | } 135 | 136 | private static List> generatePowerSet(List originalSet) { 137 | var sets = new ArrayList>(); 138 | if (originalSet.isEmpty()) { 139 | sets.add(new ArrayList<>()); 140 | return sets; 141 | } 142 | var list = new ArrayList(originalSet); 143 | T head = list.get(0); 144 | var rest = new ArrayList(list.subList(1, list.size())); 145 | for (var set : generatePowerSet(rest)) { 146 | var newSet = new ArrayList(); 147 | newSet.add(head); 148 | newSet.addAll(set); 149 | sets.add(newSet); 150 | sets.add(set); 151 | } 152 | return sets; 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/util/container/Quad.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.util.container; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * A mutable Quad (A a, B b, C c, D d). 7 | */ 8 | public class Quad implements Serializable { 9 | 10 | public A a; 11 | public B b; 12 | public C c; 13 | public D d; 14 | 15 | public Quad(A a, B b, C c, D d) { 16 | this.a = a; 17 | this.b = b; 18 | this.c = c; 19 | this.d = d; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/ca/waterloo/dsg/graphflow/util/container/Triple.java: -------------------------------------------------------------------------------- 1 | package ca.waterloo.dsg.graphflow.util.container; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * A mutable Triple (A a, B b, C c). 
*/
public class Triple<A, B, C> implements Serializable {

    // The components are deliberately public and non-final: the class is a mutable holder.
    public A a;
    public B b;
    public C c;

    /**
     * Creates a Triple holding the three given values.
     *
     * @param a The first component.
     * @param b The second component.
     * @param c The third component.
     */
    public Triple(A a, B b, C c) {
        this.a = a;
        this.b = b;
        this.c = c;
    }
}
--------------------------------------------------------------------------------
/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
property.filename = logs
appenders = console
appender.console.type = Console
appender.console.name = STDOUT
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = [%-5level][%d{yyyy-MM-dd HH:mm:ss.SSS}] %c{1}: %msg%n
rootLogger.level = debug
rootLogger.appenderRefs = stdout
rootLogger.appenderRef.stdout.ref = STDOUT
--------------------------------------------------------------------------------