├── .gitignore ├── LICENSE ├── README.md ├── accumulo ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── bigtable │ │ └── accumulo │ │ ├── Accumulo.scala │ │ └── AccumuloTable.scala │ └── test │ └── scala │ └── unicorn │ └── bigtable │ └── accumulo │ └── AccumuloSpec.scala ├── bigtable ├── build.sbt └── src │ └── main │ └── scala │ └── unicorn │ └── bigtable │ ├── BigTable.scala │ └── Database.scala ├── build.sbt ├── cassandra ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── bigtable │ │ └── cassandra │ │ ├── Cassandra.scala │ │ └── CassandraTable.scala │ └── test │ └── scala │ └── unicorn │ └── bigtable │ └── cassandra │ └── CassandraSpec.scala ├── docker.sh ├── hbase ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── bigtable │ │ └── hbase │ │ ├── HBase.scala │ │ └── HBaseTable.scala │ └── test │ └── scala │ └── unicorn │ └── bigtable │ └── hbase │ └── HBaseSpec.scala ├── index ├── build.sbt └── src │ ├── main │ ├── resources │ │ └── application.conf │ └── scala │ │ └── unicorn │ │ └── index │ │ ├── CompositeIndexCodec.scala │ │ ├── HashIndexCodec.scala │ │ ├── Index.scala │ │ ├── IndexBuilder.scala │ │ ├── IndexCodec.scala │ │ ├── IndexRowKeyPrefix.scala │ │ ├── Indexing.scala │ │ ├── SingleColumnIndexCodec.scala │ │ ├── TextIndexCodec.scala │ │ └── package.scala │ └── test │ └── scala │ └── unicorn │ └── index │ └── IndexingSpec.scala ├── json ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── json │ │ ├── BaseJsonSerializer.scala │ │ ├── BsonSerializer.scala │ │ ├── ColumnarJsonSerializer.scala │ │ ├── CompactPrinter.scala │ │ ├── DataFrame.scala │ │ ├── JsValue.scala │ │ ├── JsonParser.scala │ │ ├── JsonPath.scala │ │ ├── JsonPrinter.scala │ │ ├── JsonSerializer.scala │ │ ├── PrettyPrinter.scala │ │ └── package.scala │ └── test │ ├── resources │ ├── store.json │ └── test.json │ └── scala │ └── unicorn │ └── json │ ├── CompactPrinterSpec.scala │ ├── JsValueSpec.scala │ ├── 
JsonParserSpec.scala │ ├── JsonPathSpec.scala │ ├── JsonSerializerSpec.scala │ └── PrettyPrinterSpec.scala ├── narwhal ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── narwhal │ │ ├── FilterExpression.scala │ │ ├── HTable.scala │ │ ├── Narwhal.scala │ │ └── graph │ │ └── GraphX.scala │ └── test │ └── scala │ └── unicorn │ └── narwhal │ ├── FilterExpressionSpec.scala │ ├── HTableSpec.scala │ └── graph │ └── GraphXSpec.scala ├── oid ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── oid │ │ ├── BsonObjectId.scala │ │ ├── LongIdGenerator.scala │ │ ├── ObjectId.scala │ │ ├── Snowflake.scala │ │ └── package.scala │ └── test │ └── scala │ └── unicorn │ └── oid │ └── BsonObjectIdSpec.scala ├── project ├── buildinfo.sbt └── plugins.sbt ├── rhino.sh ├── rhino ├── build.sbt └── src │ ├── main │ ├── resources │ │ ├── application.conf │ │ ├── log4j.properties │ │ ├── logback.xml │ │ └── web │ │ │ ├── WEB-INF │ │ │ └── web.xml │ │ │ ├── index.html │ │ │ ├── network.js │ │ │ ├── rhino.jpg │ │ │ └── style.css │ ├── scala │ │ └── unicorn │ │ │ └── rhino │ │ │ ├── Boot.scala │ │ │ └── Rhino.scala │ └── twirl │ │ ├── doc.scala.html │ │ └── search.scala.html │ ├── templates │ ├── debian │ │ └── systemv │ └── rpm │ │ └── systemd │ ├── test │ └── scala │ │ └── unicorn │ │ └── rhino │ │ └── RhinoSpec.scala │ └── universal │ └── conf │ └── rhino.ini ├── rocksdb ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── bigtable │ │ └── rocksdb │ │ ├── RocksDB.scala │ │ └── RocksTable.scala │ └── test │ └── scala │ └── unicorn │ └── bigtable │ └── rocksdb │ └── RocksDBSpec.scala ├── search ├── build.sbt └── src │ └── main │ └── scala │ └── unicorn │ └── search │ ├── TextIndex.scala │ ├── TextIndexBuilder.scala │ └── TextSearch.scala ├── shell ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── shell │ │ ├── Main.scala │ │ └── Shell.scala │ └── universal │ ├── bin │ └── init.scala │ ├── conf │ ├── log4j.properties │ ├── 
unicorn.conf │ └── unicorn.ini │ └── examples │ ├── dbpedia.sh │ ├── gods.sh │ ├── gplus.sh │ ├── json.sh │ ├── pagerank.sh │ ├── rhino.sh │ ├── spark.sh │ ├── sql.sh │ ├── traversal.sh │ ├── twitter.sh │ ├── wiki.sh │ └── worker.sh ├── sql ├── build.sbt └── src │ └── main │ └── scala │ └── unicorn │ └── sql │ ├── SQLAstNode.scala │ ├── SQLContext.scala │ ├── SQLParser.scala │ └── package.scala ├── unibase ├── build.sbt └── src │ ├── main │ └── scala │ │ └── unicorn │ │ └── unibase │ │ ├── DocumentSerializer.scala │ │ ├── Table.scala │ │ ├── Unibase.scala │ │ ├── UpdateOps.scala │ │ ├── graph │ │ ├── Edge.scala │ │ ├── Graph.scala │ │ ├── GraphOps.scala │ │ ├── GraphSerializer.scala │ │ ├── Gremlin.scala │ │ ├── SimpleTraveler.scala │ │ ├── Traveler.scala │ │ ├── Vertex.scala │ │ └── package.scala │ │ └── package.scala │ └── test │ └── scala │ └── unicorn │ └── unibase │ ├── TableSpec.scala │ ├── UnibaseSpec.scala │ └── graph │ ├── GraphOpsSpec.scala │ └── GraphSpec.scala ├── unicorn.sh └── util ├── .idea ├── .name ├── compiler.xml ├── copyright │ └── profiles_settings.xml ├── encodings.xml ├── libraries │ └── SBT__org_scala_lang_scala_library_2_11_2_jar.xml ├── misc.xml ├── modules.xml ├── modules │ ├── edatabase-build.iml │ └── edatabase.iml ├── sbt.xml ├── scala_compiler.xml ├── scopes │ └── scope_settings.xml ├── vcs.xml └── workspace.xml ├── build.sbt └── src └── main └── scala └── unicorn └── util ├── ByteArray.scala ├── Config.scala ├── Logging.scala ├── Using.scala └── package.scala /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ 2 | .idea 3 | 4 | *.class 5 | 6 | # Mobile Tools for Java (J2ME) 7 | .mtj.tmp/ 8 | 9 | # Package Files # 10 | *.jar 11 | *.war 12 | *.ear 13 | 14 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 15 | hs_err_pid* 16 | -------------------------------------------------------------------------------- /accumulo/build.sbt: 
-------------------------------------------------------------------------------- 1 | name := "unicorn-accumulo" 2 | 3 | libraryDependencies += "org.apache.accumulo" % "accumulo-core" % "1.7.1" exclude("org.slf4j", "slf4j-log4j12") 4 | 5 | -------------------------------------------------------------------------------- /accumulo/src/main/scala/unicorn/bigtable/accumulo/Accumulo.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.bigtable.accumulo 18 | 19 | import java.util.Properties 20 | import scala.collection.JavaConversions._ 21 | import org.apache.hadoop.io.Text 22 | import org.apache.accumulo.core.client.{Connector, ZooKeeperInstance} 23 | import org.apache.accumulo.core.client.admin.NewTableConfiguration 24 | import org.apache.accumulo.core.client.mock.MockInstance 25 | import org.apache.accumulo.core.client.security.tokens.PasswordToken 26 | import unicorn.bigtable._ 27 | 28 | /** Accumulo server adapter. 
29 | * 30 | * @author Haifeng Li 31 | */ 32 | class Accumulo(val connector: Connector) extends Database[AccumuloTable] { 33 | val tableOperations = connector.tableOperations 34 | override def close: Unit = () // Connector has no close method 35 | 36 | override def apply(name: String): AccumuloTable = { 37 | new AccumuloTable(this, name) 38 | } 39 | 40 | override def tables: Set[String] = { 41 | connector.tableOperations.list.toSet 42 | } 43 | 44 | override def createTable(name: String, props: Properties, families: String*): AccumuloTable = { 45 | if (connector.tableOperations.exists(name)) 46 | throw new IllegalStateException(s"Creates Table $name, which already exists") 47 | 48 | val config = new NewTableConfiguration 49 | val settings = props.stringPropertyNames.map { p => (p, props.getProperty(p)) }.toMap 50 | config.setProperties(settings) 51 | connector.tableOperations.create(name, config) 52 | 53 | val localityGroups = families.map { family => 54 | val set = new java.util.TreeSet[Text]() 55 | set.add(new Text(family)) 56 | (family, set) 57 | }.toMap 58 | 59 | tableOperations.setLocalityGroups(name, localityGroups) 60 | apply(name) 61 | } 62 | 63 | override def dropTable(name: String): Unit = { 64 | if (!connector.tableOperations.exists(name)) 65 | throw new IllegalStateException(s"Drop Table $name, which does not exists") 66 | 67 | tableOperations.delete(name) 68 | } 69 | 70 | override def truncateTable(name: String): Unit = { 71 | tableOperations.deleteRows(name, null, null) 72 | } 73 | 74 | override def tableExists(name: String): Boolean = { 75 | tableOperations.exists(name) 76 | } 77 | 78 | override def compactTable(name: String): Unit = { 79 | tableOperations.compact(name, null, null, true, false) 80 | } 81 | } 82 | 83 | object Accumulo { 84 | def apply(instance: String, zookeeper: String, user: String, password: String): Accumulo = { 85 | val inst = new ZooKeeperInstance(instance, zookeeper) 86 | val conn = inst.getConnector(user, new 
PasswordToken(password)) 87 | new Accumulo(conn) 88 | } 89 | 90 | /** Create a mock instance that holds all data in memory, and will 91 | * not retain any data or settings between runs. It presently does 92 | * not enforce users, logins, permissions, etc. 93 | * This is for test purpose only. 94 | */ 95 | def apply(user: String = "root", password: String = ""): Accumulo = { 96 | val inst = new MockInstance 97 | val conn = inst.getConnector(user, new PasswordToken(password)) 98 | new Accumulo(conn) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /accumulo/src/test/scala/unicorn/bigtable/accumulo/AccumuloSpec.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.bigtable.accumulo 18 | 19 | import org.specs2.mutable._ 20 | import org.specs2.specification.BeforeAfterAll 21 | import unicorn.bigtable._ 22 | import unicorn.util._ 23 | 24 | /** 25 | * @author Haifeng Li 26 | */ 27 | class AccumuloSpec extends Specification with BeforeAfterAll { 28 | // Make sure running examples one by one. 
29 | // Otherwise, test cases on same columns will fail due to concurrency 30 | sequential 31 | val accumulo = Accumulo() 32 | val tableName = "unicorn_test" 33 | var table: AccumuloTable = null 34 | 35 | override def beforeAll = { 36 | accumulo.createTable(tableName, "cf1", "cf2") 37 | table = accumulo(tableName) 38 | } 39 | 40 | override def afterAll = { 41 | if (table != null) table.close 42 | accumulo.dropTable(tableName) 43 | } 44 | 45 | "Accumulo" should { 46 | "get the put" in { 47 | table.put("row1", "cf1", "c1", "v1", 0L) 48 | new String(table("row1", "cf1", "c1").get, utf8) === "v1" 49 | table.delete("row1", "cf1", "c1") 50 | table("row1", "cf1", "c1".getBytes(utf8)) === None 51 | } 52 | 53 | "get the family" in { 54 | table.put("row1", "cf1", Column("c1", "v1"), Column("c2", "v2")) 55 | val columns = table.get("row1", "cf1") 56 | columns.size === 2 57 | new String(columns(0).value, utf8) === "v1" 58 | new String(columns(1).value, utf8) === "v2" 59 | 60 | table.delete("row1", "cf1") 61 | val empty = table.get("row1", "cf1") 62 | empty.size === 0 63 | } 64 | 65 | "get empty family" in { 66 | val columns = table.get("row1", "cf1") 67 | columns.size === 0 68 | } 69 | 70 | "get nonexistent family" in { 71 | val columns = table.get("row1", "cf5") 72 | columns.size === 0 73 | } 74 | 75 | "get the row" in { 76 | table.put("row1", Seq( 77 | ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))), 78 | ColumnFamily("cf2", Seq(Column("c3", "v3")))) 79 | ) 80 | val families = table.get("row1") 81 | families.size === 2 82 | families(0).columns.size === 2 83 | families(1).columns.size === 1 84 | families(0).family === "cf1" 85 | families(1).family === "cf2" 86 | 87 | new String(families(0).columns(0).value, utf8) === "v1" 88 | new String(families(0).columns(1).value, utf8) === "v2" 89 | new String(families(1).columns(0).value, utf8) === "v3" 90 | 91 | table.delete("row1", "cf1") 92 | val cf1 = table.get("row1", "cf1") 93 | println(cf1) 94 | cf1.size === 0 95 
| 96 | table.get("row1").size === 1 97 | val cf2 = table.get("row1", "cf2") 98 | cf2.size === 1 99 | 100 | table.delete("row1") 101 | table.get("row1").size === 0 102 | } 103 | 104 | "get nonexistent row" in { 105 | val families = table.get("row5") 106 | families.size === 0 107 | } 108 | 109 | "get multiple rows" in { 110 | val row1 = Row("row1", 111 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))), 112 | ColumnFamily("cf2", Seq(Column("c3", "v3"))))) 113 | 114 | val row2 = Row("row2".getBytes(utf8), 115 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))))) 116 | 117 | table.putBatch(row1, row2) 118 | 119 | val keys = Seq("row1", "row2") 120 | val rows = table.getBatch(keys) 121 | rows.size === 2 122 | rows(0).families.size === 2 123 | rows(1).families.size === 1 124 | 125 | table.deleteBatch(keys) 126 | table.getBatch(keys).size === 0 127 | } 128 | 129 | "scan" in { 130 | val row1 = Row("row1", 131 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))), 132 | ColumnFamily("cf2", Seq(Column("c3", "v3"))))) 133 | 134 | val row2 = Row("row2".getBytes(utf8), 135 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))))) 136 | 137 | val row3 = Row("row3".getBytes(utf8), 138 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))))) 139 | 140 | table.putBatch(row1, row2, row3) 141 | 142 | val scanner = table.scan("row1", "row3") 143 | val r1 = scanner.next 144 | new String(r1.key, utf8) === "row1" 145 | r1.families(0).family === "cf1" 146 | r1.families(1).family === "cf2" 147 | val r2 = scanner.next 148 | new String(r2.key, utf8) === "row2" 149 | scanner.hasNext === false 150 | scanner.close 151 | 152 | val keys = Seq("row1", "row2", "row3") 153 | table.deleteBatch(keys) 154 | table.getBatch(keys).size === 0 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /bigtable/build.sbt: 
-------------------------------------------------------------------------------- 1 | name := "unicorn-bigtable" 2 | -------------------------------------------------------------------------------- /bigtable/src/main/scala/unicorn/bigtable/Database.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.bigtable 18 | 19 | import java.util.Properties 20 | 21 | /** A BigTable database. 22 | * 23 | * @author Haifeng Li 24 | */ 25 | trait Database[+T <: BigTable] extends AutoCloseable { 26 | /** Returns a table. 27 | * @param name the name of table. 28 | */ 29 | def apply(name: String): T 30 | 31 | /** Returns the list of tables. */ 32 | def tables: Set[String] 33 | 34 | /** Creates a table. 35 | * @param name the name of table. 36 | * @param families the column families in the table. A column family name 37 | * must be printable -- digit or letter -- and may not contain a :. 38 | * In analogy with relational databases, a column family is as a "table". 39 | * In some NoSQL solutions (e.g. HBase), column families are static 40 | * and should be created when creating the table. 
41 | */ 42 | def createTable(name: String, families: String*): T = { 43 | createTable(name, new Properties(), families: _*) 44 | } 45 | 46 | /** Creates a table. 47 | * @param name the name of table. 48 | * @param props table configurations. 49 | * @param families the column families in the table. A column family name 50 | * must be printable -- digit or letter -- and may not contain a :. 51 | * In analogy with relational databases, a column family is as a "table". 52 | * In some NoSQL solutions (e.g. HBase), column families are static 53 | * and should be created when creating the table. 54 | */ 55 | def createTable(name: String, props: Properties, families: String*): T 56 | 57 | /** Truncates a table 58 | * @param name the name of table. 59 | */ 60 | def truncateTable(name: String): Unit 61 | 62 | /** Drops a table. 63 | * @param name the name of table. 64 | */ 65 | def dropTable(name: String): Unit 66 | 67 | /** Tests if a table exists. 68 | * @param name the name of table. 69 | */ 70 | def tableExists(name: String): Boolean 71 | 72 | /** Major compacts a table. Asynchronous operation. 73 | * @param name the name of table. 
74 | */ 75 | def compactTable(name: String): Unit 76 | } 77 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn" 2 | 3 | import com.typesafe.sbt.pgp.PgpKeys.{useGpg, publishSigned, publishLocalSigned} 4 | 5 | lazy val commonSettings = Seq( 6 | organization := "com.github.haifengl", 7 | organizationName := "Haifeng Li", 8 | organizationHomepage := Some(url("http://haifengl.github.io/")), 9 | version := "2.1.1", 10 | scalaVersion := "2.11.8", 11 | scalacOptions := Seq("-feature", "-language:_", "-unchecked", "-deprecation", "-encoding", "utf8"), 12 | scalacOptions in Test ++= Seq("-Yrangepos"), 13 | libraryDependencies += "org.specs2" %% "specs2-core" % "3.7" % "test", 14 | parallelExecution in Test := false, 15 | publishTo := { 16 | val nexus = "https://oss.sonatype.org/" 17 | if (isSnapshot.value) 18 | Some("snapshots" at nexus + "content/repositories/snapshots") 19 | else 20 | Some("releases" at nexus + "service/local/staging/deploy/maven2") 21 | }, 22 | publishArtifact in Test := false , 23 | publishMavenStyle := true, 24 | useGpg := true, 25 | pomIncludeRepository := { _ => false }, 26 | pomExtra := ( 27 | https://github.com/haifengl/unicorn 28 | 29 | 30 | Apache License, Version 2.0 31 | http://www.apache.org/licenses/LICENSE-2.0.txt 32 | repo 33 | 34 | 35 | 36 | git@github.com:haifengl/unicorn.git 37 | scm:git:git@github.com:haifengl/unicorn.git 38 | 39 | 40 | 41 | haifengl 42 | Haifeng Li 43 | http://haifengl.github.io/ 44 | 45 | 46 | ) 47 | ) 48 | 49 | lazy val nonPubishSettings = commonSettings ++ Seq( 50 | publishArtifact := false, 51 | publishLocal := {}, 52 | publish := {}, 53 | publishSigned := {}, 54 | publishLocalSigned := {} 55 | ) 56 | 57 | lazy val root = project.in(file(".")).settings(nonPubishSettings: _*) 58 | .aggregate(util, oid, json, bigtable, hbase, cassandra, accumulo, rocksdb, unibase, narwhal, 
sql, shell, rhino) 59 | 60 | lazy val util = project.in(file("util")).settings(commonSettings: _*) 61 | 62 | lazy val oid = project.in(file("oid")).settings(commonSettings: _*).dependsOn(util) 63 | 64 | lazy val json = project.in(file("json")).settings(commonSettings: _*).dependsOn(oid) 65 | 66 | lazy val bigtable = project.in(file("bigtable")).settings(commonSettings: _*).dependsOn(util) 67 | 68 | lazy val hbase = project.in(file("hbase")).settings(commonSettings: _*).dependsOn(bigtable) 69 | 70 | lazy val accumulo = project.in(file("accumulo")).settings(commonSettings: _*).dependsOn(bigtable) 71 | 72 | lazy val cassandra = project.in(file("cassandra")).settings(commonSettings: _*).dependsOn(bigtable, util) 73 | 74 | lazy val rocksdb = project.in(file("rocksdb")).settings(commonSettings: _*).dependsOn(bigtable, util) 75 | 76 | //lazy val index = project.in(file("index")).settings(nonPubishSettings: _*).dependsOn(bigtable, json, hbase % "test") 77 | 78 | lazy val unibase = project.in(file("unibase")).settings(commonSettings: _*).dependsOn(json, oid, bigtable, accumulo % "test") 79 | 80 | lazy val narwhal = project.in(file("narwhal")).settings(commonSettings: _*).dependsOn(unibase, hbase) 81 | 82 | lazy val sql = project.in(file("sql")).settings(commonSettings: _*).dependsOn(util, narwhal) 83 | 84 | //lazy val transaction = project.in(file("transaction")).settings(commonSettings: _*).dependsOn(util, narwhal) 85 | 86 | //lazy val search = project.in(file("search")).settings(nonPubishSettings: _*).dependsOn(unibase) 87 | 88 | lazy val shell = project.in(file("shell")).settings(nonPubishSettings: _*).dependsOn(unibase, narwhal, sql, hbase, cassandra, accumulo, rocksdb) 89 | 90 | lazy val rhino = project.in(file("rhino")).enablePlugins(SbtTwirl).settings(nonPubishSettings: _*).dependsOn(unibase, hbase, cassandra, accumulo, rocksdb) 91 | 92 | -------------------------------------------------------------------------------- /cassandra/build.sbt: 
-------------------------------------------------------------------------------- 1 | name := "unicorn-cassandra" 2 | 3 | libraryDependencies += "org.apache.cassandra" % "cassandra-all" % "2.2.1" exclude("ch.qos.logback", "logback-classic") 4 | 5 | -------------------------------------------------------------------------------- /cassandra/src/main/scala/unicorn/bigtable/cassandra/Cassandra.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.bigtable.cassandra 18 | 19 | import java.util.Properties 20 | import java.net.{InetAddress, UnknownHostException} 21 | import scala.collection.JavaConversions._ 22 | import org.apache.cassandra.locator.SimpleSnitch 23 | import org.apache.cassandra.thrift.Cassandra.Client 24 | import org.apache.cassandra.thrift.{ConsistencyLevel, KsDef, CfDef} 25 | import org.apache.thrift.transport.TFramedTransport 26 | import org.apache.thrift.transport.TSocket 27 | import org.apache.thrift.protocol.TBinaryProtocol 28 | import unicorn.bigtable._ 29 | import unicorn.util.Logging 30 | 31 | /** Cassandra server adapter. 
32 | * 33 | * @author Haifeng Li 34 | */ 35 | class Cassandra(transport: TFramedTransport) extends Database[CassandraTable] with Logging { 36 | val protocol = new TBinaryProtocol(transport) 37 | val client = new Client(protocol) 38 | 39 | override def close: Unit = transport.close 40 | 41 | override def apply(name: String): CassandraTable = { 42 | new CassandraTable(this, name) 43 | } 44 | 45 | def apply(name: String, consistency: ConsistencyLevel): CassandraTable = { 46 | new CassandraTable(this, name, consistency) 47 | } 48 | 49 | override def tables: Set[String] = { 50 | client.describe_keyspaces.map(_.getName).toSet 51 | } 52 | 53 | /** Create a table with default NetworkTopologyStrategy placement strategy. */ 54 | override def createTable(name: String, families: String*): CassandraTable = { 55 | val props = new Properties 56 | props.put("class", "org.apache.cassandra.locator.NetworkTopologyStrategy") 57 | props.put("replication_factor", "3") 58 | createTable(name, props, families: _*) 59 | } 60 | 61 | override def createTable(name: String, props: Properties, families: String*): CassandraTable = { 62 | val replicationStrategy = props.getProperty("class") 63 | val replicationOptions = props.stringPropertyNames.filter(_ != "class").map { p => (p, props.getProperty(p)) }.toMap 64 | if (replicationStrategy.contains(".NetworkTopologyStrategy") && replicationOptions.isEmpty) { 65 | // adding default data center from SimpleSnitch 66 | val snitch = new SimpleSnitch 67 | try { 68 | replicationOptions.put(snitch.getDatacenter(InetAddress.getLocalHost()), "1") 69 | } catch { 70 | case e: UnknownHostException => throw new RuntimeException(e) 71 | } 72 | } 73 | 74 | val keyspace = new KsDef 75 | keyspace.setName(name) 76 | keyspace.setStrategy_class(replicationStrategy) 77 | keyspace.setStrategy_options(replicationOptions) 78 | 79 | families.foreach { family => 80 | val cf = new CfDef 81 | cf.setName(family) 82 | cf.setKeyspace(name) 83 | keyspace.addToCf_defs(cf) 84 | } 85 
| 86 | val schemaVersion = client.system_add_keyspace(keyspace) 87 | log.info(s"create table $name: $schemaVersion") 88 | apply(name) 89 | } 90 | 91 | override def dropTable(name: String): Unit = { 92 | client.system_drop_keyspace(name) 93 | } 94 | 95 | override def truncateTable(name: String): Unit = { 96 | client.describe_keyspace(name).getCf_defs.foreach { cf => 97 | client.truncate(cf.getName) 98 | } 99 | } 100 | 101 | override def tableExists(name: String): Boolean = { 102 | client.describe_keyspace(name) != null 103 | } 104 | 105 | /** Cassandra client API doesn't support compaction. 106 | * This is actually a nop. 107 | */ 108 | override def compactTable(name: String): Unit = { 109 | // fail silently 110 | log.warn("Cassandra client API doesn't support compaction") 111 | } 112 | } 113 | 114 | object Cassandra { 115 | def apply(host: String, port: Int): Cassandra = { 116 | // For ultra-wide row, we set the maxLength to 16MB. 117 | // Note that we also need to set the server side configuration 118 | // thrift_framed_transport_size_in_mb in cassandra.yaml 119 | // In case of ultra-wide row, it is better to use intra row scan. 
120 | val transport = new TFramedTransport(new TSocket(host, port), 16 * 1024 * 1024) 121 | transport.open 122 | 123 | new Cassandra(transport) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sbt docker:publishLocal 4 | docker images -q --filter "dangling=true" | xargs docker rmi 5 | 6 | data_dir=$PWD/target/data 7 | mkdir -p $data_dir 8 | 9 | 10 | docker run --name=unicorn-docker -hostname=unicorn-docker --volume=$data_dir:/data -it --rm haifengl/unicorn 11 | -------------------------------------------------------------------------------- /hbase/build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn-hbase" 2 | 3 | libraryDependencies ++= { 4 | val hbaseV = "1.2.1" 5 | val hadoopV = "2.6.4" 6 | Seq( 7 | // Spark conflicts with other servlet-api jars 8 | "org.apache.hbase" % "hbase-common" % hbaseV, 9 | "org.apache.hbase" % "hbase-client" % hbaseV, 10 | "org.apache.hbase" % "hbase-server" % hbaseV exclude("org.mortbay.jetty", "servlet-api-2.5"), 11 | "org.apache.hadoop" % "hadoop-common" % hadoopV exclude("org.eclipse.jetty", "servlet-api") exclude("javax.servlet", "servlet-api") exclude("org.slf4j", "slf4j-log4j12") 12 | ) 13 | } 14 | -------------------------------------------------------------------------------- /hbase/src/main/scala/unicorn/bigtable/hbase/HBase.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.bigtable.hbase 18 | 19 | import java.util.Properties 20 | import scala.collection.JavaConversions._ 21 | import org.apache.hadoop.conf.Configuration 22 | import org.apache.hadoop.hbase.HBaseConfiguration 23 | import org.apache.hadoop.hbase.HTableDescriptor 24 | import org.apache.hadoop.hbase.client.ConnectionFactory 25 | import org.apache.hadoop.hbase.HColumnDescriptor 26 | import org.apache.hadoop.hbase.TableName 27 | import unicorn.bigtable._ 28 | //import unicorn.index.Indexing 29 | 30 | /** HBase server adapter. 
31 | * 32 | * @author Haifeng Li 33 | */ 34 | class HBase(config: Configuration) extends Database[HBaseTable] { 35 | val connection = ConnectionFactory.createConnection(config) 36 | val admin = connection.getAdmin 37 | 38 | override def close: Unit = connection.close 39 | 40 | override def apply(name: String): HBaseTable = { 41 | new HBaseTable(this, name) 42 | } 43 | /* 44 | def getTableWithIndex(name: String): HBaseTable with Indexing = { 45 | new HBaseTable(this, name) with Indexing 46 | } 47 | */ 48 | 49 | override def tables: Set[String] = { 50 | admin.listTableNames.filter(!_.isSystemTable).map(_.getNameAsString).toSet 51 | } 52 | 53 | override def createTable(name: String, props: Properties, families: String*): HBaseTable = { 54 | if (admin.tableExists(TableName.valueOf(name))) 55 | throw new IllegalStateException(s"Creates Table $name, which already exists") 56 | 57 | val tableDesc = new HTableDescriptor(TableName.valueOf(name)) 58 | props.stringPropertyNames.foreach { p => tableDesc.setConfiguration(p, props.getProperty(p))} 59 | families.foreach { family => 60 | val desc = new HColumnDescriptor(family) 61 | props.stringPropertyNames.foreach { p => desc.setConfiguration(p, props.getProperty(p))} 62 | tableDesc.addFamily(desc) 63 | } 64 | admin.createTable(tableDesc) 65 | apply(name) 66 | } 67 | 68 | override def dropTable(name: String): Unit = { 69 | val tableName = TableName.valueOf(name) 70 | admin.disableTable(tableName) 71 | admin.deleteTable(tableName) 72 | } 73 | 74 | /** Truncates a table and preserves the splits */ 75 | override def truncateTable(name: String): Unit = { 76 | admin.truncateTable(TableName.valueOf(name), true) 77 | } 78 | 79 | override def tableExists(name: String): Boolean = { 80 | admin.tableExists(TableName.valueOf(name)) 81 | } 82 | 83 | override def compactTable(name: String): Unit = { 84 | val tableName = TableName.valueOf(name) 85 | admin.majorCompact(tableName) 86 | } 87 | } 88 | 89 | object HBase { 90 | /* Uses 
hbase-site.xml and in hbase-default.xml that can be found on the CLASSPATH */ 91 | def apply(): HBase = { 92 | val config = HBaseConfiguration.create 93 | new HBase(config) 94 | } 95 | 96 | def apply(config: Configuration): HBase = { 97 | new HBase(config) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /index/build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn-index" 2 | 3 | libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.4" 4 | 5 | libraryDependencies += "com.github.haifengl" % "smile-nlp" % "1.1.0" 6 | 7 | -------------------------------------------------------------------------------- /index/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | unicorn { 2 | index { 3 | threshold = 100 4 | } 5 | } -------------------------------------------------------------------------------- /index/src/main/scala/unicorn/index/CompositeIndexCodec.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | package unicorn.index 18 | 19 | import unicorn.bigtable.Cell 20 | import unicorn.index.IndexSortOrder._ 21 | import unicorn.util._ 22 | 23 | /** 24 | * Calculate the cell in the index table for a composite index (multiple columns) in the base table. 25 | * The combined size of index columns should be less than 64KB. 26 | * 27 | * In case of range search, composite index can only be used for fixed-width columns 28 | * (except the last one, which can be of variable length). 29 | * Otherwise, it can only be used for equality queries. In relational database, 30 | * this problem is solved by padding varchar to the maximal length. We don't employ 31 | * this approach because we don't want to limit the data size. 32 | * It is the user's responsibility to use it correctly. 33 | * 34 | * If any field of index is missing we won't index the row. 35 | * 36 | * @author Haifeng Li 37 | */ 38 | class CompositeIndexCodec(val index: Index) extends IndexCodec { 39 | require(index.columns.size > 1) 40 | 41 | override def apply(tenant: Option[Array[Byte]], row: ByteArray, columns: ColumnMap): Seq[Cell] = { 42 | val hasUndefinedColumn = index.columns.exists { indexColumn => 43 | columns.get(index.family).map(_.get(indexColumn.qualifier)).getOrElse(None) match { 44 | case Some(_) => false 45 | case None => true 46 | } 47 | } 48 | 49 | if (hasUndefinedColumn) return Seq.empty 50 | 51 | val hasZeroTimestamp = index.columns.exists { indexColumn => 52 | columns.get(index.family).map(_.get(indexColumn.qualifier)).getOrElse(None) match { 53 | case Some(c) => c.timestamp == 0L 54 | case None => false 55 | } 56 | } 57 | 58 | val timestamp = if (hasZeroTimestamp) 0L else index.columns.foldLeft(0L) { (b, indexColumn) => 59 | val ts = columns(index.family)(indexColumn.qualifier).timestamp 60 | Math.max(b, ts) 61 | } 62 | 63 | resetBuffer(tenant) 64 | index.columns.foreach { indexColumn => 65 | val 
column = columns(index.family)(indexColumn.qualifier).value 66 | 67 | indexColumn.order match { 68 | case Ascending => buffer.put(column) 69 | case Descending => buffer.put(~column) 70 | } 71 | } 72 | 73 | val (qualifier, indexValue) = index.indexType match { 74 | case IndexType.Unique => (UniqueIndexColumnQualifier, row) 75 | case _ => (row, IndexDummyValue) 76 | } 77 | 78 | Seq(Cell(buffer, IndexColumnFamily, qualifier, indexValue, timestamp)) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /index/src/main/scala/unicorn/index/HashIndexCodec.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.index 18 | 19 | import java.nio.ByteBuffer 20 | 21 | import unicorn.bigtable.Cell 22 | import unicorn.index.IndexSortOrder._ 23 | import unicorn.util._ 24 | 25 | /** 26 | * Calculate the cell in the index table for a composite index (multiple columns) in the base table. 27 | * Hash index doesn't support unique constraint. 
28 | * 29 | * @author Haifeng Li 30 | */ 31 | class HashIndexCodec(val index: Index) extends IndexCodec { 32 | require(index.indexType == IndexType.Hashed) 33 | 34 | override def apply(tenant: Option[Array[Byte]], row: ByteArray, columns: ColumnMap): Seq[Cell] = { 35 | val hasUndefinedColumn = index.columns.exists { indexColumn => 36 | columns.get(index.family).map(_.get(indexColumn.qualifier)).getOrElse(None) match { 37 | case Some(_) => false 38 | case None => true 39 | } 40 | } 41 | 42 | if (hasUndefinedColumn) return Seq.empty 43 | 44 | val hasZeroTimestamp = index.columns.exists { indexColumn => 45 | columns.get(index.family).map(_.get(indexColumn.qualifier)).getOrElse(None) match { 46 | case Some(c) => c.timestamp == 0L 47 | case None => false 48 | } 49 | } 50 | 51 | val timestamp = if (hasZeroTimestamp) 0L else index.columns.foldLeft(0L) { (b, indexColumn) => 52 | val ts = columns(index.family)(indexColumn.qualifier).timestamp 53 | Math.max(b, ts) 54 | } 55 | 56 | resetBuffer(tenant) 57 | index.columns.foreach { indexColumn => 58 | val column = columns(index.family)(indexColumn.qualifier).value 59 | 60 | indexColumn.order match { 61 | case Ascending => buffer.put(column) 62 | case Descending => buffer.put(~column) 63 | } 64 | } 65 | 66 | val key = md5(buffer) 67 | 68 | val (qualifier: ByteArray, indexValue: ByteArray) = index.indexType match { 69 | case IndexType.Unique => (UniqueIndexColumnQualifier, row) 70 | case _ => (row, IndexDummyValue) 71 | } 72 | 73 | Seq(Cell(key, IndexColumnFamily, qualifier, indexValue, timestamp)) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /index/src/main/scala/unicorn/index/IndexCodec.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.index 18 | 19 | import java.nio.ByteBuffer 20 | 21 | import unicorn.bigtable.{Cell, Column} 22 | import unicorn.index.IndexSortOrder._ 23 | import unicorn.util._ 24 | 25 | /** Calculate the cell(s) in the index table for a given column set in the base table. 26 | * In case of text index, we have multiple index entries (for each word). 27 | * 28 | * @author Haifeng Li 29 | */ 30 | trait IndexCodec { 31 | /** Index definition. */ 32 | val index: Index 33 | 34 | /** Workspace to encode index row keys. */ 35 | val buffer = ByteBuffer.allocate(16 * 1024) 36 | 37 | /** Returns the index row key prefix. */ 38 | def prefix(tenant: Option[Array[Byte]], value: ByteArray): ByteArray = { 39 | resetBuffer(tenant) 40 | index.columns.head.order match { 41 | case Ascending => buffer.put(value) 42 | case Descending => buffer.put(~value) 43 | } 44 | buffer.put(value) 45 | buffer 46 | } 47 | 48 | /** Given a row, calculate the index entries. 49 | * @param row the row key. 50 | * @param columns a map of family to map of qualifier to cell. 51 | * @return a seq of index entries. 52 | */ 53 | def apply(tenant: Option[Array[Byte]], row: ByteArray, columns: ColumnMap): Seq[Cell] 54 | 55 | /** A helper function useful for testing. 
*/ 56 | def apply(tenant: Option[Array[Byte]], row: ByteArray, family: String, column: ByteArray, value: ByteArray): Seq[Cell] = { 57 | apply(tenant, row, ColumnMap(family, Seq(Column(column, value)))) 58 | } 59 | 60 | /** A helper function useful for testing. */ 61 | def apply(tenant: Option[Array[Byte]], row: ByteArray, family: String, columns: Column*): Seq[Cell] = { 62 | apply(tenant, row, ColumnMap(family, columns)) 63 | } 64 | 65 | /** Resets buffer. */ 66 | def resetBuffer(tenant: Option[Array[Byte]]): Unit = { 67 | buffer.clear 68 | buffer.putShort(index.id.toShort) 69 | tenant match { 70 | case None => buffer.put(0.toByte) 71 | case Some(tenant) => buffer.put(tenant.length.toByte).put(tenant) 72 | } 73 | } 74 | } 75 | 76 | object IndexCodec { 77 | def apply(index: Index): IndexCodec = { 78 | index.indexType match { 79 | case IndexType.Hashed => new HashIndexCodec(index) 80 | case IndexType.Text => new TextIndexCodec(index) 81 | case _ => if (index.columns.size == 1) new SingleColumnIndexCodec(index) else new CompositeIndexCodec(index) 82 | } 83 | } 84 | } -------------------------------------------------------------------------------- /index/src/main/scala/unicorn/index/IndexRowKeyPrefix.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package unicorn.index

import java.nio.ByteBuffer
import unicorn.util._

/** Optionally, row key in the index table may have specific prefix,
 * e.g. tenant id, index name (all index in one table), etc.
 *
 * @author Haifeng Li
 */
trait IndexRowKeyPrefix {
  /** Returns a (optionally) prefix for index table row key.
   * @param index index meta data.
   * @param baseTableRowKey the row key of base table.
   * @return optional index table row key prefix.
   */
  def apply(index: Index, baseTableRowKey: ByteArray): ByteArray
}

/** Suppose the base table row key with tenant id as the prefix. Corresponding, the index
 * table may have the tenant id as the prefix too to ensure proper sharding. The tenant id
 * should be of fixed size
 *
 * @param tenantIdSize the tenant id size.
 *
 * @author Haifeng Li
 */
class TenantPrefix(tenantIdSize: Int) extends IndexRowKeyPrefix {

  // Copies the first tenantIdSize bytes of the base row key; assumes every
  // base row key is at least tenantIdSize bytes long (fixed-size tenant id).
  override def apply(index: Index, baseTableRowKey: ByteArray): ByteArray = {
    val prefix = new Array[Byte](tenantIdSize)
    Array.copy(baseTableRowKey, 0, prefix, 0, tenantIdSize)
    prefix
  }

  override def toString = s"tenant($tenantIdSize)"
}

/** Multiple indices may be stored in one BigTable. Note that BigTable was designed
 * for a few very large tables rather than many small table. With multiple indices in
 * one table, it helps to reduce the number of tables.
 *
 * @param id the index id.
 *
 * @author Haifeng Li
 */
class IndexIdPrefix(id: Int) extends IndexRowKeyPrefix {
  // The 4-byte big-endian encoding of the index id, computed once and shared
  // across calls (the prefix is independent of the base row key).
  val prefix = ByteBuffer.allocate(4).putInt(id).array

  override def apply(index: Index, baseTableRowKey: ByteArray): ByteArray = {
    prefix
  }

  override def toString = s"index($id)"
}
--------------------------------------------------------------------------------
/index/src/main/scala/unicorn/index/SingleColumnIndexCodec.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package unicorn.index

import unicorn.bigtable.Cell
import unicorn.index.IndexSortOrder._
import unicorn.util._

/** Calculates the cell in the index table for a single column in the base table.
 *
 * @author Haifeng Li
 */
class SingleColumnIndexCodec(val index: Index) extends IndexCodec {
  require(index.columns.size == 1)

  // The single indexed column of this codec.
  val indexColumn = index.columns.head

  // Unlike the trait's default, the prefix of a single-column index is just
  // the raw value (no sort-order adjustment applied here).
  override def prefix(tenant: Option[Array[Byte]], value: ByteArray): ByteArray = {
    resetBuffer(tenant)
    buffer.put(value)
    val key: Array[Byte] = buffer
    key
  }

  override def apply(tenant: Option[Array[Byte]], row: ByteArray, columns: ColumnMap): Seq[Cell] = {
    // Missing column: the row is not indexed.
    columns.get(index.family).map(_.get(indexColumn.qualifier)).getOrElse(None) match {
      case None => Seq.empty
      case Some(column) =>
        // Complement (~) the value when the column sorts descending.
        val value = indexColumn.order match {
          case Ascending => column.value
          case Descending => ~column.value
        }

        resetBuffer(tenant)
        buffer.put(value)

        // Unique index: fixed qualifier, row key as value; otherwise row key
        // as qualifier with a dummy value.
        val (qualifier, indexValue) = index.indexType match {
          case IndexType.Unique => (UniqueIndexColumnQualifier, row)
          case _ => (row, IndexDummyValue)
        }

        Seq(Cell(buffer, IndexColumnFamily, qualifier, indexValue, column.timestamp))
    }
  }
}
--------------------------------------------------------------------------------
/index/src/main/scala/unicorn/index/TextIndexCodec.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package unicorn.index

import java.nio.ByteBuffer
import java.nio.charset.Charset
import smile.nlp.dictionary.{EnglishPunctuations, EnglishStopWords}
import smile.nlp.stemmer.{PorterStemmer, Stemmer}
import smile.nlp.tokenizer.{SimpleTokenizer, SimpleSentenceSplitter}
import unicorn.bigtable.Cell
import unicorn.json.JsonSerializerHelper
import unicorn.util._

/**
 * Calculate the cell in the index table for a text index (may include multiple columns) in the base table.
 * Naturally, text index doesn't support unique constraint. It also ignores the column order specification.
 *
 * @author Haifeng Li
 */
class TextIndexCodec(val index: Index, codec: TextCodec = new SimpleTextCodec, stemmer: Option[Stemmer] = Some(new PorterStemmer)) extends IndexCodec {
  require(index.indexType == IndexType.Text)

  // Workspace for encoding the cell value (the term's position list).
  val valueBuffer = ByteBuffer.allocate(64 * 1024)

  override def apply(tenant: Option[Array[Byte]], row: ByteArray, columns: ColumnMap): Seq[Cell] = {
    // One index entry per distinct term per indexed column.
    index.columns.flatMap { indexColumn =>
      val column = columns.get(index.family).map(_.get(indexColumn.qualifier)).getOrElse(None)
      if (column.isDefined) {
        val timestamp = column.get.timestamp
        val text = codec.decode(column.get.value)
        val terms = tokenize(text)
        terms.map { case (term, pos) =>
          // Key layout: common prefix, then length-prefixed term bytes, then
          // length-prefixed qualifier bytes.
          resetBuffer(tenant)
          val bytes = term.getBytes(utf8)
          buffer.putInt(bytes.size)
          buffer.put(bytes)
          buffer.putInt(indexColumn.qualifier.bytes.size)
          buffer.put(indexColumn.qualifier)
          val key = ByteArray(buffer)

          // Value: the term's positions in the text, as consecutive ints.
          valueBuffer.clear
          pos.foreach(valueBuffer.putInt(_))

          Cell(key, IndexColumnFamily, row, byteBuffer2ByteArray(valueBuffer), timestamp)
        }
      } else Seq.empty
    }
  }

  /** Sentence splitter. */
  val sentenceSpliter = SimpleSentenceSplitter.getInstance

  /** Tokenizer on sentences. */
  val tokenizer = new SimpleTokenizer

  /** Dictionary of stop words. */
  val stopWords = EnglishStopWords.DEFAULT

  /** Punctuation. */
  val punctuations = EnglishPunctuations.getInstance

  /**
   * Process each token (after filtering stop words, numbers, and optional stemming).
   */
  private def foreach[U](text: String)(f: ((String, Int)) => U): Unit = {
    var pos = 0

    sentenceSpliter.split(text).foreach { sentence =>
      tokenizer.split(sentence).foreach { token =>
        pos += 1
        val lower = token.toLowerCase
        // Skip punctuation, stop words, single characters and numeric tokens.
        if (!(punctuations.contains(lower) ||
          stopWords.contains(lower) ||
          lower.length == 1 ||
          lower.matches("[0-9\\.\\-\\+\\|\\(\\)]+"))) {
          val word = stemmer match {
            case Some(stemmer) => stemmer.stem(lower)
            case None => lower
          }
          f(word, pos)
        }
      }

      // Advance position across sentence boundaries so positions never collide.
      pos += 1
    }
  }

  // Maps each surviving term to the (reversed) list of its positions in the text.
  private def tokenize(text: String): collection.mutable.Map[String, List[Int]] = {
    val terms = collection.mutable.Map[String, List[Int]]().withDefaultValue(Nil)

    // NOTE(review): `size` is incremented but never read; looks like leftover
    // instrumentation — candidate for removal.
    var size = 0
    foreach(text) { case (word, pos) =>
      size += 1
      terms(word) = pos :: terms(word)
    }

    terms
  }
}

trait TextCodec {
  def decode(bytes: Array[Byte]): String
}

class SimpleTextCodec(charset: Charset = utf8) extends TextCodec {
  override def decode(bytes: Array[Byte]): String = {
    new String(bytes, charset)
  }
}
/*
class JsStringTextCodec extends TextCodec with JsonSerializerHelper {
  override def decode(bytes: Array[Byte]): String = {
    string()(ByteBuffer.wrap(bytes)).value
  }
}
*/
--------------------------------------------------------------------------------
/index/src/main/scala/unicorn/index/package.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package unicorn

import unicorn.bigtable._
import unicorn.util._

/**
 * @author Haifeng Li
 */
package object index {
  /** A table that supports the operations required by index maintenance. */
  type IndexableTable = BigTable with RowScan with FilterScan with Rollback with Counter
  /** Family name -> (qualifier -> column), mutable for in-place updates. */
  type ColumnMap = collection.mutable.Map[String, collection.mutable.Map[ByteArray, Column]]

  private[index] val IndexTableNamePrefix = "unicorn_index_"
  private[index] val IndexColumnFamily = "index"
  private[index] val IndexMetaColumnFamily = "meta"
  private[index] val IndexColumnFamilies = Seq(IndexColumnFamily, IndexMetaColumnFamily)

  private[index] val IndexTableMetaRow: ByteArray = ".unicorn_index_meta."
  private[index] val IndexTableNewIndexId: ByteArray = ".new_index_id."
  private[index] val IndexTableStatColumnCount: ByteArray = ".count."

  private[index] val IndexDummyValue = ByteArray(Array[Byte](1))
  private[index] val UniqueIndexColumnQualifier = ByteArray(Array[Byte](1))

  /** Factory methods converting raw column families/rows into a ColumnMap. */
  private[index] object ColumnMap {
    /** Builds a ColumnMap from a sequence of column families. */
    def apply(families: Seq[ColumnFamily]): ColumnMap = {
      val map = collection.mutable.Map.empty[String, collection.mutable.Map[ByteArray, Column]]
      families.foreach { case ColumnFamily(family, columns) =>
        val qualifiers = collection.mutable.Map.empty[ByteArray, Column]
        columns.foreach { column => qualifiers(ByteArray(column.qualifier)) = column }
        map(family) = qualifiers
      }
      map
    }

    /** Builds a ColumnMap holding a single family. */
    def apply(family: String, columns: Seq[Column]): ColumnMap = {
      val qualifiers = collection.mutable.Map.empty[ByteArray, Column]
      columns.foreach { column => qualifiers(ByteArray(column.qualifier)) = column }
      collection.mutable.Map(family -> qualifiers)
    }

    /** Builds a ColumnMap from a whole row. */
    def apply(row: Row): ColumnMap = {
      apply(row.families)
    }
  }
}
--------------------------------------------------------------------------------
/json/build.sbt:
--------------------------------------------------------------------------------
name := "unicorn-json"

libraryDependencies += "io.gatling" %% "jsonpath" % "0.6.4"

--------------------------------------------------------------------------------
/json/src/main/scala/unicorn/json/CompactPrinter.scala:
--------------------------------------------------------------------------------
package unicorn.json

import java.lang.StringBuilder

/**
 * A JsonPrinter that produces compact JSON source without any superfluous whitespace.
 * Adopt from spray-json.
 */
trait CompactPrinter extends JsonPrinter {

  // Dispatches to object/array printers; all scalars go through printLeaf.
  def print(x: JsValue, sb: StringBuilder) {
    x match {
      case JsObject(x) => printObject(x, sb)
      case JsArray(x) => printArray(x, sb)
      case _ => printLeaf(x, sb)
    }
  }

  // Prints {"k":v,...} with no whitespace.
  protected def printObject(members: Iterable[(String, JsValue)], sb: StringBuilder) {
    sb.append('{')
    printSeq(members, sb.append(',')) { m =>
      printString(m._1, sb)
      sb.append(':')
      print(m._2, sb)
    }
    sb.append('}')
  }

  // Prints [v,...] with no whitespace.
  protected def printArray(elements: Seq[JsValue], sb: StringBuilder) {
    sb.append('[')
    printSeq(elements, sb.append(','))(print(_, sb))
    sb.append(']')
  }
}

object CompactPrinter extends CompactPrinter
--------------------------------------------------------------------------------
/json/src/main/scala/unicorn/json/JsonPrinter.scala:
--------------------------------------------------------------------------------
package unicorn.json

import java.lang.{StringBuilder => JStringBuilder}
import scala.annotation.tailrec

/**
 * A JsonPrinter serializes a JSON AST to a String.
 * Adopt from spray-json.
 */
trait JsonPrinter extends (JsValue => String) {

  def apply(x: JsValue): String = apply(x, None)

  // Optionally wraps the output in a JSONP callback: callback(<json>).
  def apply(x: JsValue, jsonpCallback: Option[String] = None, sb: JStringBuilder = new JStringBuilder(256)): String = {
    jsonpCallback match {
      case Some(callback) =>
        sb.append(callback).append('(')
        print(x, sb)
        sb.append(')')
      case None => print(x, sb)
    }
    sb.toString
  }

  /** Implemented by concrete printers (compact, pretty). */
  def print(x: JsValue, sb: JStringBuilder)

  // Prints every non-object, non-array value.
  protected def printLeaf(x: JsValue, sb: JStringBuilder) {
    x match {
      case JsNull => sb.append("null")
      case JsUndefined => sb.append("undefined")
      case JsBoolean(true) => sb.append("true")
      case JsBoolean(false) => sb.append("false")
      case JsInt(x) => sb.append(x)
      // Omit the ending 'L' to be compatible with other json parser/printer
      case JsLong(x) => sb.append(x)//.append('L')
      case JsCounter(x) => sb.append(x)
      case JsDouble(x) => sb.append(x)
      case JsDate(_) => sb.append('"').append(x.toString).append('"')
      case JsBinary(x) => sb.append('"').append(x.map("%02X" format _).mkString).append('"')
      case JsString(x) => printString(x, sb)
      case JsUUID(x) => sb.append('"').append(x.toString).append('"')
      case JsObjectId(x) => sb.append('"').append(x.toString).append('"')
      case _ => throw new IllegalStateException
    }
  }

  // Prints a JSON string literal, escaping only where RFC 4627 requires it.
  // Fast path: if no character needs escaping, append the string in one call.
  protected def printString(s: String, sb: JStringBuilder) {
    import JsonPrinter._
    @tailrec def firstToBeEncoded(ix: Int = 0): Int =
      if (ix == s.length) -1 else if (requiresEncoding(s.charAt(ix))) ix else firstToBeEncoded(ix + 1)

    sb.append('"')
    firstToBeEncoded() match {
      case -1 ⇒ sb.append(s)
      case first ⇒
        // Copy the clean head, then escape character by character from `first`.
        sb.append(s, 0, first)
        @tailrec def append(ix: Int): Unit =
          if (ix < s.length) {
            s.charAt(ix) match {
              case c if !requiresEncoding(c) => sb.append(c)
              case '"' => sb.append("\\\"")
              case '\\' => sb.append("\\\\")
              case '\b' => sb.append("\\b")
              case '\f' => sb.append("\\f")
              case '\n' => sb.append("\\n")
              case '\r' => sb.append("\\r")
              case '\t' => sb.append("\\t")
              // Pad \u escapes to exactly four hex digits.
              case x if x <= 0xF => sb.append("\\u000").append(Integer.toHexString(x))
              case x if x <= 0xFF => sb.append("\\u00").append(Integer.toHexString(x))
              case x if x <= 0xFFF => sb.append("\\u0").append(Integer.toHexString(x))
              case x => sb.append("\\u").append(Integer.toHexString(x))
            }
            append(ix + 1)
          }
        append(first)
    }
    sb.append('"')
  }

  // Prints each element of `iterable`, emitting the separator between (not after) items.
  protected def printSeq[A](iterable: Iterable[A], printSeparator: => Unit)(f: A => Unit) {
    var first = true
    iterable.foreach { a =>
      if (first) first = false else printSeparator
      f(a)
    }
  }
}

object JsonPrinter {
  def requiresEncoding(c: Char): Boolean =
    // from RFC 4627
    // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
    c match {
      case '"' => true
      case '\\' => true
      case c => c < 0x20
    }
}
--------------------------------------------------------------------------------
/json/src/main/scala/unicorn/json/JsonSerializer.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package unicorn.json

import java.nio.charset.Charset

/**
 * @author Haifeng Li
 */
trait JsonSerializer {
  /** string encoder/decoder */
  val charset: Charset

  /** The path to the root of a JsValue. */
  val root: String

  /** The delimiter in the path to an embedded object or in an array.
   * In general, we follow the dot notation as in MongoDB (even for array elements).
   */
  val pathDelimiter: String

  /** Byte array of undefined. */
  val undefined: Array[Byte]

  /** Byte array of null. */
  val `null`: Array[Byte]

  /** Serialize a string to bytes. */
  def str2Bytes(s: String) = s.getBytes(charset)

  /** Returns the json path of a dot notation path as in MongoDB. */
  def str2Path(path: String) = s"${root}${pathDelimiter}$path"

  /** Returns the byte array of json path */
  def str2PathBytes(path: String) = str2Bytes(str2Path(path))


  /** Serializes a JSON value to a list of key/value pairs, where key is the JSONPath of element. */
  def serialize(value: JsValue, rootJsonPath: String = root): Map[String, Array[Byte]]

  /** Deserialize a JSON value from the given root JSONPath. */
  def deserialize(values: Map[String, Array[Byte]], rootJsonPath: String = root): JsValue
}
--------------------------------------------------------------------------------
/json/src/main/scala/unicorn/json/PrettyPrinter.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package unicorn.json

import java.lang.StringBuilder
import scala.annotation.tailrec

/**
 * A JsonPrinter that produces a nicely readable JSON source.
 * Adopt from spray-json.
 */
trait PrettyPrinter extends JsonPrinter {
  /** Number of spaces per nesting level. */
  val Indent = 2

  def print(x: JsValue, sb: StringBuilder) {
    print(x, sb, 0)
  }

  // `indent` is the current nesting depth in spaces.
  protected def print(x: JsValue, sb: StringBuilder, indent: Int) {
    x match {
      case JsObject(x) => printObject(x, sb, indent)
      case JsArray(x) => printArray(x, sb, indent)
      case _ => printLeaf(x, sb)
    }
  }

  // Objects are printed multi-line, one member per line, indented one level deeper.
  protected def printObject(members: Iterable[(String, JsValue)], sb: StringBuilder, indent: Int) {
    sb.append("{\n")
    printSeq(members, sb.append(",\n")) { m =>
      printIndent(sb, indent + Indent)
      printString(m._1, sb)
      sb.append(": ")
      print(m._2, sb, indent + Indent)
    }
    sb.append('\n')
    printIndent(sb, indent)
    sb.append("}")
  }

  // Arrays stay on one line, separated by ", "; nested objects still break lines.
  protected def printArray(elements: Seq[JsValue], sb: StringBuilder, indent: Int) {
    sb.append('[')
    printSeq(elements, sb.append(", "))(print(_, sb, indent))
    sb.append(']')
  }

  // Appends `indent` spaces.
  protected def printIndent(sb: StringBuilder, indent: Int) {
    @tailrec def rec(indent: Int): Unit =
      if (indent > 0) {
        sb.append(' ')
        rec(indent - 1)
      }
    rec(indent)
  }
}

object PrettyPrinter extends PrettyPrinter
--------------------------------------------------------------------------------
/json/src/test/resources/store.json:
--------------------------------------------------------------------------------
{
  "store": {
    "book": [
      {
        "category": "reference",
        "author": "Nigel Rees",
        "title": "Sayings of the Century",
        "price": 8.95
      },
      {
        "category": "fiction",
        "author": "Evelyn Waugh",
        "title": "Sword of Honour",
        "price": 12.99
      },
      {
        "category": "fiction",
        "author": "Herman Melville",
        "title": "Moby Dick",
        "isbn": "0-553-21311-3",
        "price": 8.99
      },
      {
        "category": "fiction",
        "author": "J. R. R. Tolkien",
        "title": "The Lord of the Rings",
        "isbn": "0-395-19395-8",
        "price": 22.99
      }
    ],
    "bicycle": {
      "color": "red",
      "price": 19.95
    }
  }
}
--------------------------------------------------------------------------------
/json/src/test/scala/unicorn/json/CompactPrinterSpec.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
/** Specification of CompactPrinter, the minimal-whitespace JSON renderer.
  *
  * Fixes relative to the previous version: the verbatim-duplicated
  * `JsDouble(1.23)` example has been removed (identical descriptions also
  * collide in specs2 reporting), and the descriptions now state the actual
  * expected rendering (`-1.0E10`, `1.234E-9`, `JsInt`) instead of
  * contradicting their own assertions.
  */
class CompactPrinterSpec extends Specification {

  "The CompactPrinter" should {
    "print JsNull to 'null'" in {
      CompactPrinter(JsNull) mustEqual "null"
    }
    "print JsTrue to 'true'" in {
      CompactPrinter(JsTrue) mustEqual "true"
    }
    "print JsFalse to 'false'" in {
      CompactPrinter(JsFalse) mustEqual "false"
    }
    "print JsInt(0) to '0'" in {
      CompactPrinter(JsInt(0)) mustEqual "0"
    }
    "print JsDouble(1.23) to '1.23'" in {
      CompactPrinter(JsDouble(1.23)) mustEqual "1.23"
    }
    // Doubles render via Java's Double.toString, hence the normalized forms.
    "print JsDouble(-1E10) to '-1.0E10'" in {
      CompactPrinter(JsDouble(-1E10)) mustEqual "-1.0E10"
    }
    "print JsDouble(12.34e-10) to '1.234E-9'" in {
      CompactPrinter(JsDouble(12.34e-10)) mustEqual "1.234E-9"
    }
    "print JsString(\"xyz\") to \"xyz\"" in {
      CompactPrinter(JsString("xyz")) mustEqual "\"xyz\""
    }
    "properly escape special chars in JsString" in {
      CompactPrinter(JsString("\"\\\b\f\n\r\t")) mustEqual """"\"\\\b\f\n\r\t""""
      CompactPrinter(JsString("\u1000")) mustEqual "\"\u1000\""
      CompactPrinter(JsString("\u0100")) mustEqual "\"\u0100\""
      CompactPrinter(JsString("\u0010")) mustEqual "\"\\u0010\""
      CompactPrinter(JsString("\u0001")) mustEqual "\"\\u0001\""
      CompactPrinter(JsString("\u001e")) mustEqual "\"\\u001e\""
      // don't escape as it isn't required by the spec
      CompactPrinter(JsString("\u007f")) mustEqual "\"\u007f\""
      CompactPrinter(JsString("飞机因此受到损伤")) mustEqual "\"飞机因此受到损伤\""
      CompactPrinter(JsString("\uD834\uDD1E")) mustEqual "\"\uD834\uDD1E\""
    }
    "properly print a simple JsObject" in (
      CompactPrinter(JsObject("key" -> JsInt(42), "key2" -> JsString("value"))) mustEqual """{"key":42,"key2":"value"}"""
    )
    "properly print a simple JsArray" in (
      CompactPrinter(JsArray(JsNull, JsDouble(1.23), JsObject("key" -> JsBoolean(true)))) mustEqual """[null,1.23,{"key":true}]"""
    )
    "properly print a JSON padding (JSONP) if requested" in {
      CompactPrinter(JsTrue, Some("customCallback")) mustEqual("customCallback(true)")
    }
  }
}
/** Specification of PrettyPrinter: parses a messy document and checks the
  * exact indented rendering.
  */
class PrettyPrinterSpec extends Specification {

  "The PrettyPrinter" should {
    "print a more complicated JsObject nicely aligned" in {
      // Input deliberately mixes key quoting, \u escapes, nested objects,
      // arrays and irregular whitespace to exercise the printer end to end.
      val json = JsonParser {
        """{
          | "Boolean no": false,
          | "Boolean yes":true,
          | "Unic\u00f8de" : "Long string with newline\nescape",
          | "key with \"quotes\"" : "string",
          | "key with spaces": null,
          | "number": -1.2323424E-5,
          | "simpleKey" : "some value",
          | "sub object" : {
          |   "sub key": 26.5,
          |   "a": "b",
          |   "array": [1, 2, { "yes":1, "no":0 }, ["a", "b", null], false]
          | },
          | "zero": 0
          |}""".stripMargin
      }

      // NOTE(review): the expected members appear in a DIFFERENT order than
      // the input; this presumably reflects JsObject's internal map
      // iteration order rather than insertion order — confirm before
      // tightening or reordering this expectation.
      PrettyPrinter(json) mustEqual {
        """{
          |  "simpleKey": "some value",
          |  "Boolean yes": true,
          |  "key with \"quotes\"": "string",
          |  "sub object": {
          |    "sub key": 26.5,
          |    "a": "b",
          |    "array": [1, 2, {
          |      "no": 0,
          |      "yes": 1
          |    }, ["a", "b", null], false]
          |  },
          |  "Boolean no": false,
          |  "key with spaces": null,
          |  "number": -1.2323424E-5,
          |  "Unic\u00f8de": "Long string with newline\nescape",
          |  "zero": 0
          |}""".stripMargin
      }
    }
  }
}
/** Unibase specialized for HBase. */
class Narwhal(hbase: HBase) extends Unibase(hbase) {
  /** Returns a document table.
    * @param name the name of table.
    */
  override def apply(name: String): HTable = {
    new HTable(hbase(name), TableMeta(hbase, name))
  }

  /** The connection string of ZooKeeper instance used by this HBase.
    * Read eagerly from the HBase configuration at construction time.
    */
  val zookeeper = hbase.connection.getConfiguration.get("hbase.zookeeper.quorum")

  /** Returns a graph with Spark GraphX support, backed by this HBase.
    * @param name the name of the graph table.
    */
  override def graph(name: String): GraphX = {
    new GraphX(hbase(name), hbase(unicorn.unibase.graph.GraphDocumentVertexTable))
  }

  /*
  /** Returns a document table.
    * @param name the name of table.
    */
  def getTableWithIndex(name: String): HTableWithIndex = {
    new HTableWithIndex(hbase.getTableWithIndex(name), TableMeta(hbase, name))
  }
  */
}

object Narwhal {
  /** Creates a Narwhal over the given HBase connection. */
  def apply(db: HBase): Narwhal = {
    new Narwhal(db)
  }
}
/** Unibase graph specialized for HBase with Spark GraphX supports. */
class GraphX(override val table: HBaseTable, documentVertexTable: HBaseTable) extends ReadOnlyGraph(table, documentVertexTable) {

  /** Returns a Spark GraphX object.
    *
    * @param sc Spark context object.
    * @return a Spark GraphX Graph.
    */
  def graphx(sc: SparkContext): org.apache.spark.graphx.Graph[JsObject, (String, JsValue)] = {

    // Configure an HBase scan over this graph's table, restricted to the
    // vertex and out-edge column families, with row caching enabled and
    // block caching disabled (full-table scan).
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, name)
    conf.setInt(TableInputFormat.SCAN_CACHEDROWS, 500)
    conf.setBoolean(TableInputFormat.SCAN_CACHEBLOCKS, false)
    conf.set(TableInputFormat.SCAN_COLUMNS, s"$GraphVertexColumnFamily $GraphOutEdgeColumnFamily")

    val rdd = sc.newAPIHadoopRDD(
      conf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    // One GraphSerializer per partition: constructed inside mapPartitions so
    // it is not captured into the task closure.
    val rows = rdd.mapPartitions { it =>
      val serializer = new GraphSerializer()
      it.map { tuple =>
        val row = HBaseTable.getRow(tuple._2)
        serializer.deserializeVertex(row)
      }
    }

    // NOTE(review): `rows` is consumed twice (vertices and edges below)
    // without being cached, so the HBase scan and deserialization may run
    // twice — consider rows.cache() if that is unintended; confirm the
    // memory budget first.
    val vertices = rows.map { vertex =>
      (vertex.id, vertex.properties)
    }

    val edges = rows.flatMap { vertex =>
      vertex.edges.map { edge =>
        org.apache.spark.graphx.Edge(edge.from, edge.to, (edge.label, edge.properties))
      }
    }

    org.apache.spark.graphx.Graph(vertices, edges)
  }
}
/** Specification of the filter-expression parser: each example asserts the
  * AST produced for one SQL-like predicate. Literals are normalized so the
  * field is always on the left (e.g. "0 < a" parses as Gt("a", 0)).
  */
class FilterExpressionSpec extends Specification {

  "The FilterExpressionParser" should {
    "parse 'a = 0'" in {
      FilterExpression("a = 0") === Eq("a", IntLiteral(0))
      FilterExpression("0 = a") === Eq("a", IntLiteral(0))
    }
    "parse 'a = 0.1'" in {
      FilterExpression("a = 0.1") === Eq("a", DoubleLiteral(0.1))
    }
    "parse 'a = 1e-1'" in {
      FilterExpression("a = 1e-1") === Eq("a", DoubleLiteral(1e-1))
    }
    "parse 'a = abc'" in {
      FilterExpression("""a = "abc"""") === Eq("a", StringLiteral("abc"))
    }
    // ISO-8601 timestamps, with and without milliseconds.
    "parse 'a = 2015-09-14T04:49:13Z'" in {
      FilterExpression("a = 2015-09-14T04:49:13Z") === Eq("a", DateLiteral(Date.from(Instant.parse("2015-09-14T04:49:13Z"))))
      FilterExpression("a = 2015-09-14T04:49:13.123Z") === Eq("a", DateLiteral(Date.from(Instant.parse("2015-09-14T04:49:13.123Z"))))
    }
    // Both "!=" and SQL-style "<>" spell inequality.
    "parse 'a != 0'" in {
      FilterExpression("a != 0") === Ne("a", IntLiteral(0))
      FilterExpression("a <> 0") === Ne("a", IntLiteral(0))
      FilterExpression("0 != a") === Ne("a", IntLiteral(0))
      FilterExpression("0 <> a") === Ne("a", IntLiteral(0))
    }
    "parse 'a > 0'" in {
      FilterExpression("a > 0") === Gt("a", IntLiteral(0))
      FilterExpression("0 < a") === Gt("a", IntLiteral(0))
    }
    "parse 'a >= 0'" in {
      FilterExpression("a >= 0") === Ge("a", IntLiteral(0))
      FilterExpression("0 <= a") === Ge("a", IntLiteral(0))
    }
    "parse 'a < 0'" in {
      FilterExpression("a < 0") === Lt("a", IntLiteral(0))
      FilterExpression("0 > a") === Lt("a", IntLiteral(0))
    }
    "parse 'a <= 0'" in {
      FilterExpression("a <= 0") === Le("a", IntLiteral(0))
      FilterExpression("0 >= a") === Le("a", IntLiteral(0))
    }
    // The boolean flag on IsNull marks negation ("is not null").
    "parse 'is null'" in {
      FilterExpression("a is null") === IsNull("a", false)
      FilterExpression("a is not null") === IsNull("a", true)
    }
    // Dot notation and array indexing are kept verbatim in the field name.
    "parse 'a.b = 0'" in {
      FilterExpression("a.b = 0") === Eq("a.b", IntLiteral(0))
    }
    "parse 'a[0].b.c[1] = 0'" in {
      FilterExpression("a[0].b.c[1] = 0") === Eq("a[0].b.c[1]", IntLiteral(0))
    }
    "parse '(a = 0)'" in {
      FilterExpression("(a = 0)") === Eq("a", IntLiteral(0))
    }
    // "&&", "and", "AND" are synonyms; likewise for "or".
    "parse 'a >= 0 && a < 10'" in {
      FilterExpression("a >= 0 && a < 10") === And(Ge("a", IntLiteral(0)), Lt("a", IntLiteral(10)))
      FilterExpression("a >= 0 and a < 10") === And(Ge("a", IntLiteral(0)), Lt("a", IntLiteral(10)))
      FilterExpression("a >= 0 AND a < 10") === And(Ge("a", IntLiteral(0)), Lt("a", IntLiteral(10)))
    }
    "parse 'a >= 0 || a < 10'" in {
      FilterExpression("a >= 0 || a < 10") === Or(Ge("a", IntLiteral(0)), Lt("a", IntLiteral(10)))
      FilterExpression("a >= 0 or a < 10") === Or(Ge("a", IntLiteral(0)), Lt("a", IntLiteral(10)))
      FilterExpression("a >= 0 OR a < 10") === Or(Ge("a", IntLiteral(0)), Lt("a", IntLiteral(10)))
    }
    // AND binds tighter than OR; parentheses override that precedence.
    "parse 'a >= 0 || a < 10 && b = 5'" in {
      FilterExpression("a >= 0 || a < 10 && b = 5") === Or(Ge("a", IntLiteral(0)), And(Lt("a", IntLiteral(10)), Eq("b", IntLiteral(5))))
    }
    "parse '(a >= 0 || a < 10) && b = 5'" in {
      FilterExpression("(a >= 0 || a < 10) && b = 5") === And(Or(Ge("a", IntLiteral(0)), Lt("a", IntLiteral(10))), Eq("b", IntLiteral(5)))
    }
  }
}
/** Integration test of the GraphX bridge: builds the classic "graph of the
  * gods" fixture in HBase, then loads it back through Spark GraphX.
  *
  * @author Haifeng Li
  */
class GraphXSpec extends Specification with BeforeAfterAll {
  // Make sure running examples one by one.
  // Otherwise, test cases on same columns will fail due to concurrency
  sequential

  val bigtable = HBase()
  val db = new Narwhal(bigtable)
  val graphName = "unicorn_unibase_graph_test"

  // Vertex ids assigned by addVertex in beforeAll; vars because the ids are
  // only known after the fixture is created.
  var saturn = 0L
  var sky = 0L
  var sea = 0L
  var jupiter = 0L
  var neptune = 0L
  var hercules = 0L
  var alcmene = 0L
  var pluto = 0L
  var nemean = 0L
  var hydra = 0L
  var cerberus = 0L
  var tartarus = 0L

  // Creates the graph and populates 12 vertices and 17 edges; the counts
  // below in the "spark" example must match this fixture.
  override def beforeAll = {
    db.createGraph(graphName)

    val gods = db.graph(graphName, db.zookeeper)

    saturn = gods.addVertex(json"""{"label": "titan", "name": "saturn", "age": 10000}""")
    sky = gods.addVertex(json"""{"label": "location", "name": "sky"}""")
    sea = gods.addVertex(json"""{"label": "location", "name": "sea"}""")
    jupiter = gods.addVertex(json"""{"label": "god", "name": "jupiter", "age": 5000}""")
    neptune = gods.addVertex(json"""{"label": "god", "name": "neptune", "age": 4500}""")
    hercules = gods.addVertex(json"""{"label": "demigod", "name": "hercules", "age": 30}""")
    alcmene = gods.addVertex(json"""{"label": "human", "name": "alcmene", "age": 45}""")
    pluto = gods.addVertex(json"""{"label": "god", "name": "pluto", "age": 4000}""")
    nemean = gods.addVertex(json"""{"label": "monster", "name": "nemean"}""")
    hydra = gods.addVertex(json"""{"label": "monster", "name": "hydra"}""")
    cerberus = gods.addVertex(json"""{"label": "monster", "name": "cerberus"}""")
    tartarus = gods.addVertex(json"""{"label": "location", "name": "tartarus"}""")

    gods.addEdge(jupiter, "father", saturn)
    gods.addEdge(jupiter, "lives", sky, json"""{"reason": "loves fresh breezes"}""")
    gods.addEdge(jupiter, "brother", neptune)
    gods.addEdge(jupiter, "brother", pluto)

    gods.addEdge(neptune, "lives", sea, json"""{"reason": "loves waves"}""")
    gods.addEdge(neptune, "brother", jupiter)
    gods.addEdge(neptune, "brother", pluto)

    gods.addEdge(hercules, "father", jupiter)
    gods.addEdge(hercules, "mother", alcmene)
    gods.addEdge(hercules, "battled", nemean, json"""{"time": 1, "place": {"latitude": 38.1, "longitude": 23.7}}""")
    gods.addEdge(hercules, "battled", hydra, json"""{"time": 2, "place": {"latitude": 37.7, "longitude": 23.9}}""")
    gods.addEdge(hercules, "battled", cerberus, json"""{"time": 12, "place": {"latitude": 39.0, "longitude": 22.0}}""")

    gods.addEdge(pluto, "brother", jupiter)
    gods.addEdge(pluto, "brother", neptune)
    gods.addEdge(pluto, "lives", tartarus, json"""{"reason": "no fear of death"}""")
    gods.addEdge(pluto, "pet", cerberus)

    gods.addEdge(cerberus, "lives", tartarus)
  }

  override def afterAll = {
    db.dropGraph(graphName)
  }

  "GraphX" should {
    "spark" in {
      import org.apache.spark._

      // Local 4-thread Spark context; no cluster required.
      val conf = new SparkConf().setAppName("unicorn").setMaster("local[4]")
      val sc = new SparkContext(conf)

      val graph = db.graph(graphName)
      val rdd = graph.graphx(sc)

      // Counts must agree with the fixture built in beforeAll.
      rdd.numVertices === 12
      rdd.numEdges === 17

      sc.stop
      // hacking the return value of `in`: only the LAST expression is the
      // example's Result, so this forces success after sc.stop (which
      // returns Unit).
      1 === 1
    }
  }
}
/** 64-bit ID generator.
  *
  * @author Haifeng Li
  */
trait LongIdGenerator {
  /** Generate a unique 64 bit ID.
    *
    * NOTE(review): each call presumably yields a new id (i.e. it is
    * side-effecting), yet the member is declared without parentheses;
    * by Scala convention a side-effecting member would be `next(): Long`
    * — confirm call sites before changing the declaration.
    */
  def next: Long
}
/** Abstract Object Id type: wraps a raw byte array and provides value-based
  * equality, lexicographic ordering and hex formatting.
  *
  * @author Haifeng Li
  */
class ObjectId(val id: Array[Byte]) extends Comparable[ObjectId] {
  override def equals(that: Any): Boolean = {
    that.isInstanceOf[ObjectId] && Arrays.equals(id, that.asInstanceOf[ObjectId].id)
  }

  /** equals is content based, so hashCode must be too; without this
    * override, equal ids hash to different buckets and ObjectId breaks
    * as a hash map/set key.
    */
  override def hashCode: Int = Arrays.hashCode(id)

  override def compareTo(o: ObjectId): Int = {
    compareByteArray(id, o.id)
  }

  /** Hexadecimal string representation. */
  override def toString = bytes2Hex(id)

  /** Suppose the byte array is the UTF-8 encoding of a printable string. */
  def string: String = {
    new String(id, utf8)
  }
}

object ObjectId {
  def apply(id: Array[Byte]) = new ObjectId(id)

  def apply(id: String) = new ObjectId(id.getBytes("UTF-8"))

  /** 4-byte big-endian id of an Int.
    *
    * Bug fix: the previous `Array[Byte](4)` called the Array companion's
    * varargs apply, producing a ONE-element array containing the value 4,
    * so ByteBuffer.putInt threw BufferOverflowException. `new Array[Byte](4)`
    * allocates the intended 4 zeroed bytes. Same fix applies to the Long,
    * Date and UUID overloads below.
    */
  def apply(id: Int) = {
    val array = new Array[Byte](4)
    val buffer = ByteBuffer.wrap(array)
    buffer.putInt(id)
    new ObjectId(array)
  }

  /** 8-byte big-endian id of a Long. */
  def apply(id: Long) = {
    val array = new Array[Byte](8)
    val buffer = ByteBuffer.wrap(array)
    buffer.putLong(id)
    new ObjectId(array)
  }

  /** 8-byte id of a Date, encoded as its epoch-millisecond timestamp. */
  def apply(id: Date) = {
    val array = new Array[Byte](8)
    val buffer = ByteBuffer.wrap(array)
    buffer.putLong(id.getTime)
    new ObjectId(array)
  }

  /** 16-byte id of a UUID: most significant 8 bytes, then least significant. */
  def apply(id: UUID) = {
    val array = new Array[Byte](16)
    val buffer = ByteBuffer.wrap(array)
    buffer.putLong(id.getMostSignificantBits)
    buffer.putLong(id.getLeastSignificantBits)
    new ObjectId(array)
  }
}
/** Implicit conversions from common id source types to ObjectId, delegating
  * to the corresponding ObjectId.apply overloads.
  *
  * NOTE(review): implicit conversions from such ubiquitous types (Int, Long,
  * String, Array[Byte]) can be triggered accidentally wherever this package
  * object is in scope; confirm callers depend on them before widening their
  * import scope.
  *
  * @author Haifeng Li
  */
package object oid {
  implicit def int2ObjectId(x: Int): ObjectId = ObjectId(x)
  implicit def long2ObjectId(x: Long): ObjectId = ObjectId(x)
  implicit def string2ObjectId(x: String): ObjectId = ObjectId(x)
  implicit def date2ObjectId(x: Date): ObjectId = ObjectId(x)
  implicit def uuid2ObjectId(x: UUID): ObjectId = ObjectId(x)
  implicit def bytes2ObjectId(x: Array[Byte]): ObjectId = ObjectId(x)
}
/** Sanity check of BsonObjectId generation. */
class BsonObjectIdSpec extends Specification {

  "BsonObjectId" should {
    "change over time" in {
      // println output is intentional: lets a human eyeball the id format
      // in the test log.
      println(BsonObjectId.generate)
      println(BsonObjectId.generate)
      // Two back-to-back ids must differ (specs2 `!==` inequality matcher).
      BsonObjectId.generate !== BsonObjectId.generate
    }
  }
}
-------------------------------------------------------------------------------- /rhino/build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn-rhino" 2 | 3 | mainClass in Compile := Some("unicorn.rhino.Boot") 4 | 5 | enablePlugins(JavaServerAppPackaging) 6 | 7 | maintainer := "Haifeng Li " 8 | 9 | packageName := "unicorn-rhino" 10 | 11 | packageSummary := "Unicorn REST API" 12 | 13 | packageDescription := "Unicorn REST API" 14 | 15 | executableScriptName := "rhino" 16 | 17 | mappings in Universal += { 18 | val conf = (resourceDirectory in Compile).value / "application.conf" 19 | conf -> "conf/rhino.conf" 20 | } 21 | 22 | mappings in Universal += { 23 | val conf = (resourceDirectory in Compile).value / "log4j.properties" 24 | conf -> "conf/log4j.properties" 25 | } 26 | 27 | mappings in Universal += { 28 | val conf = (resourceDirectory in Compile).value / "logback.xml" 29 | conf -> "conf/logback.xml" 30 | } 31 | 32 | bashScriptConfigLocation := Some("${app_home}/../conf/rhino.ini") 33 | 34 | bashScriptExtraDefines += """addJava "-Dconfig.file=${app_home}/../conf/rhino.conf"""" 35 | 36 | bashScriptExtraDefines += """addJava "-Dlogback.configurationFile=${app_home}/../conf/logback.xml"""" 37 | 38 | libraryDependencies ++= { 39 | val akkaV = "2.4.4" 40 | val sprayV = "1.3.3" 41 | Seq( 42 | "io.spray" %% "spray-can" % sprayV, 43 | "io.spray" %% "spray-routing" % sprayV, 44 | "io.spray" %% "spray-testkit" % sprayV % "test", 45 | "com.typesafe.akka" %% "akka-actor" % akkaV, 46 | "com.typesafe.akka" %% "akka-slf4j" % akkaV, 47 | "com.typesafe.akka" %% "akka-testkit" % akkaV % "test" 48 | ) 49 | } 50 | 51 | libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.1.7" -------------------------------------------------------------------------------- /rhino/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | loglevel = DEBUG 3 | 
loggers = ["akka.event.slf4j.Slf4jLogger"] 4 | event-handlers = ["akka.event.slf4j.Slf4jEventHandler"] 5 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" 6 | } 7 | 8 | spray.can.server { 9 | port = 8080 10 | } 11 | 12 | akka.actor { 13 | creation-timeout = 200s 14 | deployment { 15 | /rhino-router { 16 | router = round-robin-pool 17 | nr-of-instances = 64 18 | } 19 | } 20 | } 21 | 22 | unicorn.rhino { 23 | bigtable = "hbase" 24 | accumulo { 25 | instance = "local-poc" 26 | zookeeper = "127.0.0.1:2181" 27 | user = "root" 28 | password = "secret" 29 | } 30 | cassandra { 31 | host = "127.0.0.1" 32 | port = 9160 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /rhino/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | rhino.root.logger=DEBUG,console,RFA 3 | rhino.log.dir=/var/log/rhino 4 | rhino.log.file=rhino.log 5 | 6 | # Define the root logger to the system property "rhino.root.logger". 
7 | log4j.rootLogger=${rhino.root.logger} 8 | 9 | # Logging Threshold 10 | log4j.threshold=ALL 11 | 12 | # 13 | # Daily Rolling File Appender 14 | # 15 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 16 | log4j.appender.DRFA.File=${rhino.log.dir}/${rhino.log.file} 17 | 18 | # Rollver at midnight 19 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 20 | 21 | # 30-day backup 22 | #log4j.appender.DRFA.MaxBackupIndex=30 23 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 24 | 25 | # Pattern format: Date LogLevel LoggerName LogMessage 26 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 27 | 28 | # Rolling File Appender properties 29 | rhino.log.maxfilesize=256MB 30 | rhino.log.maxbackupindex=20 31 | 32 | # Rolling File Appender 33 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 34 | log4j.appender.RFA.File=${rhino.log.dir}/${rhino.log.file} 35 | 36 | log4j.appender.RFA.MaxFileSize=${rhino.log.maxfilesize} 37 | log4j.appender.RFA.MaxBackupIndex=${rhino.log.maxbackupindex} 38 | 39 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 40 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 41 | 42 | 43 | # 44 | # Null Appender 45 | # 46 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 47 | 48 | # 49 | # console 50 | # Add "console" to rootlogger above if you want to use this 51 | # 52 | log4j.appender.console=org.apache.log4j.ConsoleAppender 53 | log4j.appender.console.target=System.err 54 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 55 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 56 | 57 | # Custom Logging levels 58 | log4j.logger.kafka=INFO 59 | log4j.logger.org.apache.zookeeper=INFO 60 | log4j.logger.com.jayway.jsonpath=INFO 61 | -------------------------------------------------------------------------------- /rhino/src/main/resources/logback.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n 10 | 11 | 12 | 13 | 14 | 16 | ${LOG_HOME}/rhino.log 17 | 18 | 19 | %d{yyyy-MM-dd HH:mm:ss} - %msg%n 20 | 21 | 22 | 23 | 24 | 25 | ${LOG_HOME}/archived/rhino.%d{yyyy-MM-dd}.%i.log 26 | 27 | 29 | 10MB 30 | 31 | 32 | 33 | 34 | 35 | 37 | ${LOG_HOME}/error.log 38 | 39 | 40 | %d{yyyy-MM-dd HH:mm:ss} - %msg%n 41 | 42 | 43 | 44 | 45 | 46 | ${LOG_HOME}/archived/error.%d{yyyy-MM-dd}.%i.log 47 | 48 | 50 | 10MB 51 | 52 | 53 | 54 | 55 | 56 | 57 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /rhino/src/main/resources/web/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | Unicorn WebApp 10 | 11 | 12 | org.eclipse.jetty.server.context.ManagedAttributes 13 | QoSFilter,TransparentProxy.ThreadPool,TransparentProxy.HttpClient 14 | 15 | 16 | 17 | search 18 | com.adp.unicorn.demo.TextSearchServlet 19 | 1 20 | 21 | 22 | 23 | doc 24 | /doc/* 25 | 26 | 27 | 28 | doc 29 | com.adp.unicorn.demo.DocumentServlet 30 | 1 31 | 32 | 33 | 34 | link 35 | /link/* 36 | 37 | 38 | 39 | link 40 | com.adp.unicorn.demo.LinkServlet 41 | 1 42 | 43 | 44 | 45 | search 46 | /search/* 47 | 48 | 49 | 50 | default 51 | /css/* 52 | 53 | 54 | 55 | index.html 56 | 57 | -------------------------------------------------------------------------------- /rhino/src/main/resources/web/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Unicorn HTTP API 7 | 8 | 9 | 10 | 48 | 49 | 50 | 51 |

RHINO

52 | 53 |
54 |
55 | 62 | 63 | 67 | 68 | 69 | 70 | 71 | 72 | 73 |

74 |
75 |
76 | 77 | 78 | -------------------------------------------------------------------------------- /rhino/src/main/resources/web/rhino.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adplabs/unicorn/48d35ddf17358c711d38fd685ace8e753c50a16c/rhino/src/main/resources/web/rhino.jpg -------------------------------------------------------------------------------- /rhino/src/main/resources/web/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-image:url("rhino.jpg"); 3 | background-repeat:no-repeat; 4 | margin:0px 0px; 5 | padding:0px; 6 | } 7 | 8 | #logo { 9 | margin-top: 50px; 10 | font-size:6em; 11 | color:red; 12 | text-align:center; 13 | text-shadow:0 1px 0 #ccc, 14 | 0 2px 0 #c9c9c9, 15 | 0 3px 0 #bbb, 16 | 0 4px 0 #b9b9b9, 17 | 0 5px 0 #aaa, 18 | 0 6px 1px rgba(0,0,0,.1), 19 | 0 0 5px rgba(0,0,0,.1), 20 | 0 1px 3px rgba(0,0,0,.3), 21 | 0 3px 5px rgba(0,0,0,.2), 22 | 0 5px 10px rgba(0,0,0,.25), 23 | 0 10px 10px rgba(0,0,0,.2), 24 | 0 20px 20px rgba(0,0,0,.15); 25 | } 26 | 27 | #content { 28 | text-align:center; 29 | } 30 | 31 | #bottom-right { 32 | position:absolute; 33 | bottom:0; 34 | right:0; 35 | width:100%; 36 | text-align:right; 37 | } 38 | 39 | .divider{ 40 | width:20px; 41 | height:auto; 42 | display:inline-block; 43 | } 44 | 45 | 46 | /* ....................................................................... 
*/ 47 | /* SVG elements */ 48 | 49 | div#network svg { 50 | background-color: white; 51 | cursor: move; 52 | } 53 | 54 | line.link { 55 | stroke: #B2D9D8; 56 | } 57 | 58 | circle { 59 | cursor: crosshair; 60 | fill: #EBC763; 61 | } 62 | 63 | circle.level1 { 64 | stroke: #f00; 65 | } 66 | 67 | circle.level2 { 68 | fill-opacity: 0.8; 69 | stroke-opacity: 0.8; 70 | stroke: #777; 71 | } 72 | 73 | circle.level3 { 74 | fill-opacity: 0.5; 75 | stroke-opacity: 0.5; 76 | } 77 | 78 | circle.sibling { 79 | /*fill: blue;*/ 80 | fill: #455EE8; 81 | } 82 | 83 | circle.main { 84 | /*fill: red;*/ 85 | fill: #732A9A; 86 | fill-opacity: 1.0; 87 | } 88 | 89 | /* ....................................................................... */ 90 | /* Graph labels */ 91 | 92 | g.gLabel { 93 | font: 10px sans-serif; 94 | font-weight: normal; 95 | /*visibility: hidden;*/ 96 | } 97 | 98 | g.on { 99 | visibility: visible; 100 | } 101 | 102 | g.zoomed { 103 | font-family: Verdana, Arial, Helvetica, sans-serif; 104 | font-size: 10px; 105 | font-weight: normal; 106 | text-align: center; 107 | color: #000; 108 | border: none; 109 | z-index: 0; 110 | } 111 | 112 | text { 113 | font: 10px sans-serif; 114 | font-weight: normal; 115 | stroke-opacity: 1.0; 116 | } 117 | 118 | text.nlabel { 119 | /*stroke: #000000;*/ 120 | } 121 | 122 | text.nshadow { 123 | stroke: #fff; 124 | stroke-width: 3px; 125 | /*stroke-opacity: 0.5;*/ 126 | /*visibility: hidden;*/ 127 | } 128 | 129 | text.main { 130 | font: 12px sans-serif; 131 | font-weight: bold; 132 | } 133 | 134 | -------------------------------------------------------------------------------- /rhino/src/main/scala/unicorn/rhino/Boot.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.rhino 18 | 19 | import scala.concurrent.duration._ 20 | import akka.actor.{ActorSystem, Props} 21 | import akka.routing.FromConfig 22 | import akka.io.IO 23 | import spray.can.Http 24 | import akka.pattern.ask 25 | import akka.util.Timeout 26 | import com.typesafe.config.ConfigFactory 27 | 28 | /** 29 | * @author Haifeng Li 30 | */ 31 | object Boot extends App { 32 | 33 | // we need an ActorSystem to host our application in 34 | implicit val actorSystem = ActorSystem("unicorn-rhino") 35 | 36 | // create a pool of RhinoActors 37 | val service = actorSystem.actorOf(FromConfig.props(Props[RhinoActor]), "rhino-router") 38 | 39 | val conf = ConfigFactory.load() 40 | val serverPort = conf.getInt("spray.can.server.port") 41 | 42 | val ip = if (System.getProperty("loopback.only") != null) "127.0.0.1" else "0.0.0.0" 43 | IO(Http) ! Http.Bind(service, interface = ip, port = serverPort) 44 | } 45 | -------------------------------------------------------------------------------- /rhino/src/main/twirl/doc.scala.html: -------------------------------------------------------------------------------- 1 | @(id: String, doc: String, links: Seq[String]) 2 | 3 | 4 | 5 | Unicorn Full Text Search 6 | 7 | 37 | 38 | 39 |
40 |
41 | 42 | 43 |
44 |
45 |
@doc
46 | 47 | 52 | 53 |
54 | 55 |
56 | 57 | 58 | -------------------------------------------------------------------------------- /rhino/src/main/twirl/search.scala.html: -------------------------------------------------------------------------------- 1 | @(query: String, docs: Seq[(String, String)]) 2 | 3 | 4 | 5 | Unicorn Full Text Search 6 | 7 | 8 | 9 |
10 |
11 | 12 | 13 |
14 |
15 |
16 | @for(doc <- docs) { 17 |
@(doc._2)
18 | } 19 |
20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /rhino/src/templates/debian/systemv: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ### BEGIN INIT INFO 3 | # Provides: Unicorn Rhino 4 | # Required-Start: $local_fs $remote_fs $network 5 | # Required-Stop: $local_fs $remote_fs $network 6 | # Should-Start: $named 7 | # Should-Stop: $named 8 | # Default-Start: 2 3 4 5 9 | # Default-Stop: 0 1 6 10 | # Short-Description: Unicorn REST API 11 | # Description: Control the rhino daemon. 12 | ### END INIT INFO 13 | 14 | set -e 15 | 16 | if [ -z "${JAVA_HOME}" ]; then 17 | JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:/bin/java::") 18 | fi 19 | JAVA_OPTS="-Xms1024m -Xmx2048m" 20 | 21 | APP=${{app_name}} 22 | 23 | PID=/var/run/${APP}.pid 24 | OUT_LOG=/var/log/${APP}/${APP}_out.log 25 | ERR_LOG=/var/log/${APP}/${APP}_err.log 26 | 27 | DAEMON_USER=${{daemon_user}} 28 | 29 | APP_HOME=/opt/${APP} 30 | APP_CLASSPATH=${{app_classpath}} 31 | APP_CLASS=${{app_main_class}} 32 | APP_LOG_CONFIG=${APP_HOME}/conf/logback.xml 33 | APP_CONFIG=${APP_HOME}/conf/application.conf 34 | 35 | if [ -n "$APP_LOG_CONFIG}" ]; then 36 | JAVA_OPTS="-Dlogback.configurationFile=${APP_LOG_CONFIG} ${JAVA_OPTS}" 37 | fi 38 | 39 | DAEMON_ARGS="-home ${JAVA_HOME} -Dconfig.file=${APP_CONFIG} ${JAVA_OPTS} -pidfile ${PID}" 40 | DAEMON_ARGS="$DAEMON_ARGS -user ${DAEMON_USER} -outfile ${OUT_LOG} -errfile ${ERR_LOG}" 41 | DAEMON_ARGS="$DAEMON_ARGS -cp ${APP_CLASSPATH} ${APP_CLASS}" 42 | 43 | . 
/lib/lsb/init-functions 44 | 45 | case "$1" in 46 | start) 47 | log_daemon_msg "Starting ${APP}" 48 | cd ${APP_HOME} && jsvc ${DAEMON_ARGS} 49 | log_end_msg 0 50 | ;; 51 | stop) 52 | log_daemon_msg "Stopping ${APP}" 53 | cd ${APP_HOME} && jsvc -stop ${DAEMON_ARGS} 54 | log_end_msg 0 55 | ;; 56 | *) 57 | log_success_msg "Usage: {start|stop}" 58 | echo "Usage: {start|stop}" 59 | exit 1 60 | ;; 61 | esac 62 | 63 | exit 0 -------------------------------------------------------------------------------- /rhino/src/templates/rpm/systemd: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Unicorn REST API 3 | Documentation=https://github.com/haifengl/unicorn 4 | After=network.target remote-fs.target nss-lookup.target nginx.service 5 | Wants=nginx.service 6 | 7 | [Service] 8 | Type=simple 9 | User=unicorn 10 | Group=unicorn 11 | WorkingDirectory=/opt/${{app_name}} 12 | UMask=0022 13 | ExecStartPre=-/usr/bin/rm -f /opy/${{app_name}}/${{app_name}}.pid 14 | ExecStart=/opt/${{app_name}}/bin/${{exec}} 15 | ExecStop=/bin/kill -s QUIT $MAINPID 16 | PrivateTmp=true 17 | Restart=on-failure 18 | RestartSec=2 19 | 20 | [Install] 21 | WantedBy=multi-user.target 22 | -------------------------------------------------------------------------------- /rhino/src/test/scala/unicorn/rhino/RhinoSpec.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.rhino 18 | 19 | import org.specs2.mutable.{BeforeAfter, Specification} 20 | import spray.testkit.Specs2RouteTest 21 | import spray.http.{HttpEntity, HttpRequest} 22 | import spray.http.HttpMethods._ 23 | import spray.http.StatusCodes._ 24 | 25 | import unicorn.json._ 26 | import unicorn.bigtable.hbase.HBase 27 | import unicorn.unibase.Unibase 28 | 29 | class RhinoSpec extends Specification with Specs2RouteTest with BeforeAfter with Rhino { 30 | // makes test execution sequential and prevents conflicts that may occur when the data is 31 | // changed simultaneously in the database 32 | sequential 33 | 34 | val table = "unicorn_rhino_test" 35 | val db = new Unibase(HBase()) 36 | 37 | var key: JsValue = JsUndefined 38 | 39 | override def before = { 40 | db.createTable(table) 41 | val bucket = db(table) 42 | key = bucket.upsert(json) 43 | } 44 | 45 | override def after= { 46 | db.dropTable(table) 47 | } 48 | 49 | // connects the DSL to the test ActorSystem 50 | override def actorRefFactory = system 51 | 52 | val json = JsonParser( 53 | """ 54 | |{ 55 | | "owner": "Rich", 56 | | "phone": "123-456-7890", 57 | | "address": { 58 | | "street": "1 ADP Blvd.", 59 | | "city": "Roseland", 60 | | "state": "NJ" 61 | | }, 62 | | "store": { 63 | | "book": [ 64 | | { 65 | | "category": "reference", 66 | | "author": "Nigel Rees", 67 | | "title": "Sayings of the Century", 68 | | "price": 8.95 69 | | }, 70 | | { 71 | | "category": 
"fiction", 72 | | "author": "Evelyn Waugh", 73 | | "title": "Sword of Honour", 74 | | "price": 12.99 75 | | }, 76 | | { 77 | | "category": "fiction", 78 | | "author": "Herman Melville", 79 | | "title": "Moby Dick", 80 | | "isbn": "0-553-21311-3", 81 | | "price": 8.99 82 | | }, 83 | | { 84 | | "category": "fiction", 85 | | "author": "J. R. R. Tolkien", 86 | | "title": "The Lord of the Rings", 87 | | "isbn": "0-395-19395-8", 88 | | "price": 22.99 89 | | } 90 | | ], 91 | | "bicycle": { 92 | | "color": "red", 93 | | "price": 19.95 94 | | } 95 | | } 96 | |} 97 | """.stripMargin).asInstanceOf[JsObject] 98 | 99 | val update = JsonParser( 100 | """ 101 | | { 102 | | "$set": { 103 | | "owner": "Poor", 104 | | "gender": "M", 105 | | "store.book.0.price": 9.95 106 | | } 107 | | } 108 | """.stripMargin).asInstanceOf[JsObject] 109 | 110 | "Rhino" should { 111 | "post" in { 112 | HttpRequest(POST, s"/$table", entity = HttpEntity(json.toString)) ~> apiRoute ~> check { 113 | response.status === OK 114 | } 115 | } 116 | 117 | "put" in { 118 | HttpRequest(PUT, s"/$table", entity = HttpEntity(json.toString)) ~> apiRoute ~> check { 119 | json("_id") = java.util.UUID.randomUUID 120 | response.status === OK 121 | } 122 | } 123 | 124 | "patch" in { 125 | HttpRequest(PATCH, s"/$table", entity = HttpEntity(update.toString)) ~> apiRoute ~> check { 126 | response.status === OK 127 | } 128 | } 129 | 130 | "delete" in { 131 | Delete(s"/$table/$key") ~> apiRoute ~> check { 132 | response.status === OK 133 | } 134 | } 135 | 136 | "get" in { 137 | Get(s"/$table/$key") ~> apiRoute ~> check { 138 | JsonParser(responseAs[String]) === json 139 | } 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /rhino/src/universal/conf/rhino.ini: -------------------------------------------------------------------------------- 1 | # Setting -X directly (-J is stripped) 2 | # -J-X 3 | -J-Xmx4096M 4 | -J-Xms1024M 5 | 6 | # Add additional jvm parameters 7 | 
-J-server 8 | 9 | # Performance optimization 10 | -J-XX:+AggressiveOpts 11 | 12 | # G1 garbage collector 13 | -J-XX:+UseG1GC 14 | 15 | # Optimize string duplication, which happens a lot when parsing a data file 16 | -J-XX:+UseStringDeduplication 17 | 18 | # Turn on JVM debugging, open at the given port 19 | # -jvm-debug 20 | 21 | # Don't run the java version check 22 | # -no-version-check 23 | 24 | # enabling debug and sending -d as app argument 25 | # the '--' prevents app-parameter swallowing when 26 | # using a reserved parameter. See #184 27 | # -d -- -d -------------------------------------------------------------------------------- /rocksdb/build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn-rocksdb" 2 | 3 | libraryDependencies += "org.rocksdb" % "rocksdbjni" % "4.5.1" 4 | 5 | -------------------------------------------------------------------------------- /rocksdb/src/main/scala/unicorn/bigtable/rocksdb/RocksDB.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | package unicorn.bigtable.rocksdb 18 | 19 | import java.io.File 20 | import java.util.Properties 21 | import org.rocksdb.{ColumnFamilyDescriptor, Options} 22 | import unicorn.bigtable.Database 23 | 24 | /** RocksDB abstraction. RocksDB is an embeddable persistent key-value store 25 | * for fast storage. There is no concept of tables in RocksDB. In fact, a 26 | * RocksDB is like a table in HBase. In this class, we create a higher level 27 | * concept of database that contains multiple RocksDB databases in a directory. 28 | * Each RocksDB is actually a subdirectory, which is encapsulated in RocksTable. 29 | * 30 | * @author Haifeng Li 31 | */ 32 | class RocksDB(val path: String) extends Database[RocksTable] { 33 | val dir = new File(path) 34 | require(dir.exists, s"Directory $path doesn't exist") 35 | 36 | override def close: Unit = () 37 | 38 | override def apply(name: String): RocksTable = { 39 | new RocksTable(s"$path/$name") 40 | } 41 | 42 | override def tables: Set[String] = { 43 | dir.listFiles.filter(_.isDirectory).map(_.getName).toSet 44 | } 45 | 46 | /** The parameter props is ignored. 
*/ 47 | override def createTable(name: String, props: Properties, families: String*): RocksTable = { 48 | val options = new Options 49 | options.setCreateIfMissing(true) 50 | options.setErrorIfExists(true) 51 | options.setCreateMissingColumnFamilies(false) 52 | 53 | val rocksdb = org.rocksdb.RocksDB.open(options, s"$path/$name") 54 | families.foreach { family => 55 | val descriptor = new ColumnFamilyDescriptor(family.getBytes) 56 | rocksdb.createColumnFamily(descriptor) 57 | } 58 | 59 | rocksdb.close 60 | new RocksTable(s"$path/$name") 61 | } 62 | 63 | override def dropTable(name: String): Unit = { 64 | new File(s"$path/$name").delete 65 | } 66 | 67 | override def truncateTable(name: String): Unit = { 68 | throw new UnsupportedOperationException("RocksDB doesn't support truncateTable") 69 | } 70 | 71 | override def tableExists(name: String): Boolean = { 72 | val options = new Options().setCreateIfMissing(false) 73 | try { 74 | org.rocksdb.RocksDB.open(options, s"$path/$name") 75 | true 76 | } catch { 77 | case _: Exception => false 78 | } 79 | } 80 | 81 | override def compactTable(name: String): Unit = { 82 | org.rocksdb.RocksDB.open(s"$path/$name").compactRange 83 | } 84 | } 85 | 86 | object RocksDB { 87 | 88 | /** Creates a RocksDB database. 89 | * 90 | * @param path path to database. 91 | */ 92 | def create(path: String): RocksDB = { 93 | val dir = new java.io.File(path) 94 | require(!dir.exists, s"Directory $path exists") 95 | 96 | dir.mkdir 97 | new RocksDB(path) 98 | } 99 | 100 | def apply(path: String): RocksDB = { 101 | new RocksDB(path) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /rocksdb/src/test/scala/unicorn/bigtable/rocksdb/RocksDBSpec.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.bigtable.rocksdb 18 | 19 | import org.specs2.mutable._ 20 | import org.specs2.specification.BeforeAfterAll 21 | import unicorn.bigtable._ 22 | import unicorn.util._ 23 | 24 | /** 25 | * @author Haifeng Li 26 | */ 27 | class RocksDBSpec extends Specification with BeforeAfterAll { 28 | // Make sure running examples one by one. 
29 | // Otherwise, test cases on same columns will fail due to concurrency 30 | sequential 31 | val db = RocksDB.create("/tmp/unicorn-rocksdb") 32 | val tableName = "unicorn_test" 33 | var table: RocksTable = null 34 | 35 | override def beforeAll = { 36 | table = db.createTable(tableName, "cf1", "cf2") 37 | } 38 | 39 | override def afterAll = { 40 | if (table != null) table.close 41 | new java.io.File("/tmp/unicorn-rocksdb").delete 42 | } 43 | 44 | "RocksDB" should { 45 | "get the put" in { 46 | table.put("row1", "cf1", "c1", "v1", 0L) 47 | new String(table("row1", "cf1", "c1").get, utf8) === "v1" 48 | table.delete("row1", "cf1", "c1") 49 | table("row1", "cf1", "c1".getBytes(utf8)) === None 50 | } 51 | 52 | "get the family" in { 53 | table.put("row1", "cf1", Column("c1", "v1"), Column("c2", "v2")) 54 | val columns = table.get("row1", "cf1") 55 | columns.size === 2 56 | new String(columns(0).value, utf8) === "v1" 57 | new String(columns(1).value, utf8) === "v2" 58 | 59 | table.delete("row1", "cf1") 60 | val empty = table.get("row1", "cf1") 61 | empty.size === 0 62 | } 63 | 64 | "get empty family" in { 65 | val columns = table.get("row1", "cf1") 66 | columns.size === 0 67 | } 68 | 69 | "get nonexistent family" in { 70 | table.get("row1", "cf5") must throwA[java.util.NoSuchElementException] 71 | } 72 | 73 | "get the row" in { 74 | table.put("row1", Seq( 75 | ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))), 76 | ColumnFamily("cf2", Seq(Column("c3", "v3")))) 77 | ) 78 | val families = table.get("row1") 79 | families.size === 2 80 | families(0).columns.size === 2 81 | families(1).columns.size === 1 82 | families(0).family === "cf1" 83 | families(1).family === "cf2" 84 | 85 | new String(families(0).columns(0).value, utf8) === "v1" 86 | new String(families(0).columns(1).value, utf8) === "v2" 87 | new String(families(1).columns(0).value, utf8) === "v3" 88 | 89 | table.delete("row1", "cf1") 90 | val cf1 = table.get("row1", "cf1") 91 | cf1.size === 0 92 | 93 
| table.get("row1").size === 1 94 | val cf2 = table.get("row1", "cf2") 95 | cf2.size === 1 96 | 97 | table.delete("row1") 98 | table.get("row1").size === 0 99 | } 100 | 101 | "get nonexistent row" in { 102 | val families = table.get("row5") 103 | families.size === 0 104 | } 105 | 106 | "get multiple rows" in { 107 | val row1 = Row("row1", 108 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))), 109 | ColumnFamily("cf2", Seq(Column("c3", "v3"))))) 110 | 111 | val row2 = Row("row2".getBytes(utf8), 112 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))))) 113 | 114 | table.putBatch(row1, row2) 115 | 116 | val keys = Seq("row1", "row2") 117 | val rows = table.getBatch(keys) 118 | rows.size === 2 119 | rows(0).families.size === 2 120 | rows(1).families.size === 1 121 | 122 | table.deleteBatch(keys) 123 | table.getBatch(keys).size === 0 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /search/build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn-search" 2 | 3 | libraryDependencies += "com.github.haifengl" % "smile-nlp" % "1.0.3" 4 | 5 | -------------------------------------------------------------------------------- /search/src/main/scala/unicorn/search/TextIndex.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.search 18 | 19 | import smile.nlp.stemmer.Stemmer 20 | 21 | /** 22 | * Text inverted index for full text search and relevance ranking. 23 | * 24 | * @author Haifeng Li 25 | */ 26 | trait TextIndex { 27 | val TextIndexFamily = "text_index" 28 | val TermIndexSuffix = " idx" 29 | val TermTitleIndexSuffix = " tidx" 30 | val TermAnchorIndexSuffix = " aidx" 31 | val TermPositionSuffix = " pos" 32 | val DocFieldSeparator = "##" 33 | 34 | val CorpusMetaKey = "unicorn.text.corpus.meta" 35 | val TextBodyLengthKey = "unicorn.text.corpus.text.size" 36 | val TextTitleLengthKey = "unicorn.text.corpus.text.title.size" 37 | val TextAnchorLengthKey = "unicorn.text.corpus.text.anchor.size" 38 | val PageRankKey = "unicorn.text.corpus.text.page_rank" 39 | val BrowseRankKey = "unicorn.text.corpus.text.browse_rank" 40 | 41 | /** 42 | * Optional stemmer. 43 | */ 44 | var stemmer: Option[Stemmer] = None 45 | } 46 | -------------------------------------------------------------------------------- /search/src/main/scala/unicorn/search/TextIndexBuilder.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.search 18 | 19 | import unicorn._, json._ 20 | import unicorn.core.Document 21 | import unicorn.store.Dataset 22 | import smile.nlp.tokenizer.SimpleTokenizer 23 | import smile.nlp.tokenizer.SimpleSentenceSplitter 24 | import smile.nlp.dictionary.EnglishStopWords 25 | import smile.nlp.dictionary.EnglishPunctuations 26 | 27 | /** 28 | * @author Haifeng Li 29 | */ 30 | class TextIndexBuilder(storage: Dataset) extends TextIndex { 31 | 32 | val textLength = new Document(TextBodyLengthKey, TextIndexFamily) 33 | val titleLength = new Document(TextTitleLengthKey, TextIndexFamily) 34 | val anchorLength = new Document(TextAnchorLengthKey, TextIndexFamily) 35 | 36 | /** 37 | * Sentence splitter. 38 | */ 39 | var sentenceSpliter = SimpleSentenceSplitter.getInstance 40 | 41 | /** 42 | * Tokenizer on sentences 43 | */ 44 | var tokenizer = new SimpleTokenizer 45 | 46 | /** 47 | * Dictionary of stop words. 48 | */ 49 | var stopWords = EnglishStopWords.DEFAULT 50 | 51 | /** 52 | * Punctuation. 53 | */ 54 | var punctuations = EnglishPunctuations.getInstance 55 | 56 | /** 57 | * Process each token (after filtering stop words, numbers, and optional stemming). 
58 | */ 59 | def foreach[U](text: String)(f: ((String, Int)) => U): Unit = { 60 | var pos = 0 61 | 62 | sentenceSpliter.split(text).foreach { sentence => 63 | tokenizer.split(sentence).foreach { token => 64 | pos += 1 65 | val lower = token.toLowerCase 66 | if (!(punctuations.contains(lower) || 67 | stopWords.contains(lower) || 68 | lower.length == 1 || 69 | lower.matches("[0-9\\.\\-\\+\\|\\(\\)]+"))) { 70 | val word = stemmer match { 71 | case Some(stemmer) => stemmer.stem(lower) 72 | case None => lower 73 | } 74 | 75 | f(word, pos) 76 | } 77 | } 78 | 79 | pos += 1 80 | } 81 | } 82 | 83 | /** 84 | * Add a text into index. 85 | * @param doc The id of document that owns the text. 86 | * @param field The filed name of text in the document. 87 | * @param text The text body. 88 | */ 89 | private def add(doc: String, field: String, text: String, sizeDoc: Document, indexKeySuffix: String) { 90 | val termFreq = scala.collection.mutable.Map[String, Int]().withDefaultValue(0) 91 | //val termPos = scala.collection.mutable.Map[String, Array[Int]]().withDefaultValue(Array[Int]()) 92 | 93 | var size = 0 94 | foreach(text) { case (word, pos) => 95 | size += 1 96 | termFreq(word) += 1 97 | //termPos(word) :+ pos 98 | } 99 | 100 | val key = doc + DocFieldSeparator + field.replace(Document.FieldSeparator, DocFieldSeparator) 101 | 102 | sizeDoc(key) = JsInt(size) 103 | 104 | termFreq.foreach { case (word, freq) => 105 | //TODO storage.put(word + indexKeySuffix, TextIndexFamily, key, freq) 106 | } 107 | 108 | /* 109 | termPos.foreach { case (word, pos) => 110 | storage.put(word + TermPositionSuffix, TextIndexFamily, key, JsonBlobValue(pos).bytes) 111 | } 112 | */ 113 | 114 | // termFreq and termPos updates will also be commit here. 115 | sizeDoc into storage 116 | } 117 | 118 | /** 119 | * Add a text into index. 120 | * @param doc The id of document that owns the text. 121 | * @param field The filed name of text in the document. 122 | * @param text The text body. 
123 | */ 124 | def add(doc: String, field: String, text: String) { 125 | add(doc, field, text, textLength, TermIndexSuffix) 126 | } 127 | 128 | /** 129 | * Add a title into index. 130 | * @param doc The id of document that owns the text. 131 | * @param field The filed name of text in the document. 132 | * @param title The title. 133 | */ 134 | def addTitle(doc: String, field: String, title: String) { 135 | add(doc, field, title, titleLength, TermTitleIndexSuffix) 136 | } 137 | 138 | /** 139 | * Add an anchor text into index. 140 | * @param doc The id of document that owns the text. 141 | * @param field The filed name of text in the document. 142 | * @param anchor The anchor text. 143 | */ 144 | def addAnchor(doc: String, field: String, anchor: String) { 145 | add(doc, field, anchor, anchorLength, TermAnchorIndexSuffix) 146 | } 147 | } 148 | 149 | object TextIndexBuilder { 150 | def apply(storage: Dataset): TextIndexBuilder = { 151 | new TextIndexBuilder(storage) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /search/src/main/scala/unicorn/search/TextSearch.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | package unicorn.search 18 | 19 | import unicorn._, json._ 20 | import unicorn.core.Document 21 | import unicorn.store.Dataset 22 | import smile.nlp.relevance.BM25 23 | 24 | /** 25 | * @author Haifeng Li 26 | */ 27 | class TextSearch(storage: Dataset, numTexts: Long) extends TextIndex { 28 | val pagerank = new Document("unicorn.text.corpus.text.page_rank", "text_index").from(storage) 29 | val defaultPageRank = math.log(0.85 / numTexts) 30 | 31 | val textLength = new Document(TextBodyLengthKey, TextIndexFamily).from(storage) 32 | val titleLength = new Document(TextTitleLengthKey, TextIndexFamily).from(storage) 33 | val anchorLength = new Document(TextAnchorLengthKey, TextIndexFamily).from(storage) 34 | 35 | /** 36 | * Relevance ranking algorithm. 37 | */ 38 | val ranker = new BM25 39 | 40 | /** 41 | * Search terms in corpus. The results are sorted by relevance. 42 | */ 43 | def search(terms: String*): Array[((Document, String), Double)] = { 44 | val rank = scala.collection.mutable.Map[(Document, String), Double]().withDefaultValue(0.0) 45 | terms.foreach { term => search(term, rank) } 46 | rank.toArray.sortBy(_._2).reverse 47 | } 48 | 49 | def search(term: String, rank: scala.collection.mutable.Map[(Document, String), Double]) { 50 | val lower = term.toLowerCase 51 | val word = stemmer match { 52 | case Some(stemmer) => stemmer.stem(lower) 53 | case None => lower 54 | } 55 | 56 | val key = word + TermIndexSuffix 57 | val invertedText = new Document(word + TermIndexSuffix, TextIndexFamily).from(storage).loadAttributes 58 | if (invertedText.attributes.size == 0) return 59 | 60 | val invertedTitle = new Document(word + TermTitleIndexSuffix, TextIndexFamily).from(storage).loadAttributes 61 | val invertedAnchor = new Document(word + TermAnchorIndexSuffix, TextIndexFamily).from(storage).loadAttributes 62 | 63 | val docs = (invertedText.map { case (docField, value) => docField 
}).toSeq 64 | textLength.select(docs: _*) 65 | titleLength.select(docs: _*) 66 | anchorLength.select(docs: _*) 67 | 68 | var avgTextLength = 0.0 69 | var avgTitleLength = 0.0 70 | var avgAnchorLength = 0.0 71 | 72 | var numMatchedTexts = 0 73 | var numMatchedTitles = 0 74 | var numMatchedAnchors = 0 75 | 76 | invertedText.foreach { case (docField, value) => 77 | val n1: Int = textLength(docField) 78 | if (n1 > 0) { 79 | numMatchedTexts += 1 80 | avgTextLength += n1 81 | } 82 | 83 | val n2: Int = titleLength(docField) 84 | if (n2 > 0) { 85 | numMatchedTitles += 1 86 | avgTitleLength += n2 87 | } 88 | 89 | val n3: Int = anchorLength(docField) 90 | if (n3 > 0) { 91 | numMatchedAnchors += 1 92 | avgAnchorLength += n3 93 | } 94 | } 95 | 96 | if (numMatchedTexts > 0) avgTextLength /= numMatchedTexts 97 | if (numMatchedTitles > 0) avgTitleLength /= numMatchedTitles 98 | if (numMatchedAnchors > 0) avgAnchorLength /= numMatchedAnchors 99 | 100 | pagerank.select(invertedText.map { case (docField, _) => docField }.toArray : _*) 101 | 102 | invertedText.foreach { case (docField, value) => 103 | val id = docField.split(DocFieldSeparator, 2) 104 | 105 | if (id.length == 2) { 106 | val doc = Document(id(0)).from(storage) 107 | val field = id(1).replace(DocFieldSeparator, Document.FieldSeparator) 108 | 109 | val termFreq: Int = value 110 | val titleTermFreq: Int = invertedTitle(docField) 111 | val anchorTermFreq: Int = invertedAnchor(docField) 112 | 113 | val bm25 = ranker.score(termFreq, textLength(docField), avgTextLength, 114 | titleTermFreq, titleLength(docField), avgTitleLength, 115 | anchorTermFreq, anchorLength(docField), avgAnchorLength, 116 | numTexts, invertedText.size) 117 | 118 | val pr = pagerank(docField) match { 119 | case JsDouble(value) => math.log(value) 120 | case _ => defaultPageRank 121 | } 122 | 123 | rank((doc, field)) += (bm25 + pr) 124 | } 125 | } 126 | } 127 | } 128 | 129 | object TextSearch { 130 | def apply(storage: Dataset, numTexts: Long): TextSearch 
= { 131 | new TextSearch(storage, numTexts) 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /shell/build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn-shell" 2 | 3 | mainClass in Compile := Some("unicorn.shell.Main") 4 | 5 | // native packager 6 | enablePlugins(JavaAppPackaging) 7 | 8 | maintainer := "Haifeng Li " 9 | 10 | packageName := "unicorn" 11 | 12 | packageSummary := "Unicorn" 13 | 14 | packageDescription := "Unicorn" 15 | 16 | executableScriptName := "unicorn" 17 | 18 | bashScriptConfigLocation := Some("${app_home}/../conf/unicorn.ini") 19 | 20 | bashScriptExtraDefines += """addJava "-Dsmile.home=${app_home}"""" 21 | 22 | bashScriptExtraDefines += """addJava "-Dscala.repl.autoruncode=${app_home}/init.scala"""" 23 | 24 | bashScriptExtraDefines += """addJava "-Dconfig.file=${app_home}/../conf/unicorn.conf"""" 25 | 26 | // native packager Docker plugin 27 | enablePlugins(DockerPlugin) 28 | 29 | dockerBaseImage := "dajobe/hbase" 30 | 31 | packageName in Docker := "haifengl/unicorn" 32 | 33 | dockerUpdateLatest := true 34 | 35 | // BuildInfo 36 | enablePlugins(BuildInfoPlugin) 37 | 38 | buildInfoKeys := Seq[BuildInfoKey](name, version, scalaVersion, sbtVersion) 39 | 40 | buildInfoPackage := "unicorn.shell" 41 | 42 | buildInfoOptions += BuildInfoOption.BuildTime 43 | 44 | libraryDependencies += "org.scala-lang" % "scala-compiler" % "2.11.7" 45 | 46 | libraryDependencies += "org.slf4j" % "slf4j-simple" % "1.7.18" 47 | -------------------------------------------------------------------------------- /shell/src/main/scala/unicorn/shell/Main.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.shell 18 | 19 | import scala.tools.nsc._, GenericRunnerCommand._, io.File 20 | 21 | /** An object that runs Smile script or interactive shell. 22 | * Based on Scala MainGenericRunner. 23 | * 24 | * @author Haifeng Li 25 | */ 26 | object Main extends App { 27 | 28 | // This is actually the main function 29 | if (!process(args)) sys.exit(1) 30 | 31 | def errorFn(str: String, e: Option[Throwable] = None, isFailure: Boolean = true): Boolean = { 32 | if (str.nonEmpty) Console.err println str 33 | e foreach (_.printStackTrace()) 34 | !isFailure 35 | } 36 | 37 | def process(args: Array[String]): Boolean = { 38 | val command = new GenericRunnerCommand(args.toList, (x: String) => errorFn(x)) 39 | import command.{ settings, howToRun, thingToRun, shortUsageMsg, shouldStopWithInfo } 40 | settings.usejavacp.value = true 41 | settings.deprecation.value = true 42 | def sampleCompiler = new Global(settings) // def so it's not created unless needed 43 | 44 | def run(): Boolean = { 45 | def isE = !settings.execute.isDefault 46 | def dashe = settings.execute.value 47 | 48 | def isI = !settings.loadfiles.isDefault 49 | def dashi = settings.loadfiles.value 50 | 51 | // Deadlocks on startup under -i unless we disable async. 
52 | if (isI) 53 | settings.Yreplsync.value = true 54 | 55 | def combinedCode = { 56 | val files = if (isI) dashi map (file => File(file).slurp()) else Nil 57 | val str = if (isE) List(dashe) else Nil 58 | 59 | files ++ str mkString "\n\n" 60 | } 61 | 62 | def runTarget(): Either[Throwable, Boolean] = howToRun match { 63 | case AsObject => 64 | ObjectRunner.runAndCatch(settings.classpathURLs, thingToRun, command.arguments) 65 | case AsScript => 66 | ScriptRunner.runScriptAndCatch(settings, thingToRun, command.arguments) 67 | case Error => 68 | Right(false) 69 | case _ => 70 | Right(new Shell process settings) 71 | } 72 | 73 | /** If -e and -i were both given, we want to execute the -e code after the 74 | * -i files have been included, so they are read into strings and prepended to 75 | * the code given in -e. The -i option is documented to only make sense 76 | * interactively so this is a pretty reasonable assumption. 77 | * 78 | * This all needs a rewrite though. 79 | */ 80 | if (isE) { 81 | ScriptRunner.runCommand(settings, combinedCode, thingToRun +: command.arguments) 82 | } 83 | else runTarget() match { 84 | case Left(ex) => errorFn("", Some(ex)) // there must be a useful message of hope to offer here 85 | case Right(b) => b 86 | } 87 | } 88 | 89 | if (!command.ok) 90 | errorFn(f"%n$shortUsageMsg") 91 | else if (shouldStopWithInfo) 92 | errorFn(command getInfoMessage sampleCompiler, isFailure = false) 93 | else 94 | run() 95 | } 96 | } -------------------------------------------------------------------------------- /shell/src/main/scala/unicorn/shell/Shell.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.shell 18 | 19 | import scala.tools.nsc.interpreter.ILoop 20 | 21 | /** Unicorn shell. 22 | * 23 | * @author Haifeng Li 24 | */ 25 | class Shell extends ILoop { 26 | override def prompt = "unicorn> " 27 | override def printWelcome = echo( 28 | raw""" 29 | | . . . . 30 | | ,`,`,`,`, 31 | | . . . . `\`\`\`\; 32 | | `\`\`\`\`, ~|;!;!;\! 33 | | ~\;\;\;\|\ (--,!!!~`! . 34 | | (--,\\\===~\ (--,|||~`! ./ 35 | | (--,\\\===~\ `,-,~,=,:. _,// 36 | | (--,\\\==~`\ ~-=~-.---|\;/J, Welcome to the Unicorn Database 37 | | (--,\\\((```==. ~'`~/ a | BigTable, Document and Graph 38 | | (-,.\\('('(`\\. ~'=~| \_. \ Full Text Search 39 | | (,--(,(,(,'\\. ~'=| \\_;> 40 | | (,-( ,(,(,;\\ ~=/ \ Haifeng Li 41 | | (,-/ (.(.(,;\\,/ ) ADP Innovation Lab 42 | | (,--/,;,;,;,\\ ./------. 43 | | (==,-;-'`;' /_,----`. \ 44 | | ,.--_,__.-' `--. ` \ 45 | | (='~-_,--/ , ,!,___--. \ \_) 46 | | (-/~( | \ ,_- | ) /_| 47 | | (~/((\ )\._, |-' _,/ / 48 | | \\)))) / ./~. | \_\; 49 | | ,__///// / / ) / 50 | | '===~' | | (, <. 51 | | / / \. \ 52 | | _/ / \_\ 53 | | /_!/ >_\ 54 | | 55 | | Welcome to Unicorn Shell; enter ':help' for the list of commands. 
56 | | Type ":quit" to leave the Unicorn Shell 57 | | Version ${BuildInfo.version}, Scala ${BuildInfo.scalaVersion}, SBT ${BuildInfo.sbtVersion}, Built at ${BuildInfo.builtAtString} 58 | |=============================================================================== 59 | """.stripMargin 60 | ) 61 | } 62 | -------------------------------------------------------------------------------- /shell/src/universal/bin/init.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | import java.util.{Date, UUID} 18 | import unicorn.util._, unicorn.oid._, unicorn.json._ 19 | import unicorn.bigtable.hbase.HBase 20 | import unicorn.bigtable.cassandra.Cassandra 21 | import unicorn.bigtable.accumulo.Accumulo 22 | import unicorn.bigtable.rocksdb.RocksDB 23 | import unicorn.unibase._ 24 | import unicorn.unibase.graph._ 25 | import unicorn.narwhal._ 26 | import unicorn.sql._ 27 | 28 | -------------------------------------------------------------------------------- /shell/src/universal/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | unicorn.root.logger=DEBUG,console,RFA 3 | unicorn.log.dir=/var/log/unicorn 4 | unicorn.log.file=unicorn.log 5 | 6 | # Define the root logger to the system property "unicorn.root.logger". 7 | log4j.rootLogger=${unicorn.root.logger} 8 | 9 | # Logging Threshold 10 | log4j.threshold=ALL 11 | 12 | # 13 | # Daily Rolling File Appender 14 | # 15 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 16 | log4j.appender.DRFA.File=${unicorn.log.dir}/${unicorn.log.file} 17 | 18 | # Rollover at midnight 19 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 20 | 21 | # 30-day backup 22 | #log4j.appender.DRFA.MaxBackupIndex=30 23 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 24 | 25 | # Pattern format: Date LogLevel LoggerName LogMessage 26 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 27 | 28 | # Rolling File Appender properties 29 | unicorn.log.maxfilesize=256MB 30 | unicorn.log.maxbackupindex=20 31 | 32 | # Rolling File Appender 33 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 34 | log4j.appender.RFA.File=${unicorn.log.dir}/${unicorn.log.file} 35 | 36 | log4j.appender.RFA.MaxFileSize=${unicorn.log.maxfilesize} 37 |
log4j.appender.RFA.MaxBackupIndex=${unicorn.log.maxbackupindex} 38 | 39 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 40 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 41 | 42 | 43 | # 44 | # Null Appender 45 | # 46 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 47 | 48 | # 49 | # console 50 | # Add "console" to rootlogger above if you want to use this 51 | # 52 | log4j.appender.console=org.apache.log4j.ConsoleAppender 53 | log4j.appender.console.target=System.err 54 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 55 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 56 | 57 | # Custom Logging levels 58 | log4j.logger.kafka=INFO 59 | log4j.logger.org.apache.zookeeper=INFO 60 | log4j.logger.com.jayway.jsonpath=INFO 61 | -------------------------------------------------------------------------------- /shell/src/universal/conf/unicorn.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adplabs/unicorn/48d35ddf17358c711d38fd685ace8e753c50a16c/shell/src/universal/conf/unicorn.conf -------------------------------------------------------------------------------- /shell/src/universal/conf/unicorn.ini: -------------------------------------------------------------------------------- 1 | # Setting -X directly (-J is stripped) 2 | # -J-X 3 | -J-Xmx4096M 4 | -J-Xms1024M 5 | 6 | # Add additional jvm parameters 7 | -J-server 8 | 9 | # Performance optimization 10 | -J-XX:+AggressiveOpts 11 | 12 | # G1 garbage collector 13 | -J-XX:+UseG1GC 14 | 15 | # Optimize string duplication, which happens a lot when parsing a data file 16 | -J-XX:+UseStringDeduplication 17 | 18 | # Turn on JVM debugging, open at the given port 19 | # -jvm-debug 20 | 21 | # Don't run the java version check 22 | # -no-version-check 23 | 24 | # enabling debug and sending -d as app argument 25 | # the '--' prevents app-parameter 
swallowing when 26 | # using a reserved parameter. See #184 27 | # -d -- -d -------------------------------------------------------------------------------- /shell/src/universal/examples/dbpedia.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | // import dbpedia to unibase 6 | 7 | import scala.io.Source 8 | import unicorn.json._ 9 | import unicorn.bigtable.rocksdb._ 10 | import unicorn.unibase._ 11 | import unicorn.unibase.graph._ 12 | 13 | val db = Unibase(RocksDB.create("/tmp/unicorn-dbpedia")) 14 | db.createGraph("dbpedia") 15 | val dbpedia = db.graph("dbpedia", new Snowflake(0)) 16 | 17 | // Although we can parse .gz file directly, we don't support bz2 compressed files. 18 | // Please download and unzip first. 19 | dbpedia.rdf("http://downloads.dbpedia.org/2015-10/core-i18n/en/page_links_en.ttl.bz2") -------------------------------------------------------------------------------- /shell/src/universal/examples/gods.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | import java.util._ 6 | import unicorn.json._ 7 | import unicorn.bigtable._ 8 | import unicorn.bigtable.accumulo._ 9 | import unicorn.unibase._ 10 | import unicorn.unibase.idgen._ 11 | import unicorn.unibase.graph._ 12 | 13 | val db = Unibase(Accumulo()) 14 | db.createGraph("gods") 15 | val gods = db.graph("gods", new Snowflake(0)) 16 | 17 | val saturn = gods.addVertex(json"""{"label": "titan", "name": "saturn", "age": 10000}""") 18 | val sky = gods.addVertex(json"""{"label": "location", "name": "sky"}""") 19 | val sea = gods.addVertex(json"""{"label": "location", "name": "sea"}""") 20 | val jupiter = gods.addVertex(json"""{"label": "god", "name": "jupiter", "age": 5000}""") 21 | val neptune = gods.addVertex(json"""{"label": "god", "name": "neptune", "age": 4500}""") 22 | val hercules = 
gods.addVertex(json"""{"label": "demigod", "name": "hercules", "age": 30}""") 23 | val alcmene = gods.addVertex(json"""{"label": "human", "name": "alcmene", "age": 45}""") 24 | val pluto = gods.addVertex(json"""{"label": "god", "name": "pluto", "age": 4000}""") 25 | val nemean = gods.addVertex(json"""{"label": "monster", "name": "nemean"}""") 26 | val hydra = gods.addVertex(json"""{"label": "monster", "name": "hydra"}""") 27 | val cerberus = gods.addVertex(json"""{"label": "monster", "name": "cerberus"}""") 28 | val tartarus = gods.addVertex(json"""{"label": "location", "name": "tartarus"}""") 29 | 30 | gods.addEdge(jupiter, "father", saturn) 31 | gods.addEdge(jupiter, "lives", sky, json"""{"reason": "loves fresh breezes"}""") 32 | gods.addEdge(jupiter, "brother", neptune) 33 | gods.addEdge(jupiter, "brother", pluto) 34 | 35 | gods.addEdge(neptune, "lives", sea, json"""{"reason": "loves waves"}""") 36 | gods.addEdge(neptune, "brother", jupiter) 37 | gods.addEdge(neptune, "brother", pluto) 38 | 39 | gods.addEdge(hercules, "father", jupiter) 40 | gods.addEdge(hercules, "mother", alcmene) 41 | gods.addEdge(hercules, "battled", nemean, json"""{"time": 1, "place": {"latitude": 38.1, "longitude": 23.7}}""") 42 | gods.addEdge(hercules, "battled", hydra, json"""{"time": 2, "place": {"latitude": 37.7, "longitude": 23.9}}""") 43 | gods.addEdge(hercules, "battled", cerberus, json"""{"time": 12, "place": {"latitude": 39.0, "longitude": 22.0}}""") 44 | 45 | gods.addEdge(pluto, "brother", jupiter) 46 | gods.addEdge(pluto, "brother", neptune) 47 | gods.addEdge(pluto, "lives", tartarus, json"""{"reason": "no fear of death"}""") 48 | gods.addEdge(pluto, "pet", cerberus) 49 | 50 | gods.addEdge(cerberus, "lives", tartarus) 51 | 52 | gods(hydra) 53 | gods(hydra).in("battled") 54 | gods(hydra).out("battled") 55 | gods(neptune, "lives", sea) 56 | gods(neptune, "lives", jupiter) 57 | gods(neptune, "brother", jupiter) 58 | 59 | gods(hydra).label 60 | 61 | // Gremline 62 | val g = 
gods.traversal 63 | g.v(saturn).in("father").in("father").name -------------------------------------------------------------------------------- /shell/src/universal/examples/json.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | import java.util._ 6 | import unicorn.json._ 7 | 8 | // Basic JSON 9 | val json = 10 | json""" 11 | { 12 | "store": { 13 | "book": [ 14 | { 15 | "category": "reference", 16 | "author": "Nigel Rees", 17 | "title": "Sayings of the Century", 18 | "price": 8.95 19 | }, 20 | { 21 | "category": "fiction", 22 | "author": "Evelyn Waugh", 23 | "title": "Sword of Honour", 24 | "price": 12.99 25 | }, 26 | { 27 | "category": "fiction", 28 | "author": "Herman Melville", 29 | "title": "Moby Dick", 30 | "isbn": "0-553-21311-3", 31 | "price": 8.99 32 | }, 33 | { 34 | "category": "fiction", 35 | "author": "J. R. R. Tolkien", 36 | "title": "The Lord of the Rings", 37 | "isbn": "0-395-19395-8", 38 | "price": 22.99 39 | } 40 | ], 41 | "bicycle": { 42 | "color": "red", 43 | "price": 19.95 44 | } 45 | } 46 | } 47 | """ 48 | 49 | println(json("store")("bicycle")("color")) 50 | println(json.store.bicycle.color) 51 | println(json.store.book(0).author) 52 | 53 | json.store.bicycle.color = "green" 54 | println(json.store.bicycle.color) 55 | 56 | json("store")("book") remove 0 57 | println(json.store.book) 58 | 59 | val a = JsArray(1, 2, 3, 4) 60 | a += 5 61 | println(a) 62 | 63 | val b: JsArray = Array(1, 2, 3, 4) 64 | b ++= JsArray(5, 6) 65 | println(b) 66 | 67 | val obj = JsObject( 68 | "key1" -> JsObject( 69 | "key11" -> JsObject("tags" -> JsArray("alpha1", "beta1", "gamma1")) 70 | ), 71 | "key2" -> JsObject( 72 | "key21" -> JsObject("tags" -> JsArray("alpha2", "beta2", "gamma2")) 73 | ), 74 | "key3" -> "blabla" 75 | ) 76 | 77 | // retrieve 1-level recursive path 78 | println(obj \\ "tags") 79 | // retrieve 2-level recursive path 80 | println(obj \ "key1" \\ "tags") 81 
| 82 | 83 | // JsonPath 84 | val jsonPath = JsonPath( 85 | json""" 86 | { 87 | "id": 1, 88 | "name": "Joe", 89 | "tags": ["programmer", "husband", "father", "golfer"], 90 | "address": [ 91 | { 92 | "id": 2, 93 | "street": "123 Main St.", 94 | "city": "Springfield", 95 | "state": "PA" 96 | }, 97 | { 98 | "id": 3, 99 | "street": "456 Main St.", 100 | "city": "Devon", 101 | "state": "PA", 102 | "work": true 103 | }, 104 | { 105 | "id": 4, 106 | "street": "789 Main St.", 107 | "city": "Sea Isle City", 108 | "state": "NJ" 109 | } 110 | ] 111 | } 112 | """) 113 | 114 | // field 115 | println(jsonPath("$.id")) 116 | println(jsonPath("$['id']")) 117 | 118 | // recursive field 119 | println(jsonPath("$..id")) 120 | 121 | // multi fields 122 | println(jsonPath("$['id', 'name']")) 123 | 124 | // any field 125 | println(jsonPath("$.*")) 126 | println(jsonPath("$.tags.*")) 127 | println(jsonPath("$['tags'].*")) 128 | 129 | // recursive any 130 | println(jsonPath("$..*")) 131 | 132 | // array slices 133 | println(jsonPath("$.tags[2]")) 134 | println(jsonPath("$.tags[0:3:2]")) 135 | println(jsonPath("$.tags[-2:]")) 136 | println(jsonPath("$.tags[:-2]")) 137 | 138 | // array random 139 | println(jsonPath("$.tags[0,2]")) 140 | println(jsonPath("$.tags[-1]")) 141 | 142 | // array recursive 143 | println(jsonPath("$.address[*].city")) 144 | 145 | 146 | // has filter 147 | println(jsonPath("$.address[?(@.work)]")) 148 | 149 | 150 | // comparison filter 151 | println(jsonPath("$.address[?(@.id < 3)]")) 152 | println(jsonPath("$.address[?(@.id <= 3)]")) 153 | 154 | println(jsonPath("$.address[?(@.id > 2)]")) 155 | println(jsonPath("$.address[?(@.id >= 2)]")) 156 | 157 | println(jsonPath("$.address[?(@.state == 'PA')]")) 158 | println(jsonPath("$.address[?(@.city == 'Springfield')]")) 159 | println(jsonPath("$.address[?(@.city != 'Devon')]")) 160 | 161 | 162 | // boolean filter 163 | println(jsonPath("$.address[?(@.id > 1 && @.state != 'PA')]")) 164 | println(jsonPath("$.address[?(@.id < 
4 && @.state == 'PA')]")) 165 | println(jsonPath("$.address[?(@.id == 4 || @.state == 'PA')]")) 166 | println(jsonPath("$.address[?(@.id == 4 || @.state == 'NJ')]")) 167 | 168 | // update field of nonexistent object 169 | jsonPath("$.person.id") = 10 170 | println(jsonPath("$.person")) 171 | println(jsonPath("$.person.id")) 172 | 173 | 174 | // update multi fields of nonexistent object 175 | jsonPath("$['person']['id', 'name']") = 30 176 | println(jsonPath("$.person")) 177 | println(jsonPath("$['person']['id', 'name']")) 178 | 179 | 180 | // update array slices of nonexistent object 181 | jsonPath("$.person.tags[1:3]") = "father" 182 | println(jsonPath("$.person.tags")) 183 | 184 | 185 | // update array random of nonexistent object 186 | jsonPath("$.person.tags[2]") = "father" 187 | println(jsonPath("$.person.tags")) 188 | -------------------------------------------------------------------------------- /shell/src/universal/examples/pagerank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc -J-Xmx8192M "$0" "$@" 3 | !# 4 | 5 | import scala.io.Source 6 | 7 | // Parse one N-Triples line into (subject, predicate, object). The subject URI is
8 | // URL-decoded with its angle brackets stripped; the object keeps its raw form.
9 | def triple(line: String): (String, String, String) = { 10 | val tokens = line.split(" ", 3) 11 | val subject = java.net.URLDecoder.decode(tokens(0).replace("<", "").replace(">", ""), "UTF-8") // strip <> around URI, decode %-escapes 12 | val predicate = tokens(1) 13 | var obj = tokens(2) 14 | 15 | (subject, predicate, obj) 16 | } 17 | 18 | val pages = scala.collection.mutable.Map[String, Int]() 19 | Source.fromFile("../../data/dbpedia/long_abstracts_en.nt").getLines.foreach { line => 20 | if (!line.startsWith("#")) { 21 | val nt = triple(line) 22 | if (!pages.contains(nt._1)) { 23 | pages(nt._1) = -1 24 | } 25 | } 26 | } 27 | 28 | var pageIndex = 0 29 | Source.fromFile("../../data/dbpedia/page_links_en.nt").getLines.foreach { line => 30 | if (!line.startsWith("#")) { 31 | val nt = triple(line) 32 | if (pages.contains(nt._1) && pages(nt._1) == -1) { 33 | pages(nt._1) = pageIndex 34 | pageIndex += 1 35 | } 36 | }
35 | } 36 | 37 | val d = 0.85 38 | val n = pageIndex 39 | val len = 172308908 40 | val colIndex = new Array[Int](n + 1) 41 | val rowIndex = new Array[Int](len) 42 | val value = new Array[Double](len) 43 | 44 | var k = 0 45 | var col = 0 46 | colIndex(0) = 0 47 | Source.fromFile("../../data/dbpedia/page_links_en.nt").getLines.foreach { line => 48 | if (!line.startsWith("#")) { 49 | val nt = triple(line) 50 | val source = nt._1 51 | val sink = java.net.URLDecoder.decode(nt._3.replace("<", "").replace("> .", ""), "UTF-8") // strip <> and trailing " ." terminator, decode %-escapes 52 | if (pages.contains(source) && pages.contains(sink)) { 53 | val j = pages(source) 54 | val i = pages(sink) 55 | if (i != -1 && j != -1) { 56 | value(k) = 1.0 57 | rowIndex(k) = i 58 | if (j < col) { 59 | println("smaller col index", j, col, "skip") 60 | } else { 61 | if (j > col) { 62 | (col+1) to j foreach { idx => colIndex(idx) = k } 63 | col = j 64 | } 65 | 66 | k += 1 67 | if (k % 1000000 == 0) println("build links matrix", k) 68 | } 69 | } 70 | } 71 | } 72 | } 73 | colIndex(col+1) = k 74 | 75 | 0 until n foreach { idx => 76 | val l = colIndex(idx+1) - colIndex(idx) 77 | if (l > 0) colIndex(idx) until colIndex(idx+1) foreach { i => value(i) = value(i) / l} 78 | } 79 | 80 | val matrix = new smile.math.matrix.SparseMatrix(n, n, value.slice(0,k), rowIndex.slice(0,k), colIndex) 81 | val rank = smile.math.matrix.EigenValueDecomposition.pagerank(matrix) 82 | 83 | val pagerank = pages.toSeq.filter(_._2 != -1).map { case (page, index) => 84 | (page, rank(index)) 85 | }.sortBy(-_._2) 86 | 87 | pagerank.foreach { case (page, rank) => 88 | println(page, rank) 89 | } 90 | -------------------------------------------------------------------------------- /shell/src/universal/examples/rhino.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo -e "PUT" 4 | curl -X PUT -H "Content-Type: application/json" -d '{"_id":"dude","username":"xyz","password":"xyz"}' http://localhost:8080/unicorn_rhino_test 5 | 6 | echo -e "\nPUT again"
7 | curl -X PUT -H "Content-Type: application/json" -d '{"_id":"dude","username":"xyz","password":"xyz"}' http://localhost:8080/unicorn_rhino_test 8 | 9 | echo -e "\nGET" 10 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude 11 | 12 | echo -e "\nPOST" 13 | curl -X POST -H "Content-Type: application/json" -d '{"_id":"dude","username":"dude","password":"xyz"}' http://localhost:8080/unicorn_rhino_test 14 | 15 | echo -e "\nGET" 16 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude 17 | 18 | echo -e "\nPATCH" 19 | curl -X PATCH -H "Content-Type: application/json" -d '{"_id":"dude","$set":{"password":"abc"}}' http://localhost:8080/unicorn_rhino_test 20 | 21 | echo -e "\nGET" 22 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude 23 | 24 | echo -e "\nDELETE" 25 | curl -X DELETE http://localhost:8080/unicorn_rhino_test/dude 26 | 27 | echo -e "\nGET" 28 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude 29 | 30 | echo -e "\nPUT IBM" 31 | curl -X PUT -H "Content-Type: application/json" --header 'tenant: "IBM"' -d '{"_id":"dude","username":"xyz","password":"xyz"}' http://localhost:8080/unicorn_rhino_test 32 | 33 | echo -e "\nGET IBM" 34 | curl -X GET --header 'tenant: "IBM"' http://localhost:8080/unicorn_rhino_test/dude 35 | 36 | echo -e "\nGET MSFT" 37 | curl -X GET --header 'tenant: "MSFT"' http://localhost:8080/unicorn_rhino_test/dude 38 | 39 | echo -e "\nGET NONE" 40 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude 41 | 42 | echo -e "\nDELETE" 43 | curl -X DELETE --header 'tenant: "IBM"' http://localhost:8080/unicorn_rhino_test/dude 44 | 45 | echo -e "\nGET" 46 | curl -X GET --header 'tenant: "IBM"' http://localhost:8080/unicorn_rhino_test/dude 47 | 48 | -------------------------------------------------------------------------------- /shell/src/universal/examples/spark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | import
java.util._ 6 | import unicorn.json._ 7 | import unicorn.bigtable._ 8 | import unicorn.bigtable.hbase._ 9 | import unicorn.unibase._ 10 | import unicorn.narwhal._ 11 | import unicorn.graph._ 12 | import org.apache.spark._ 13 | import org.apache.spark.graphx._ 14 | import org.apache.spark.rdd.RDD 15 | 16 | val conf = new SparkConf().setAppName("unicorn").setMaster("local[4]") 17 | val sc = new SparkContext(conf) 18 | val db = new Narwhal(HBase()) 19 | 20 | val table = db("worker") 21 | table.tenant = "IBM" 22 | val rdd = table.rdd(sc) 23 | rdd.count() 24 | 25 | val table = db("narwhal") 26 | val rdd = table.rdd(sc, json""" 27 | { 28 | "$$or": [ 29 | { 30 | "age": {"$$gt": 30} 31 | }, 32 | { 33 | "state": "NJ" 34 | } 35 | ] 36 | } 37 | """) 38 | rdd.count() 39 | 40 | 41 | val sqlContext = new org.apache.spark.sql.SQLContext(sc) 42 | import sqlContext.implicits._ 43 | 44 | case class Worker(name: String, age: Int) 45 | val workers = rdd.map { js => Worker(js.name, js.age) } 46 | val df = sqlContext.createDataFrame(workers) 47 | df.show 48 | 49 | df.registerTempTable("worker") 50 | sqlContext.sql("SELECT * FROM worker WHERE age > 30").show -------------------------------------------------------------------------------- /shell/src/universal/examples/sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | import unicorn.json._ 6 | import unicorn.bigtable.hbase.HBase 7 | import unicorn.unibase._ 8 | import unicorn.narwhal.Narwhal; import unicorn.sql._ 9 | 10 | val hbase = Narwhal(HBase()) 11 | hbase.createTable("unicorn_sql") 12 | val table = hbase("unicorn_sql") 13 | 14 | val haifeng = JsObject( 15 | "name" -> "Haifeng", 16 | "salary" -> 1 17 | ) 18 | 19 | val roberto = JsObject( 20 | "name" -> "Roberto", 21 | "salary" -> 3 22 | ) 23 | 24 | val jerome = JsObject( 25 | "name" -> "Jerome", 26 | "salary" -> 2 27 | ) 28 | 29 | val keith = JsObject( 30 | "name" -> "Keith", 31 | "salary" -> 2 32 | ) 33 | 
34 | val amit = JsObject( 35 | "name" -> "Amit", 36 | "salary" -> 3 37 | ) 38 | 39 | val stuart = JsObject( 40 | "name" -> "Stuart", 41 | "salary" -> 4 42 | ) 43 | 44 | val don = JsObject( 45 | "name" -> "Don", 46 | "salary" -> 4 47 | ) 48 | 49 | val carlos = JsObject( 50 | "name" -> "Carlos", 51 | "salary" -> 5 52 | ) 53 | 54 | table.upsert(haifeng) 55 | table.upsert(roberto) 56 | table.upsert(jerome) 57 | table.upsert(keith) 58 | table.upsert(amit) 59 | table.upsert(stuart) 60 | table.upsert(don) 61 | table.upsert(carlos) 62 | 63 | hbase.sql("select * from unicorn_sql where name = 'Haifeng'") 64 | 65 | hbase.dropTable("unicorn_sql") 66 | 67 | hbase.sql("select address.state, count(address.state), max(age), avg(salary) as avg_salary from worker group by address.state order by avg_salary") -------------------------------------------------------------------------------- /shell/src/universal/examples/traversal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | import unicorn.json._ 6 | import unicorn.bigtable.hbase.HBase 7 | import unicorn.unibase._ 8 | import unicorn.unibase.Unibase.{$id, $graph} 9 | import unicorn.graph._ 10 | 11 | // Google+ 12 | val hbase = Unibase(HBase()) 13 | val gplus = hbase("gplus") 14 | 15 | def trace(vertex: JsValue, edge: Edge[JsValue, (String, JsValue)], hops: Int) { 16 | if (hops > 0) println(s"111065108889012087599 --$hops--> $vertex") 17 | } 18 | 19 | val dan = JsString("111065108889012087599") 20 | 21 | val visitor = new SimpleUnibaseVisitor(gplus, 2) 22 | visitor.relationships = Some(Set("follows")) 23 | visitor.addVisitHook(trace) 24 | 25 | val danCircle = JsGraph(dan, visitor) 26 | danCircle.topologicalSort 27 | danCircle.dijkstra 28 | 29 | 30 | // Make a small org chart for A* search 31 | hbase.createTable("astar") 32 | val astar = hbase("astar") 33 | 34 | val haifeng = JsObject( 35 | $id -> "Haifeng", 36 | "rank" -> 1 37 | ) 38 | 
graph(haifeng)("works with", "Jerome") = 1 39 | graph(haifeng)("reports to", "Roberto") = 2 40 | 41 | val roberto = JsObject( 42 | $id -> "Roberto", 43 | "rank" -> 3 44 | ) 45 | graph(roberto)("works with", "Keith") = 1 46 | graph(roberto)("reports to", "Stuart") = 2 47 | 48 | val jerome = JsObject( 49 | $id -> "Jerome", 50 | "rank" -> 2 51 | ) 52 | graph(jerome)("works with", "Roberto") = 1 53 | graph(jerome)("reports to", "Don") = 2 54 | 55 | val keith = JsObject( 56 | $id -> "Keith", 57 | "rank" -> 2 58 | ) 59 | graph(keith)("works with", "Roberto") = 1 60 | graph(keith)("works with", "Amit") = 1 61 | graph(keith)("reports to", "Stuart") = 2 62 | 63 | val amit = JsObject( 64 | $id -> "Amit", 65 | "rank" -> 3 66 | ) 67 | graph(amit)("works with", "Roberto") = 1 68 | graph(amit)("works with", "Keith") = 1 69 | graph(amit)("reports to", "Stuart") = 2 70 | 71 | val stuart = JsObject( 72 | $id -> "Stuart", 73 | "rank" -> 4 74 | ) 75 | graph(stuart)("works with", "Don") = true 76 | graph(stuart)("reports to", "Carlos") = true 77 | 78 | val don = JsObject( 79 | $id -> "Don", 80 | "rank" -> 4 81 | ) 82 | graph(don)("works with", "Stuart") = true 83 | graph(don)("reports to", "Carlos") = true 84 | 85 | val carlos = JsObject( 86 | $id -> "Carlos", 87 | "rank" -> 5 88 | ) 89 | 90 | astar.insert(haifeng) 91 | astar.insert(roberto) 92 | astar.insert(jerome) 93 | astar.insert(keith) 94 | astar.insert(amit) 95 | astar.insert(stuart) 96 | astar.insert(don) 97 | astar.insert(carlos) 98 | 99 | println(astar.find(where = JsObject("_id" -> JsString("Haifeng"))).next) 100 | 101 | val graphOps = new GraphOps[JsObject, (String, JsValue)]() 102 | val path = graphOps.dijkstra(haifeng, carlos, 103 | (doc: JsObject) => { 104 | doc($graph).asInstanceOf[JsObject].fields.toSeq.flatMap { case (relationship, links) => 105 | links.asInstanceOf[JsObject].fields.toSeq.map { case (_, link) => 106 | val vertex = astar(link($id)).get 107 | val edge = (relationship, link($data)) 108 | (vertex, edge) 
109 | } 110 | }.iterator 111 | } 112 | ) 113 | 114 | path.map { 115 | case (doc, Some(edge)) => edge._1 + " --> " + doc($id) 116 | case (doc, None) => doc($id) 117 | }.mkString(" -- ") 118 | 119 | val shortPath = graphOps.astar(haifeng, carlos, 120 | (doc: JsObject) => { 121 | doc($graph).asInstanceOf[JsObject].fields.toSeq.flatMap { case (relationship, links) => 122 | links.asInstanceOf[JsObject].fields.toSeq.map { case (_, link) => 123 | val vertex = astar(link($id)).get 124 | val edge = (relationship, link($data)) 125 | (vertex, edge) 126 | } 127 | }.iterator 128 | }, 129 | (a: JsObject, b: JsObject) => (a.rank, b.rank) match { 130 | case (ar: JsInt, br: JsInt) => Math.abs(ar.value - br.value) 131 | case _ => 100 132 | }, 133 | (a: JsObject, b: JsObject, e: (String, JsValue)) => e._2 match { 134 | case JsInt(weight) => weight.toDouble 135 | case _ => 3.0 136 | } 137 | ) 138 | 139 | shortPath.map { 140 | case (doc, Some(edge)) => edge._1 + " --> " + doc($id) 141 | case (doc, None) => doc($id) 142 | }.mkString(" -- ") 143 | 144 | 145 | hbase.dropTable("astar") -------------------------------------------------------------------------------- /shell/src/universal/examples/twitter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | // import dbpedia to unibase 6 | 7 | import scala.io.Source 8 | import unicorn.json._ 9 | import unicorn.bigtable.rocksdb._ 10 | import unicorn.unibase._ 11 | import unicorn.unibase.graph._ 12 | 13 | val db = Unibase(RocksDB.create("/tmp/unicorn-twitter")) 14 | db.createGraph("twitter") 15 | val twitter = db.graph("twitter", new Snowflake(0)) 16 | 17 | // Please download and unzip first. 
18 | twitter.csv("http://an.kaist.ac.kr/~haewoon/release/twitter_social_graph/twitter_rv.zip", longVertexId = true) -------------------------------------------------------------------------------- /shell/src/universal/examples/wiki.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | // import wikipedia dump to unibase 6 | 7 | import java.util._ 8 | import scala.collection.mutable.Stack 9 | import scala.io.Source 10 | import scala.xml.pull._ 11 | import unicorn.json._ 12 | import unicorn.bigtable._ 13 | import unicorn.bigtable.accumulo._ 14 | import unicorn.bigtable.hbase._ 15 | import unicorn.unibase._ 16 | 17 | def wikipedia(bucket: HBaseBucket, files: String*): Unit = { 18 | files.foreach { xmlFile => 19 | val xml = new XMLEventReader(Source.fromFile(xmlFile)) 20 | 21 | var field: String = null 22 | val doc = Stack[JsObject]() 23 | for (event <- xml) { 24 | event match { 25 | case EvElemStart(_, "page", _, _) => { 26 | doc.push(JsObject()) 27 | } 28 | case EvElemEnd(_, "page") => { 29 | if (!doc.isEmpty) { 30 | val d = doc.pop 31 | assert(doc.isEmpty) 32 | 33 | d("ns") match { 34 | case JsString(value) if value == "0" => 35 | val title = d("title") match { 36 | case JsString(value) => value 37 | case _ => "" 38 | } 39 | 40 | if (title != "") { 41 | d($id) = d.id.toString.toInt 42 | println(d($id)) 43 | bucket.upsert(d) 44 | } 45 | case _ => 46 | } 47 | } 48 | } 49 | case e @ EvElemStart(_, tag, _, _) => { 50 | if (!doc.isEmpty) { 51 | if (field != null) { 52 | val child = JsObject() 53 | val parent = doc.top 54 | parent(field) = child 55 | doc.push(child) 56 | } 57 | field = tag 58 | } 59 | } 60 | case e @ EvElemEnd(_, tag) => { 61 | if (field == null) { 62 | if (!doc.isEmpty) doc.pop 63 | } 64 | else field = null 65 | } 66 | case EvText(t) => { 67 | if (!doc.isEmpty && field != null) { 68 | doc.top(field) = t 69 | } 70 | } 71 | case _ => // ignore 72 | } 73 | } 74 | } 
75 | } 76 | 77 | val hbase = Unibase(HBase()) 78 | hbase.createTable("wiki") 79 | val bucket = hbase("wiki") 80 | 81 | wikipedia(bucket, "../../data/wiki/enwikinews-20140410-pages-articles-multistream.xml") 82 | -------------------------------------------------------------------------------- /shell/src/universal/examples/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | exec unicorn -nc "$0" "$@" 3 | !# 4 | 5 | import java.util._ 6 | import unicorn.json._ 7 | import unicorn.bigtable._ 8 | import unicorn.bigtable.accumulo._ 9 | import unicorn.bigtable.hbase._ 10 | import unicorn.unibase._ 11 | 12 | // measure running time of a function/block 13 | def time[A](f: => A) = { 14 | val s = System.nanoTime 15 | val ret = f 16 | if (ret.isInstanceOf[JsValue]) println(ret.asInstanceOf[JsValue].prettyPrint) 17 | else println(ret) 18 | println("time: " + (System.nanoTime - s)/1e6 + " ms") 19 | ret 20 | } 21 | 22 | // connect to Accumulo mock 23 | val accumulo = Unibase(Accumulo()) 24 | time { accumulo.createTable("worker") } 25 | val bucket = accumulo("worker") 26 | 27 | // Read a non-existing row. It is the pure time of round trip. 28 | time { bucket("row1") } 29 | 30 | // Create a document 31 | val person = JsObject( 32 | "name" -> "Haifeng", 33 | "gender" -> "Male", 34 | "salary" -> 1.0, 35 | "address" -> JsObject( 36 | "street" -> "135 W. 
18th ST", 37 | "city" -> "New York", 38 | "state" -> "NY", 39 | "zip" -> 10011 40 | ), 41 | "project" -> JsArray("HCM", "Analytics"), 42 | "graph" -> JsObject( 43 | "work with" -> JsObject( 44 | "Jim" -> JsObject( 45 | "_id" -> "Jim", 46 | "data" -> 1 47 | ), 48 | "Mike" -> JsObject( 49 | "_id" -> "Mike", 50 | "data" -> 1 51 | ) 52 | ), 53 | "report to" -> JsObject( 54 | "Tom" -> JsObject( 55 | "_id" -> "Tom", 56 | "data" -> 1 57 | ) 58 | ) 59 | ) 60 | ) 61 | 62 | 63 | // save document into a dataset 64 | val key = time { bucket.upsert(person) } 65 | 66 | val worker = time { bucket(key).get } 67 | worker.prettyPrint 68 | 69 | // Read partially a document 70 | val doc = time { bucket(key, "name").get } 71 | doc.prettyPrint 72 | 73 | val update = JsObject( 74 | "_id" -> key, 75 | "$set" -> JsObject( 76 | "salary" -> 100000.0, 77 | "address.street" -> "5 ADP Blvd" 78 | ), 79 | "$unset" -> JsObject( 80 | "gender" -> JsTrue 81 | ) 82 | ) 83 | 84 | time { bucket.update(update) } 85 | 86 | val updated = time { bucket(key, "name").get } 87 | updated.prettyPrint 88 | 89 | // HBase 90 | val hbase = Unibase(HBase()) 91 | 92 | time { hbase.createTable("worker") } 93 | val hbucket = hbase("worker") 94 | 95 | time { hbucket.upsert(person) } 96 | 97 | val asOfDate = new Date 98 | 99 | time { hbucket.update(update) } 100 | 101 | val old = time { hbucket(asOfDate, key).get } 102 | old.prettyPrint 103 | 104 | val latest = time { hbucket(key).get } 105 | latest.prettyPrint 106 | 107 | val rollback = """ 108 | { 109 | "$rollback": { 110 | "salary": 1, 111 | "address.street": 1, 112 | "gender": 1 113 | } 114 | } 115 | """.parseJsObject 116 | 117 | rollback(Unibase.$id) = key 118 | time { hbucket.update(rollback) } 119 | 120 | val yesterdayOnceMore = time { hbucket(key).get } 121 | yesterdayOnceMore.prettyPrint 122 | 123 | // delete the bucket 124 | time { hbase.dropTable("worker") } 125 | -------------------------------------------------------------------------------- /sql/build.sbt: 
-------------------------------------------------------------------------------- 1 | name := "unicorn-sql" 2 | 3 | libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.4" 4 | -------------------------------------------------------------------------------- /sql/src/main/scala/unicorn/sql/package.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | package unicorn 18 | 19 | import unicorn.narwhal.Narwhal 20 | 21 | package object sql { 22 | implicit def narwhalSQLContext(db: Narwhal) = new SQLContext(db) 23 | } 24 | -------------------------------------------------------------------------------- /unibase/build.sbt: -------------------------------------------------------------------------------- 1 | name := "unicorn-unibase" 2 | 3 | libraryDependencies += "org.apache.jena" % "jena-arq" % "3.1.0" -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/DocumentSerializer.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase 18 | 19 | import java.nio.ByteBuffer 20 | 21 | import unicorn.bigtable.{Column, ColumnFamily} 22 | import unicorn.json._ 23 | 24 | /** Document serializer. By default, document key size is up to 64KB, column size is up to 10MB. 
25 | * 26 | * @author Haifeng Li 27 | */ 28 | class DocumentSerializer( 29 | val keySerializer: BsonSerializer = new BsonSerializer(ByteBuffer.allocate(65536)), 30 | val valueSerializer: ColumnarJsonSerializer = new ColumnarJsonSerializer(ByteBuffer.allocate(10485760))) { 31 | 32 | /** Serialize document data. */ 33 | def serialize(json: JsObject): Seq[Column] = { 34 | valueSerializer.serialize(json).map { case (path, value) => 35 | Column(valueSerializer.str2Bytes(path), value) 36 | }.toSeq 37 | } 38 | 39 | /** Serialize document id. */ 40 | def serialize(tenant: JsValue, id: JsValue): Array[Byte] = { 41 | keySerializer.clear 42 | keySerializer.put(tenant) 43 | keySerializer.put(id) 44 | keySerializer.toBytes 45 | } 46 | 47 | /** Return the row prefix of a tenant. */ 48 | def tenantRowKeyPrefix(tenant: JsValue): Array[Byte] = { 49 | keySerializer.clear 50 | keySerializer.put(tenant) 51 | keySerializer.toBytes 52 | } 53 | 54 | /** Deserialize document key. */ 55 | def deserialize(key: Array[Byte]): (JsValue, JsValue) = { 56 | val buffer = ByteBuffer.wrap(key) 57 | val tenant = keySerializer.deserialize(buffer) 58 | val id = keySerializer.deserialize(buffer) 59 | (tenant, id) 60 | } 61 | 62 | /** Assembles the document from multi-column family data. 
*/ 63 | def deserialize(data: Seq[ColumnFamily]): Option[JsObject] = { 64 | val objects = data.map { case ColumnFamily(family, columns) => 65 | val map = columns.map { case Column(qualifier, value, _) => 66 | (new String(qualifier, valueSerializer.charset), value.bytes) 67 | }.toMap 68 | val json = valueSerializer.deserialize(map) 69 | json.asInstanceOf[JsObject] 70 | } 71 | 72 | if (objects.size == 0) 73 | None 74 | else if (objects.size == 1) 75 | Some(objects(0)) 76 | else { 77 | val fold = objects.foldLeft(JsObject()) { (doc, family) => 78 | doc.fields ++= family.fields 79 | doc 80 | } 81 | Some(fold) 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/graph/Edge.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase.graph 18 | 19 | import scala.language.dynamics 20 | import unicorn.json.JsValue 21 | 22 | /** Graph (directed) edge. For an edge 1 - follows -> 3, 23 | * "1" and "3" are vertex ids, `follows` is the label of edge. 24 | * Vertex 1 is the `out vertex` of edge, and vertex 3 is the `in vertex`. 
25 | * Besides the label, an edge may have optional data. 26 | * 27 | * @author Haifeng Li 28 | */ 29 | case class Edge(val from: Long, val label: String, val to: Long, val properties: JsValue) extends Dynamic { 30 | 31 | override def toString = s"($from - [$label] -> $to) = ${properties.prettyPrint}" 32 | 33 | def apply(property: String): JsValue = properties.apply(property) 34 | 35 | def applyDynamic(property: String): JsValue = apply(property) 36 | 37 | def selectDynamic(property: String): JsValue = apply(property) 38 | } -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/graph/GraphSerializer.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase.graph 18 | 19 | import java.nio.ByteBuffer 20 | import unicorn.bigtable.{Column, Row} 21 | import unicorn.json._ 22 | import unicorn.util._ 23 | 24 | /** Graph serializer. By default, edge label size is up to 256, 25 | * vertex property size is up to 64KB, overall data size of each edge is up to 10MB. 
26 | * 27 | * @author Haifeng Li 28 | */ 29 | class GraphSerializer( 30 | val buffer: ByteBuffer = ByteBuffer.allocate(1024), 31 | val vertexSerializer: ColumnarJsonSerializer = new ColumnarJsonSerializer(ByteBuffer.allocate(65536)), 32 | val edgeSerializer: BsonSerializer = new BsonSerializer(ByteBuffer.allocate(10485760))) extends Logging { 33 | 34 | /** Serializes vertex id. */ 35 | def serialize(id: Long): Array[Byte] = { 36 | buffer.clear 37 | buffer.putLong(id) 38 | buffer 39 | } 40 | 41 | /** Serializes the document vertex lookup table row key. */ 42 | def serialize(table: String, tenant: JsValue, key: JsValue): Array[Byte] = { 43 | buffer.clear 44 | edgeSerializer.serialize(buffer, table) 45 | edgeSerializer.serialize(buffer, tenant) 46 | edgeSerializer.serialize(buffer, key) 47 | buffer 48 | } 49 | 50 | /** Serializes vertex property data. */ 51 | def serializeVertex(json: JsObject): Seq[Column] = { 52 | vertexSerializer.serialize(json).map { case (path, value) => 53 | Column(vertexSerializer.str2Bytes(path), value) 54 | }.toSeq 55 | } 56 | 57 | def deserializeVertex(row: Row): Vertex = { 58 | val vertex = deserializeVertexId(row.key) 59 | val families = row.families 60 | 61 | val properties = families.find(_.family == GraphVertexColumnFamily).map { family => 62 | deserializeVertexProperties(family.columns) 63 | } 64 | 65 | val in = families.find(_.family == GraphInEdgeColumnFamily).map { family => 66 | family.columns.map { column => 67 | val (label, source) = deserializeEdgeColumnQualifier(column.qualifier) 68 | val properties = deserializeEdgeProperties(column.value) 69 | Edge(source, label, vertex, properties) 70 | } 71 | }.getOrElse(Seq.empty) 72 | 73 | val out = families.find(_.family == GraphOutEdgeColumnFamily).map { family => 74 | family.columns.map { column => 75 | val (label, target) = deserializeEdgeColumnQualifier(column.qualifier) 76 | val properties = deserializeEdgeProperties(column.value) 77 | Edge(vertex, label, target, properties) 78 | } 
79 | }.getOrElse(Seq.empty) 80 | 81 | val edges = (in.size, out.size) match { 82 | case (0, _) => out 83 | case (_, 0) => in 84 | case _ => out ++ in 85 | } 86 | 87 | Vertex(vertex, properties.getOrElse(JsObject("id" -> JsLong(vertex))), edges) 88 | } 89 | 90 | /** Deserializes vertex property data. */ 91 | def deserializeVertexProperties(columns: Seq[Column]): JsObject = { 92 | val map = columns.map { case Column(qualifier, value, _) => 93 | (new String(qualifier, vertexSerializer.charset), value.bytes) 94 | }.toMap 95 | vertexSerializer.deserialize(map).asInstanceOf[JsObject] 96 | } 97 | 98 | /** Serializes an edge column qualifier. */ 99 | def serializeEdgeColumnQualifier(label: Array[Byte], vertex: Long): Array[Byte] = { 100 | buffer.clear 101 | buffer.put(label) 102 | buffer.put(0.toByte) 103 | buffer.putLong(vertex) 104 | buffer 105 | } 106 | 107 | /** Deserializes an edge column qualifier. */ 108 | def deserializeEdgeColumnQualifier(bytes: Array[Byte]): (String, Long) = { 109 | val buffer = ByteBuffer.wrap(bytes) 110 | val label = edgeSerializer.cstring(buffer) 111 | val vertex = buffer.getLong 112 | (label, vertex) 113 | } 114 | 115 | /** Serializes edge property data. */ 116 | def serializeEdge(json: JsValue): Array[Byte] = { 117 | edgeSerializer.clear 118 | edgeSerializer.put(json) 119 | edgeSerializer.toBytes 120 | } 121 | 122 | /** Deserializes vertex id. */ 123 | def deserializeVertexId(bytes: Array[Byte]): Long = { 124 | require(bytes.length == 8, s"vertex id bytes size is not 8: ${bytes.length}") 125 | ByteBuffer.wrap(bytes).getLong 126 | } 127 | 128 | /** Deserializes edge property data. 
*/ 129 | def deserializeEdgeProperties(bytes: Array[Byte]): JsValue = { 130 | edgeSerializer.deserialize(bytes) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/graph/SimpleTraveler.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase.graph 18 | 19 | import VertexColor._ 20 | import Direction._ 21 | 22 | /** Simple graph visitor with cache management. 23 | * In DFS and BFS, the user should create a sub class overriding 24 | * the `apply` method, which is nop by default. 25 | * 26 | * @param graph The graph to visit. 27 | * @param relationships Relationship of interest. Only neighbors with given 28 | * relationship will be visited. Empty set means all 29 | * relationships. 30 | * @param maxHops Maximum number of hops during graph traversal. 31 | * @param direction Edges to follow in the traversal. 
32 | * 33 | * @author Haifeng Li 34 | */ 35 | class SimpleTraveler(val graph: ReadOnlyGraph, val relationships: Set[String] = Set.empty, val maxHops: Int = 3, val direction: Direction = Outgoing) extends Traveler { 36 | /** The color mark if a vertex was already visited. */ 37 | private val mark = collection.mutable.Map[Long, VertexColor]().withDefaultValue(White) 38 | 39 | /** The cache of vertices. */ 40 | private val cache = collection.mutable.Map[Long, Vertex]() 41 | 42 | /** User defined vertex visit function. The default implementation is nop. 43 | * The user should create a sub class overriding this method. 44 | * 45 | * @param vertex the vertex on visiting. 46 | * @param edge the incoming arc (None for starting vertex). 47 | * @param hops the number of hops from the starting vertex to this vertex. 48 | */ 49 | def apply(vertex: Vertex, edge: Option[Edge], hops: Int): Unit = { 50 | 51 | } 52 | 53 | /** Resets the vertex color to unvisited and clean up the cache. */ 54 | def reset: Unit = { 55 | mark.clear 56 | cache.clear 57 | } 58 | 59 | override def vertex(id: Long): Vertex = { 60 | cache.get(id) match { 61 | case Some(node) => node 62 | case None => 63 | val node = graph(id, direction) 64 | cache(id) = node 65 | node 66 | } 67 | } 68 | 69 | override def vertex(key: String): Vertex = { 70 | val _id = id(key) 71 | require(_id.isDefined, s"Vertex $key doesn't exist") 72 | vertex(_id.get) 73 | } 74 | 75 | /** Translates a vertex string key to 64 bit id. 
*/ 76 | override def id(key: String): Option[Long] = { 77 | graph.id(key) 78 | } 79 | 80 | override def color(id: Long): VertexColor = mark(id) 81 | 82 | override def visit(vertex: Vertex, edge: Option[Edge], hops: Int): Unit = { 83 | apply(vertex, edge, hops) 84 | 85 | val black = vertex.neighbors.forall { neighbor => 86 | mark.contains(neighbor) 87 | } 88 | 89 | mark(vertex.id) = if (black) Black else Gray 90 | } 91 | 92 | override def neighbors(vertex: Vertex, hops: Int): Iterator[(Long, Edge)] = { 93 | if (hops >= maxHops) return Seq.empty.iterator 94 | 95 | val edges = if (relationships.isEmpty) vertex.edges 96 | else vertex.edges.filter { edge => 97 | relationships.contains(edge.label) 98 | } 99 | 100 | edges.map { edge => 101 | val neighbor = if (edge.to != vertex.id) edge.to else edge.from 102 | (neighbor, edge) 103 | }.iterator 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/graph/Traveler.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase.graph 18 | 19 | import unicorn.json._ 20 | 21 | /** Vertex color mark in a graph graversal. */ 22 | object VertexColor extends Enumeration { 23 | type VertexColor = Value 24 | 25 | /** White marks vertices that have yet to be discovered. */ 26 | val White = Value 27 | 28 | /** Gray marks a vertex that is discovered but still 29 | * has vertices adjacent to it that are undiscovered. */ 30 | val Gray = Value 31 | 32 | /** A black vertex is discovered vertex that is not 33 | * adjacent to any white vertices. 34 | */ 35 | val Black = Value 36 | } 37 | 38 | /** The edges to follow in a graph traversal. */ 39 | object Direction extends Enumeration { 40 | type Direction = Value 41 | 42 | /** Outgoing edges. */ 43 | val Outgoing = Value 44 | 45 | /** Incoming edges. */ 46 | val Incoming = Value 47 | 48 | /** Both directions. */ 49 | val Both = Value 50 | } 51 | 52 | import VertexColor._ 53 | 54 | /** Graph traveler is a proxy to the graph during the 55 | * graph traversal. Beyond the visitor design pattern 56 | * that process a vertex during the traversal, 57 | * the traveler also provides the method to access 58 | * graph vertices, the neighbors of a vertex to explore, 59 | * and the weight of an edge. 60 | * 61 | * @author Haifeng Li 62 | */ 63 | trait Traveler { 64 | /** Translates a vertex string key to 64 bit id. */ 65 | def id(key: String): Option[Long] 66 | 67 | /** Returns the vertex of given ID. */ 68 | def vertex(id: Long): Vertex 69 | 70 | /** Returns the vertex of given string key. */ 71 | def vertex(key: String): Vertex 72 | 73 | /** The color mark if a vertex was already visited. */ 74 | def color(id: Long): VertexColor 75 | 76 | /** Visit a vertex during graph traversal. 77 | * 78 | * @param vertex the vertex on visiting. 79 | * @param edge the incoming arc (None for starting vertex). 
80 | * @param hops the number of hops from the starting vertex to this vertex. 81 | */ 82 | def visit(vertex: Vertex, edge: Option[Edge], hops: Int): Unit 83 | 84 | /** Returns an iterator of the neighbors and associated edges of a vertex. 85 | * 86 | * @param vertex the vertex on visiting. 87 | * @param hops the number of hops from starting vertex, which may be used for early termination. 88 | * @return an iterator of the outgoing edges 89 | */ 90 | def neighbors(vertex: Vertex, hops: Int): Iterator[(Long, Edge)] 91 | 92 | /** The weight of edge (e.g. shortest path search). */ 93 | def weight(edge: Edge): Double = edge.properties match { 94 | case JsInt(x) => x 95 | case JsCounter(x) => x 96 | case JsLong(x) => x 97 | case _ => 1.0 98 | } 99 | } 100 | 101 | /** Traveler for A* searcher. */ 102 | trait AstarTraveler extends Traveler { 103 | /** The future path-cost function, which is an admissible 104 | * "heuristic estimate" of the distance from the current vertex to the goal. 105 | * Note that the heuristic function must be monotonic. 106 | */ 107 | def h(v1: Long, v2: Long): Double 108 | } -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/graph/Vertex.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase.graph 18 | 19 | import scala.language.dynamics 20 | import unicorn.json._ 21 | 22 | /** Graph vertex. 23 | * 24 | * @author Haifeng Li 25 | */ 26 | case class Vertex(val id: Long, val properties: JsObject, val edges: Seq[Edge]) extends Dynamic { 27 | 28 | /** In vertices of outgoing edges. */ 29 | @transient lazy val in: Map[String, Seq[Long]] = { 30 | edges.filter(_.from == id).groupBy(_.label).mapValues(_.map(_.to)) 31 | } 32 | 33 | /** Out vertices of incoming vertices. */ 34 | @transient lazy val out: Map[String, Seq[Long]] = { 35 | edges.filter(_.to == id).groupBy(_.label).mapValues(_.map(_.from)) 36 | } 37 | 38 | /** Incoming arcs. */ 39 | @transient lazy val inE: Map[String, Seq[Edge]] = { 40 | edges.filter(_.to == id).groupBy(_.label) 41 | } 42 | 43 | /** Outgoing arcs. */ 44 | @transient lazy val outE: Map[String, Seq[Edge]] = { 45 | edges.filter(_.from == id).groupBy(_.label) 46 | } 47 | 48 | /* Neighbor vertices. */ 49 | @transient lazy val neighbors: Seq[Long] = { 50 | edges.map { case Edge(from, _, to, _) => 51 | if (from == id) to else from 52 | } 53 | } 54 | 55 | override def toString = s"Vertex[$id] = ${properties.prettyPrint}" 56 | 57 | def apply(property: String): JsValue = properties.apply(property) 58 | 59 | def applyDynamic(property: String): JsValue = apply(property) 60 | 61 | def selectDynamic(property: String): JsValue = apply(property) 62 | } -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/graph/package.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase 18 | 19 | /** 20 | * @author Haifeng Li 21 | */ 22 | package object graph { 23 | val $doc = "_doc" 24 | val $table = "_table" 25 | 26 | private[unicorn] val GraphDocumentVertexTable = "unicorn_graph_doc_vertex" 27 | private[unicorn] val GraphVertexColumnFamily = "vertex" 28 | private[unicorn] val GraphInEdgeColumnFamily = "in" 29 | private[unicorn] val GraphOutEdgeColumnFamily = "out" 30 | } 31 | -------------------------------------------------------------------------------- /unibase/src/main/scala/unicorn/unibase/package.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn 18 | 19 | /** 20 | * @author Haifeng Li 21 | */ 22 | package object unibase { 23 | val $id = "_id" 24 | val $tenant = "_tenant" 25 | 26 | private[unibase] val DocumentColumnFamily = "doc" 27 | 28 | // Originally we used "." as delimiter in table name. 29 | // However, "." cannot be part of table name in Accumulo. 30 | // So we switch to "_". 31 | private[unibase] val MetaTableName = "unicorn_meta_table" 32 | private[unibase] val MetaTableColumnFamily = "meta" 33 | 34 | private[unibase] val DefaultLocalityField = "default_locality" 35 | } 36 | -------------------------------------------------------------------------------- /unibase/src/test/scala/unicorn/unibase/UnibaseSpec.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | package unicorn.unibase 18 | 19 | import org.specs2.mutable._ 20 | import unicorn.bigtable.accumulo.Accumulo 21 | 22 | /** 23 | * @author Haifeng Li 24 | */ 25 | class UnibaseSpec extends Specification { 26 | // Make sure running examples one by one. 27 | // Otherwise, test cases on same columns will fail due to concurrency 28 | sequential 29 | val bigtable = Accumulo() 30 | val db = new Unibase(bigtable) 31 | val tableName = "unicorn_unibase_test" 32 | 33 | "Unibase" should { 34 | "create table" in { 35 | db.createTable(tableName) 36 | bigtable.tableExists(tableName) === true 37 | 38 | db.dropTable(tableName) 39 | bigtable.tableExists(tableName) === false 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /unicorn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sbt stage 4 | shell/target/universal/stage/bin/unicorn -v 5 | -------------------------------------------------------------------------------- /util/.idea/.name: -------------------------------------------------------------------------------- 1 | eDatabase -------------------------------------------------------------------------------- /util/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 22 | -------------------------------------------------------------------------------- /util/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /util/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- 
/util/.idea/libraries/SBT__org_scala_lang_scala_library_2_11_2_jar.xml:
--------------------------------------------------------------------------------
(XML markup stripped in this dump)
--------------------------------------------------------------------------------
/util/.idea/modules.xml:
--------------------------------------------------------------------------------
(XML markup stripped in this dump)
--------------------------------------------------------------------------------
/util/.idea/modules/edatabase.iml:
--------------------------------------------------------------------------------
(XML markup stripped in this dump)
--------------------------------------------------------------------------------
/util/.idea/sbt.xml:
--------------------------------------------------------------------------------
(XML markup stripped in this dump)
--------------------------------------------------------------------------------
/util/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
(XML markup stripped in this dump)
--------------------------------------------------------------------------------
/util/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
(XML markup stripped in this dump)
--------------------------------------------------------------------------------
/util/.idea/vcs.xml:
--------------------------------------------------------------------------------
(XML markup stripped in this dump)
--------------------------------------------------------------------------------
/util/build.sbt:
--------------------------------------------------------------------------------
name := "unicorn-util"

libraryDependencies += "org.slf4j" % "slf4j-api" % "1.7.21"

libraryDependencies += "com.typesafe" % "config" % "1.2.1"

--------------------------------------------------------------------------------
/util/src/main/scala/unicorn/util/ByteArray.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package unicorn.util

/**
  * Pimped byte array.
  *
  * @author Haifeng Li
  */
case class ByteArray(bytes: Array[Byte]) extends Ordered[ByteArray] {
  /** Flips each bit of the byte string. */
  def unary_~ = ByteArray(bytes.map { b => (~b).toByte })

  /** Hexadecimal string representation. */
  def hex = bytes2Hex(bytes)

  /** Converts UTF-8 bytes back to a string. */
  override def toString = new String(bytes, utf8)

  override def compare(that: ByteArray): Int = compareByteArray(bytes, that.bytes)

  override def compareTo(that: ByteArray): Int = compareByteArray(bytes, that.bytes)

  // Arrays compare by reference, so the case-class generated equals
  // would be wrong here; compare the byte contents instead.
  override def equals(that: Any): Boolean = that match {
    case other: ByteArray => compareTo(other) == 0
    case _ => false
  }

  // Content-based hash code, consistent with the content-based equals.
  override def hashCode: Int = bytes.foldLeft(7) { (hash, b) => 31 * hash + b }
}
--------------------------------------------------------------------------------
/util/src/main/scala/unicorn/util/Config.scala:
--------------------------------------------------------------------------------
/*******************************************************************************
 * (C) Copyright 2015 ADP, LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15 | *******************************************************************************/ 16 | 17 | package unicorn.util 18 | 19 | import com.typesafe.config.ConfigFactory 20 | 21 | /** 22 | * @author Haifeng Li 23 | */ 24 | object Config { 25 | 26 | def config = { 27 | val configNamespace = "unicorn" 28 | ConfigFactory.load().getConfig(configNamespace) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /util/src/main/scala/unicorn/util/Logging.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.util 18 | 19 | import org.slf4j.LoggerFactory 20 | 21 | /** 22 | * @author Haifeng Li 23 | */ 24 | trait Logging { 25 | 26 | lazy val log = LoggerFactory.getLogger(getClass) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /util/src/main/scala/unicorn/util/Using.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | *******************************************************************************/ 16 | 17 | package unicorn.util 18 | 19 | import java.io.Closeable 20 | 21 | /** 22 | * @author Haifeng Li 23 | */ 24 | object Using { 25 | def apply[S <: Closeable, T](resource: S)(use: S => T): T = { 26 | try { 27 | use(resource) 28 | } 29 | finally { 30 | if (resource != null) resource.close 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /util/src/main/scala/unicorn/util/package.scala: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * (C) Copyright 2015 ADP, LLC. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | *******************************************************************************/ 16 | 17 | package unicorn 18 | 19 | import java.nio.ByteBuffer 20 | import java.nio.charset.Charset 21 | import java.time.format.DateTimeFormatter 22 | 23 | /** 24 | * Utility functions. 25 | * 26 | * @author Haifeng Li 27 | */ 28 | package object util { 29 | 30 | val iso8601DateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") 31 | val iso8601DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss[.SSS]['Z']") 32 | 33 | val utf8 = Charset.forName("UTF-8") 34 | 35 | implicit def boxByteArray(x: Array[Byte]) = new ByteArray(x) 36 | implicit def unboxByteArray(x: ByteArray) = x.bytes 37 | implicit def string2Bytes(x: String) = x.getBytes(utf8) 38 | implicit def string2ByteArray(x: String) = new ByteArray(x.getBytes(utf8)) 39 | implicit def bytesSeq2ByteArray(x: Seq[Array[Byte]]) = x.map { bytes => new ByteArray(bytes) } 40 | implicit def stringSeq2ByteArray(x: Seq[String]) = x.map { s => new ByteArray(s.getBytes(utf8)) } 41 | 42 | /** Measure running time of a function/block. */ 43 | def time[A](f: => A) = { 44 | val s = System.nanoTime 45 | val ret = f 46 | println("time: " + (System.nanoTime - s)/1e6 + " ms") 47 | ret 48 | } 49 | 50 | /** Helper function convert ByteBuffer to Array[Byte]. */ 51 | implicit def byteBuffer2ArrayByte(buffer: ByteBuffer): Array[Byte] = { 52 | val bytes = new Array[Byte](buffer.position) 53 | buffer.position(0) 54 | buffer.get(bytes) 55 | bytes 56 | } 57 | 58 | /** Helper function convert ByteBuffer to ByteArray. */ 59 | implicit def byteBuffer2ByteArray(buffer: ByteBuffer): ByteArray = ByteArray(byteBuffer2ArrayByte(buffer)) 60 | 61 | /** Byte array to hexadecimal string. */ 62 | def bytes2Hex(bytes: Array[Byte]): String = { 63 | bytes.map("%02X" format _).mkString 64 | } 65 | 66 | /** Hexadecimal string to byte array. 
*/ 67 | def hex2Bytes(s: String): Array[Byte] = { 68 | require(s.length % 2 == 0, "Hexadecimal string must contain an even number of characters") 69 | 70 | val bytes = new Array[Byte](s.length / 2) 71 | for (i <- 0 until s.length by 2) { 72 | bytes(i/2) = java.lang.Integer.parseInt(s.substring(i, i+2), 16).toByte 73 | } 74 | bytes 75 | } 76 | 77 | val md5Encoder = java.security.MessageDigest.getInstance("MD5") 78 | 79 | /** MD5 hash function */ 80 | def md5(bytes: Array[Byte]) = md5Encoder.digest(bytes) 81 | 82 | /** Byte array ordering */ 83 | def compareByteArray(x: Array[Byte], y: Array[Byte]): Int = { 84 | val n = Math.min(x.length, y.length) 85 | for (i <- 0 until n) { 86 | val a: Int = x(i) & 0xFF 87 | val b: Int = y(i) & 0xFF 88 | if (a != b) return a - b 89 | } 90 | x.length - y.length 91 | } 92 | 93 | /** Left pad a String with a specified character. 94 | * 95 | * @param str the String to pad out, may be null 96 | * @param size the size to pad to 97 | * @param padChar the character to pad with 98 | * @return left padded String or original String if no padding is necessary, 99 | * null if null String input 100 | */ 101 | def leftPad(str: String, size: Int, padChar: Char = ' '): String = { 102 | if (str == null) 103 | return null 104 | 105 | val pads = size - str.length 106 | if (pads <= 0) 107 | return str // returns original String when possible 108 | 109 | return (String.valueOf(padChar) * pads).concat(str) 110 | } 111 | 112 | /** Right pad a String with a specified character. 
113 | * 114 | * @param str the String to pad out, may be null 115 | * @param size the size to pad to 116 | * @param padChar the character to pad with 117 | * @return left padded String or original String if no padding is necessary, 118 | * null if null String input 119 | */ 120 | def rightPad(str: String, size: Int, padChar: Char = ' '): String = { 121 | if (str == null) 122 | return null 123 | 124 | val pads = size - str.length 125 | if (pads <= 0) 126 | return str // returns original String when possible 127 | 128 | return str.concat(String.valueOf(padChar) * pads) 129 | } 130 | } 131 | --------------------------------------------------------------------------------