21 |
22 |
23 |
--------------------------------------------------------------------------------
/rhino/src/templates/debian/systemv:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | ### BEGIN INIT INFO
3 | # Provides: Unicorn Rhino
4 | # Required-Start: $local_fs $remote_fs $network
5 | # Required-Stop: $local_fs $remote_fs $network
6 | # Should-Start: $named
7 | # Should-Stop: $named
8 | # Default-Start: 2 3 4 5
9 | # Default-Stop: 0 1 6
10 | # Short-Description: Unicorn REST API
11 | # Description: Control the rhino daemon.
12 | ### END INIT INFO
13 |
14 | set -e
15 |
16 | if [ -z "${JAVA_HOME}" ]; then
17 | JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:/bin/java::")
18 | fi
19 | JAVA_OPTS="-Xms1024m -Xmx2048m"
20 |
21 | APP=${{app_name}}
22 |
23 | PID=/var/run/${APP}.pid
24 | OUT_LOG=/var/log/${APP}/${APP}_out.log
25 | ERR_LOG=/var/log/${APP}/${APP}_err.log
26 |
27 | DAEMON_USER=${{daemon_user}}
28 |
29 | APP_HOME=/opt/${APP}
30 | APP_CLASSPATH=${{app_classpath}}
31 | APP_CLASS=${{app_main_class}}
32 | APP_LOG_CONFIG=${APP_HOME}/conf/logback.xml
33 | APP_CONFIG=${APP_HOME}/conf/application.conf
34 |
35 | if [ -n "${APP_LOG_CONFIG}" ]; then
36 | JAVA_OPTS="-Dlogback.configurationFile=${APP_LOG_CONFIG} ${JAVA_OPTS}"
37 | fi
38 |
39 | DAEMON_ARGS="-home ${JAVA_HOME} -Dconfig.file=${APP_CONFIG} ${JAVA_OPTS} -pidfile ${PID}"
40 | DAEMON_ARGS="$DAEMON_ARGS -user ${DAEMON_USER} -outfile ${OUT_LOG} -errfile ${ERR_LOG}"
41 | DAEMON_ARGS="$DAEMON_ARGS -cp ${APP_CLASSPATH} ${APP_CLASS}"
42 |
43 | . /lib/lsb/init-functions
44 |
45 | case "$1" in
46 | start)
47 | log_daemon_msg "Starting ${APP}"
48 | cd ${APP_HOME} && jsvc ${DAEMON_ARGS}
49 | log_end_msg 0
50 | ;;
51 | stop)
52 | log_daemon_msg "Stopping ${APP}"
53 | cd ${APP_HOME} && jsvc -stop ${DAEMON_ARGS}
54 | log_end_msg 0
55 | ;;
56 | *)
57 | log_success_msg "Usage: {start|stop}"
58 | echo "Usage: {start|stop}"
59 | exit 1
60 | ;;
61 | esac
62 |
63 | exit 0
--------------------------------------------------------------------------------
/rhino/src/templates/rpm/systemd:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Unicorn REST API
3 | Documentation=https://github.com/haifengl/unicorn
4 | After=network.target remote-fs.target nss-lookup.target nginx.service
5 | Wants=nginx.service
6 |
7 | [Service]
8 | Type=simple
9 | User=unicorn
10 | Group=unicorn
11 | WorkingDirectory=/opt/${{app_name}}
12 | UMask=0022
13 | ExecStartPre=-/usr/bin/rm -f /opt/${{app_name}}/${{app_name}}.pid
14 | ExecStart=/opt/${{app_name}}/bin/${{exec}}
15 | ExecStop=/bin/kill -s QUIT $MAINPID
16 | PrivateTmp=true
17 | Restart=on-failure
18 | RestartSec=2
19 |
20 | [Install]
21 | WantedBy=multi-user.target
22 |
--------------------------------------------------------------------------------
/rhino/src/test/scala/unicorn/rhino/RhinoSpec.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.rhino
18 |
19 | import org.specs2.mutable.{BeforeAfter, Specification}
20 | import spray.testkit.Specs2RouteTest
21 | import spray.http.{HttpEntity, HttpRequest}
22 | import spray.http.HttpMethods._
23 | import spray.http.StatusCodes._
24 |
25 | import unicorn.json._
26 | import unicorn.bigtable.hbase.HBase
27 | import unicorn.unibase.Unibase
28 |
29 | class RhinoSpec extends Specification with Specs2RouteTest with BeforeAfter with Rhino {
30 | // makes test execution sequential and prevents conflicts that may occur when the data is
31 | // changed simultaneously in the database
32 | sequential
33 |
34 | val table = "unicorn_rhino_test"
35 | val db = new Unibase(HBase())
36 |
37 | var key: JsValue = JsUndefined
38 |
39 | override def before = {
40 | db.createTable(table)
41 | val bucket = db(table)
42 | key = bucket.upsert(json)
43 | }
44 |
45 | override def after = {
46 | db.dropTable(table)
47 | }
48 |
49 | // connects the DSL to the test ActorSystem
50 | override def actorRefFactory = system
51 |
52 | val json = JsonParser(
53 | """
54 | |{
55 | | "owner": "Rich",
56 | | "phone": "123-456-7890",
57 | | "address": {
58 | | "street": "1 ADP Blvd.",
59 | | "city": "Roseland",
60 | | "state": "NJ"
61 | | },
62 | | "store": {
63 | | "book": [
64 | | {
65 | | "category": "reference",
66 | | "author": "Nigel Rees",
67 | | "title": "Sayings of the Century",
68 | | "price": 8.95
69 | | },
70 | | {
71 | | "category": "fiction",
72 | | "author": "Evelyn Waugh",
73 | | "title": "Sword of Honour",
74 | | "price": 12.99
75 | | },
76 | | {
77 | | "category": "fiction",
78 | | "author": "Herman Melville",
79 | | "title": "Moby Dick",
80 | | "isbn": "0-553-21311-3",
81 | | "price": 8.99
82 | | },
83 | | {
84 | | "category": "fiction",
85 | | "author": "J. R. R. Tolkien",
86 | | "title": "The Lord of the Rings",
87 | | "isbn": "0-395-19395-8",
88 | | "price": 22.99
89 | | }
90 | | ],
91 | | "bicycle": {
92 | | "color": "red",
93 | | "price": 19.95
94 | | }
95 | | }
96 | |}
97 | """.stripMargin).asInstanceOf[JsObject]
98 |
99 | val update = JsonParser(
100 | """
101 | | {
102 | | "$set": {
103 | | "owner": "Poor",
104 | | "gender": "M",
105 | | "store.book.0.price": 9.95
106 | | }
107 | | }
108 | """.stripMargin).asInstanceOf[JsObject]
109 |
110 | "Rhino" should {
111 | "post" in {
112 | HttpRequest(POST, s"/$table", entity = HttpEntity(json.toString)) ~> apiRoute ~> check {
113 | response.status === OK
114 | }
115 | }
116 |
117 | "put" in {
118 | HttpRequest(PUT, s"/$table", entity = HttpEntity(json.toString)) ~> apiRoute ~> check {
119 | json("_id") = java.util.UUID.randomUUID
120 | response.status === OK
121 | }
122 | }
123 |
124 | "patch" in {
125 | HttpRequest(PATCH, s"/$table", entity = HttpEntity(update.toString)) ~> apiRoute ~> check {
126 | response.status === OK
127 | }
128 | }
129 |
130 | "delete" in {
131 | Delete(s"/$table/$key") ~> apiRoute ~> check {
132 | response.status === OK
133 | }
134 | }
135 |
136 | "get" in {
137 | Get(s"/$table/$key") ~> apiRoute ~> check {
138 | JsonParser(responseAs[String]) === json
139 | }
140 | }
141 | }
142 | }
143 |
--------------------------------------------------------------------------------
/rhino/src/universal/conf/rhino.ini:
--------------------------------------------------------------------------------
1 | # Setting -X directly (-J is stripped)
2 | # -J-X
3 | -J-Xmx4096M
4 | -J-Xms1024M
5 |
6 | # Add additional jvm parameters
7 | -J-server
8 |
9 | # Performance optimization
10 | -J-XX:+AggressiveOpts
11 |
12 | # G1 garbage collector
13 | -J-XX:+UseG1GC
14 |
15 | # Optimize string duplication, which happens a lot when parsing a data file
16 | -J-XX:+UseStringDeduplication
17 |
18 | # Turn on JVM debugging, open at the given port
19 | # -jvm-debug
20 |
21 | # Don't run the java version check
22 | # -no-version-check
23 |
24 | # enabling debug and sending -d as app argument
25 | # the '--' prevents app-parameter swallowing when
26 | # using a reserved parameter. See #184
27 | # -d -- -d
--------------------------------------------------------------------------------
/rocksdb/build.sbt:
--------------------------------------------------------------------------------
1 | name := "unicorn-rocksdb"
2 |
3 | libraryDependencies += "org.rocksdb" % "rocksdbjni" % "4.5.1"
4 |
5 |
--------------------------------------------------------------------------------
/rocksdb/src/main/scala/unicorn/bigtable/rocksdb/RocksDB.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.bigtable.rocksdb
18 |
19 | import java.io.File
20 | import java.util.Properties
21 | import org.rocksdb.{ColumnFamilyDescriptor, Options}
22 | import unicorn.bigtable.Database
23 |
24 | /** RocksDB abstraction. RocksDB is an embeddable persistent key-value store
25 | * for fast storage. There is no concept of tables in RocksDB. In fact, a
26 | * RocksDB is like a table in HBase. In this class, we create a higher level
27 | * concept of database that contains multiple RocksDB databases in a directory.
28 | * Each RocksDB is actually a subdirectory, which is encapsulated in RocksTable.
29 | *
30 | * @author Haifeng Li
31 | */
32 | class RocksDB(val path: String) extends Database[RocksTable] {
33 | val dir = new File(path)
34 | require(dir.exists, s"Directory $path doesn't exist")
35 |
36 | override def close: Unit = ()
37 |
38 | override def apply(name: String): RocksTable = {
39 | new RocksTable(s"$path/$name")
40 | }
41 |
42 | override def tables: Set[String] = {
43 | dir.listFiles.filter(_.isDirectory).map(_.getName).toSet
44 | }
45 |
46 | /** The parameter props is ignored. */
47 | override def createTable(name: String, props: Properties, families: String*): RocksTable = {
48 | val options = new Options
49 | options.setCreateIfMissing(true)
50 | options.setErrorIfExists(true)
51 | options.setCreateMissingColumnFamilies(false)
52 |
53 | val rocksdb = org.rocksdb.RocksDB.open(options, s"$path/$name")
54 | families.foreach { family =>
55 | val descriptor = new ColumnFamilyDescriptor(family.getBytes)
56 | rocksdb.createColumnFamily(descriptor)
57 | }
58 |
59 | rocksdb.close
60 | new RocksTable(s"$path/$name")
61 | }
62 |
63 | override def dropTable(name: String): Unit = {
64 | new File(s"$path/$name").delete
65 | }
66 |
67 | override def truncateTable(name: String): Unit = {
68 | throw new UnsupportedOperationException("RocksDB doesn't support truncateTable")
69 | }
70 |
71 | override def tableExists(name: String): Boolean = {
72 | val options = new Options().setCreateIfMissing(false)
73 | try {
74 | org.rocksdb.RocksDB.open(options, s"$path/$name").close
75 | true
76 | } catch {
77 | case _: Exception => false
78 | }
79 | }
80 |
81 | override def compactTable(name: String): Unit = {
82 | val db = org.rocksdb.RocksDB.open(s"$path/$name"); db.compactRange; db.close
83 | }
84 | }
85 |
86 | object RocksDB {
87 |
88 | /** Creates a RocksDB database.
89 | *
90 | * @param path path to database.
91 | */
92 | def create(path: String): RocksDB = {
93 | val dir = new java.io.File(path)
94 | require(!dir.exists, s"Directory $path exists")
95 |
96 | dir.mkdir
97 | new RocksDB(path)
98 | }
99 |
100 | def apply(path: String): RocksDB = {
101 | new RocksDB(path)
102 | }
103 | }
104 |
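105 | // A minimal usage sketch (not part of the library) of the directory-per-table
106 | // layout described above; it assumes the RocksTable operations exercised in
107 | // RocksDBSpec (createTable/put/close) and the implicit byte conversions from
108 | // unicorn.util.
109 | object RocksDBExample {
110 |   import unicorn.bigtable._, unicorn.util._
111 |
112 |   def main(args: Array[String]): Unit = {
113 |     val db = RocksDB.create("/tmp/unicorn-rocksdb-example") // creates the top-level directory
114 |     val table = db.createTable("example", "cf1")            // each table is a subdirectory
115 |     table.put("row1", "cf1", "c1", "v1", 0L)
116 |     table.close
117 |     db.dropTable("example")
118 |   }
119 | }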
--------------------------------------------------------------------------------
/rocksdb/src/test/scala/unicorn/bigtable/rocksdb/RocksDBSpec.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.bigtable.rocksdb
18 |
19 | import org.specs2.mutable._
20 | import org.specs2.specification.BeforeAfterAll
21 | import unicorn.bigtable._
22 | import unicorn.util._
23 |
24 | /**
25 | * @author Haifeng Li
26 | */
27 | class RocksDBSpec extends Specification with BeforeAfterAll {
28 | // Make sure the examples run one by one.
29 | // Otherwise, test cases on the same columns will fail due to concurrency.
30 | sequential
31 | val db = RocksDB.create("/tmp/unicorn-rocksdb")
32 | val tableName = "unicorn_test"
33 | var table: RocksTable = null
34 |
35 | override def beforeAll = {
36 | table = db.createTable(tableName, "cf1", "cf2")
37 | }
38 |
39 | override def afterAll = {
40 | if (table != null) table.close
41 | new java.io.File("/tmp/unicorn-rocksdb").delete
42 | }
43 |
44 | "RocksDB" should {
45 | "get the put" in {
46 | table.put("row1", "cf1", "c1", "v1", 0L)
47 | new String(table("row1", "cf1", "c1").get, utf8) === "v1"
48 | table.delete("row1", "cf1", "c1")
49 | table("row1", "cf1", "c1".getBytes(utf8)) === None
50 | }
51 |
52 | "get the family" in {
53 | table.put("row1", "cf1", Column("c1", "v1"), Column("c2", "v2"))
54 | val columns = table.get("row1", "cf1")
55 | columns.size === 2
56 | new String(columns(0).value, utf8) === "v1"
57 | new String(columns(1).value, utf8) === "v2"
58 |
59 | table.delete("row1", "cf1")
60 | val empty = table.get("row1", "cf1")
61 | empty.size === 0
62 | }
63 |
64 | "get empty family" in {
65 | val columns = table.get("row1", "cf1")
66 | columns.size === 0
67 | }
68 |
69 | "get nonexistent family" in {
70 | table.get("row1", "cf5") must throwA[java.util.NoSuchElementException]
71 | }
72 |
73 | "get the row" in {
74 | table.put("row1", Seq(
75 | ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))),
76 | ColumnFamily("cf2", Seq(Column("c3", "v3"))))
77 | )
78 | val families = table.get("row1")
79 | families.size === 2
80 | families(0).columns.size === 2
81 | families(1).columns.size === 1
82 | families(0).family === "cf1"
83 | families(1).family === "cf2"
84 |
85 | new String(families(0).columns(0).value, utf8) === "v1"
86 | new String(families(0).columns(1).value, utf8) === "v2"
87 | new String(families(1).columns(0).value, utf8) === "v3"
88 |
89 | table.delete("row1", "cf1")
90 | val cf1 = table.get("row1", "cf1")
91 | cf1.size === 0
92 |
93 | table.get("row1").size === 1
94 | val cf2 = table.get("row1", "cf2")
95 | cf2.size === 1
96 |
97 | table.delete("row1")
98 | table.get("row1").size === 0
99 | }
100 |
101 | "get nonexistent row" in {
102 | val families = table.get("row5")
103 | families.size === 0
104 | }
105 |
106 | "get multiple rows" in {
107 | val row1 = Row("row1",
108 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2"))),
109 | ColumnFamily("cf2", Seq(Column("c3", "v3")))))
110 |
111 | val row2 = Row("row2".getBytes(utf8),
112 | Seq(ColumnFamily("cf1", Seq(Column("c1", "v1"), Column("c2", "v2")))))
113 |
114 | table.putBatch(row1, row2)
115 |
116 | val keys = Seq("row1", "row2")
117 | val rows = table.getBatch(keys)
118 | rows.size === 2
119 | rows(0).families.size === 2
120 | rows(1).families.size === 1
121 |
122 | table.deleteBatch(keys)
123 | table.getBatch(keys).size === 0
124 | }
125 | }
126 | }
127 |
--------------------------------------------------------------------------------
/search/build.sbt:
--------------------------------------------------------------------------------
1 | name := "unicorn-search"
2 |
3 | libraryDependencies += "com.github.haifengl" % "smile-nlp" % "1.0.3"
4 |
5 |
--------------------------------------------------------------------------------
/search/src/main/scala/unicorn/search/TextIndex.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.search
18 |
19 | import smile.nlp.stemmer.Stemmer
20 |
21 | /**
22 | * Text inverted index for full text search and relevance ranking.
23 | *
24 | * @author Haifeng Li
25 | */
26 | trait TextIndex {
27 | val TextIndexFamily = "text_index"
28 | val TermIndexSuffix = " idx"
29 | val TermTitleIndexSuffix = " tidx"
30 | val TermAnchorIndexSuffix = " aidx"
31 | val TermPositionSuffix = " pos"
32 | val DocFieldSeparator = "##"
33 |
34 | val CorpusMetaKey = "unicorn.text.corpus.meta"
35 | val TextBodyLengthKey = "unicorn.text.corpus.text.size"
36 | val TextTitleLengthKey = "unicorn.text.corpus.text.title.size"
37 | val TextAnchorLengthKey = "unicorn.text.corpus.text.anchor.size"
38 | val PageRankKey = "unicorn.text.corpus.text.page_rank"
39 | val BrowseRankKey = "unicorn.text.corpus.text.browse_rank"
40 |
41 | /**
42 | * Optional stemmer.
43 | */
44 | var stemmer: Option[Stemmer] = None
45 | }
46 |
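47 | // A small illustration (a sketch, not part of the trait) of how the constants
48 | // above compose keys, following their usage in TextIndexBuilder and TextSearch:
49 | // inverted-index rows are keyed by term plus suffix, and columns by document id
50 | // plus field name.
51 | object TextIndexKeyExample extends TextIndex {
52 |   val termRow = "unicorn" + TermIndexSuffix             // "unicorn idx"
53 |   val docColumn = "doc1" + DocFieldSeparator + "title"  // "doc1##title"
54 | }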
--------------------------------------------------------------------------------
/search/src/main/scala/unicorn/search/TextIndexBuilder.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.search
18 |
19 | import unicorn._, json._
20 | import unicorn.core.Document
21 | import unicorn.store.Dataset
22 | import smile.nlp.tokenizer.SimpleTokenizer
23 | import smile.nlp.tokenizer.SimpleSentenceSplitter
24 | import smile.nlp.dictionary.EnglishStopWords
25 | import smile.nlp.dictionary.EnglishPunctuations
26 |
27 | /**
28 | * @author Haifeng Li
29 | */
30 | class TextIndexBuilder(storage: Dataset) extends TextIndex {
31 |
32 | val textLength = new Document(TextBodyLengthKey, TextIndexFamily)
33 | val titleLength = new Document(TextTitleLengthKey, TextIndexFamily)
34 | val anchorLength = new Document(TextAnchorLengthKey, TextIndexFamily)
35 |
36 | /**
37 | * Sentence splitter.
38 | */
39 | var sentenceSplitter = SimpleSentenceSplitter.getInstance
40 |
41 | /**
42 | * Tokenizer on sentences
43 | */
44 | var tokenizer = new SimpleTokenizer
45 |
46 | /**
47 | * Dictionary of stop words.
48 | */
49 | var stopWords = EnglishStopWords.DEFAULT
50 |
51 | /**
52 | * Punctuation.
53 | */
54 | var punctuations = EnglishPunctuations.getInstance
55 |
56 | /**
57 | * Process each token (after filtering stop words, numbers, and optional stemming).
58 | */
59 | def foreach[U](text: String)(f: ((String, Int)) => U): Unit = {
60 | var pos = 0
61 |
62 | sentenceSplitter.split(text).foreach { sentence =>
63 | tokenizer.split(sentence).foreach { token =>
64 | pos += 1
65 | val lower = token.toLowerCase
66 | if (!(punctuations.contains(lower) ||
67 | stopWords.contains(lower) ||
68 | lower.length == 1 ||
69 | lower.matches("[0-9\\.\\-\\+\\|\\(\\)]+"))) {
70 | val word = stemmer match {
71 | case Some(stemmer) => stemmer.stem(lower)
72 | case None => lower
73 | }
74 |
75 | f(word, pos)
76 | }
77 | }
78 |
79 | pos += 1
80 | }
81 | }
82 |
83 | /**
84 | * Add a text into index.
85 | * @param doc The id of document that owns the text.
86 | * @param field The field name of text in the document.
87 | * @param text The text body.
88 | */
89 | private def add(doc: String, field: String, text: String, sizeDoc: Document, indexKeySuffix: String) {
90 | val termFreq = scala.collection.mutable.Map[String, Int]().withDefaultValue(0)
91 | //val termPos = scala.collection.mutable.Map[String, Array[Int]]().withDefaultValue(Array[Int]())
92 |
93 | var size = 0
94 | foreach(text) { case (word, pos) =>
95 | size += 1
96 | termFreq(word) += 1
97 | //termPos(word) :+ pos
98 | }
99 |
100 | val key = doc + DocFieldSeparator + field.replace(Document.FieldSeparator, DocFieldSeparator)
101 |
102 | sizeDoc(key) = JsInt(size)
103 |
104 | termFreq.foreach { case (word, freq) =>
105 | //TODO storage.put(word + indexKeySuffix, TextIndexFamily, key, freq)
106 | }
107 |
108 | /*
109 | termPos.foreach { case (word, pos) =>
110 | storage.put(word + TermPositionSuffix, TextIndexFamily, key, JsonBlobValue(pos).bytes)
111 | }
112 | */
113 |
114 | // termFreq and termPos updates will also be committed here.
115 | sizeDoc into storage
116 | }
117 |
118 | /**
119 | * Add a text into index.
120 | * @param doc The id of document that owns the text.
121 | * @param field The field name of text in the document.
122 | * @param text The text body.
123 | */
124 | def add(doc: String, field: String, text: String) {
125 | add(doc, field, text, textLength, TermIndexSuffix)
126 | }
127 |
128 | /**
129 | * Add a title into index.
130 | * @param doc The id of document that owns the text.
131 | * @param field The field name of text in the document.
132 | * @param title The title.
133 | */
134 | def addTitle(doc: String, field: String, title: String) {
135 | add(doc, field, title, titleLength, TermTitleIndexSuffix)
136 | }
137 |
138 | /**
139 | * Add an anchor text into index.
140 | * @param doc The id of document that owns the text.
141 | * @param field The field name of text in the document.
142 | * @param anchor The anchor text.
143 | */
144 | def addAnchor(doc: String, field: String, anchor: String) {
145 | add(doc, field, anchor, anchorLength, TermAnchorIndexSuffix)
146 | }
147 | }
148 |
149 | object TextIndexBuilder {
150 | def apply(storage: Dataset): TextIndexBuilder = {
151 | new TextIndexBuilder(storage)
152 | }
153 | }
154 |
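155 | // A short sketch of the token pipeline implemented by foreach above: sentences
156 | // are split, tokens lowercased, stop words/punctuation/numbers dropped, and the
157 | // optional stemmer applied. The Dataset argument is assumed to be supplied by
158 | // the caller.
159 | object TextIndexBuilderExample {
160 |   def printTokens(storage: Dataset): Unit = {
161 |     val builder = TextIndexBuilder(storage)
162 |     builder.foreach("Unicorn is a BigTable, document and graph database.") {
163 |       case (word, pos) => println(s"$word @ $pos")
164 |     }
165 |   }
166 | }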
--------------------------------------------------------------------------------
/search/src/main/scala/unicorn/search/TextSearch.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.search
18 |
19 | import unicorn._, json._
20 | import unicorn.core.Document
21 | import unicorn.store.Dataset
22 | import smile.nlp.relevance.BM25
23 |
24 | /**
25 | * @author Haifeng Li
26 | */
27 | class TextSearch(storage: Dataset, numTexts: Long) extends TextIndex {
28 | val pagerank = new Document("unicorn.text.corpus.text.page_rank", "text_index").from(storage)
29 | val defaultPageRank = math.log(0.85 / numTexts)
30 |
31 | val textLength = new Document(TextBodyLengthKey, TextIndexFamily).from(storage)
32 | val titleLength = new Document(TextTitleLengthKey, TextIndexFamily).from(storage)
33 | val anchorLength = new Document(TextAnchorLengthKey, TextIndexFamily).from(storage)
34 |
35 | /**
36 | * Relevance ranking algorithm.
37 | */
38 | val ranker = new BM25
39 |
40 | /**
41 | * Search terms in corpus. The results are sorted by relevance.
42 | */
43 | def search(terms: String*): Array[((Document, String), Double)] = {
44 | val rank = scala.collection.mutable.Map[(Document, String), Double]().withDefaultValue(0.0)
45 | terms.foreach { term => search(term, rank) }
46 | rank.toArray.sortBy(_._2).reverse
47 | }
48 |
49 | def search(term: String, rank: scala.collection.mutable.Map[(Document, String), Double]) {
50 | val lower = term.toLowerCase
51 | val word = stemmer match {
52 | case Some(stemmer) => stemmer.stem(lower)
53 | case None => lower
54 | }
55 |
56 | val key = word + TermIndexSuffix
57 | val invertedText = new Document(word + TermIndexSuffix, TextIndexFamily).from(storage).loadAttributes
58 | if (invertedText.attributes.size == 0) return
59 |
60 | val invertedTitle = new Document(word + TermTitleIndexSuffix, TextIndexFamily).from(storage).loadAttributes
61 | val invertedAnchor = new Document(word + TermAnchorIndexSuffix, TextIndexFamily).from(storage).loadAttributes
62 |
63 | val docs = (invertedText.map { case (docField, value) => docField }).toSeq
64 | textLength.select(docs: _*)
65 | titleLength.select(docs: _*)
66 | anchorLength.select(docs: _*)
67 |
68 | var avgTextLength = 0.0
69 | var avgTitleLength = 0.0
70 | var avgAnchorLength = 0.0
71 |
72 | var numMatchedTexts = 0
73 | var numMatchedTitles = 0
74 | var numMatchedAnchors = 0
75 |
76 | invertedText.foreach { case (docField, value) =>
77 | val n1: Int = textLength(docField)
78 | if (n1 > 0) {
79 | numMatchedTexts += 1
80 | avgTextLength += n1
81 | }
82 |
83 | val n2: Int = titleLength(docField)
84 | if (n2 > 0) {
85 | numMatchedTitles += 1
86 | avgTitleLength += n2
87 | }
88 |
89 | val n3: Int = anchorLength(docField)
90 | if (n3 > 0) {
91 | numMatchedAnchors += 1
92 | avgAnchorLength += n3
93 | }
94 | }
95 |
96 | if (numMatchedTexts > 0) avgTextLength /= numMatchedTexts
97 | if (numMatchedTitles > 0) avgTitleLength /= numMatchedTitles
98 | if (numMatchedAnchors > 0) avgAnchorLength /= numMatchedAnchors
99 |
100 | pagerank.select(invertedText.map { case (docField, _) => docField }.toArray : _*)
101 |
102 | invertedText.foreach { case (docField, value) =>
103 | val id = docField.split(DocFieldSeparator, 2)
104 |
105 | if (id.length == 2) {
106 | val doc = Document(id(0)).from(storage)
107 | val field = id(1).replace(DocFieldSeparator, Document.FieldSeparator)
108 |
109 | val termFreq: Int = value
110 | val titleTermFreq: Int = invertedTitle(docField)
111 | val anchorTermFreq: Int = invertedAnchor(docField)
112 |
113 | val bm25 = ranker.score(termFreq, textLength(docField), avgTextLength,
114 | titleTermFreq, titleLength(docField), avgTitleLength,
115 | anchorTermFreq, anchorLength(docField), avgAnchorLength,
116 | numTexts, invertedText.size)
117 |
118 | val pr = pagerank(docField) match {
119 | case JsDouble(value) => math.log(value)
120 | case _ => defaultPageRank
121 | }
122 |
123 | rank((doc, field)) += (bm25 + pr)
124 | }
125 | }
126 | }
127 | }
128 |
129 | object TextSearch {
130 | def apply(storage: Dataset, numTexts: Long): TextSearch = {
131 | new TextSearch(storage, numTexts)
132 | }
133 | }
134 |
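135 | // A usage sketch (not part of the class): ranks documents for a multi-term
136 | // query. Per `search` above, results are ((document, field), score) pairs
137 | // sorted by descending BM25-plus-log-PageRank score. The Dataset and corpus
138 | // size are assumed to be supplied by the caller.
139 | object TextSearchExample {
140 |   def topHits(storage: Dataset, numTexts: Long): Unit = {
141 |     val engine = TextSearch(storage, numTexts)
142 |     engine.search("unicorn", "graph").take(10).foreach { case ((doc, field), score) =>
143 |       println(s"$doc.$field: $score")
144 |     }
145 |   }
146 | }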
--------------------------------------------------------------------------------
/shell/build.sbt:
--------------------------------------------------------------------------------
1 | name := "unicorn-shell"
2 |
3 | mainClass in Compile := Some("unicorn.shell.Main")
4 |
5 | // native packager
6 | enablePlugins(JavaAppPackaging)
7 |
8 | maintainer := "Haifeng Li "
9 |
10 | packageName := "unicorn"
11 |
12 | packageSummary := "Unicorn"
13 |
14 | packageDescription := "Unicorn"
15 |
16 | executableScriptName := "unicorn"
17 |
18 | bashScriptConfigLocation := Some("${app_home}/../conf/unicorn.ini")
19 |
20 | bashScriptExtraDefines += """addJava "-Dsmile.home=${app_home}""""
21 |
22 | bashScriptExtraDefines += """addJava "-Dscala.repl.autoruncode=${app_home}/init.scala""""
23 |
24 | bashScriptExtraDefines += """addJava "-Dconfig.file=${app_home}/../conf/unicorn.conf""""
25 |
26 | // native packager Docker plugin
27 | enablePlugins(DockerPlugin)
28 |
29 | dockerBaseImage := "dajobe/hbase"
30 |
31 | packageName in Docker := "haifengl/unicorn"
32 |
33 | dockerUpdateLatest := true
34 |
35 | // BuildInfo
36 | enablePlugins(BuildInfoPlugin)
37 |
38 | buildInfoKeys := Seq[BuildInfoKey](name, version, scalaVersion, sbtVersion)
39 |
40 | buildInfoPackage := "unicorn.shell"
41 |
42 | buildInfoOptions += BuildInfoOption.BuildTime
43 |
44 | libraryDependencies += "org.scala-lang" % "scala-compiler" % "2.11.7"
45 |
46 | libraryDependencies += "org.slf4j" % "slf4j-simple" % "1.7.18"
47 |
--------------------------------------------------------------------------------
/shell/src/main/scala/unicorn/shell/Main.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.shell
18 |
19 | import scala.tools.nsc._, GenericRunnerCommand._, io.File
20 |
21 | /** An object that runs Smile script or interactive shell.
22 | * Based on Scala MainGenericRunner.
23 | *
24 | * @author Haifeng Li
25 | */
26 | object Main extends App {
27 |
28 | // This is actually the main function
29 | if (!process(args)) sys.exit(1)
30 |
31 | def errorFn(str: String, e: Option[Throwable] = None, isFailure: Boolean = true): Boolean = {
32 | if (str.nonEmpty) Console.err println str
33 | e foreach (_.printStackTrace())
34 | !isFailure
35 | }
36 |
37 | def process(args: Array[String]): Boolean = {
38 | val command = new GenericRunnerCommand(args.toList, (x: String) => errorFn(x))
39 | import command.{ settings, howToRun, thingToRun, shortUsageMsg, shouldStopWithInfo }
40 | settings.usejavacp.value = true
41 | settings.deprecation.value = true
42 | def sampleCompiler = new Global(settings) // def so it's not created unless needed
43 |
44 | def run(): Boolean = {
45 | def isE = !settings.execute.isDefault
46 | def dashe = settings.execute.value
47 |
48 | def isI = !settings.loadfiles.isDefault
49 | def dashi = settings.loadfiles.value
50 |
51 | // Deadlocks on startup under -i unless we disable async.
52 | if (isI)
53 | settings.Yreplsync.value = true
54 |
55 | def combinedCode = {
56 | val files = if (isI) dashi map (file => File(file).slurp()) else Nil
57 | val str = if (isE) List(dashe) else Nil
58 |
59 | files ++ str mkString "\n\n"
60 | }
61 |
62 | def runTarget(): Either[Throwable, Boolean] = howToRun match {
63 | case AsObject =>
64 | ObjectRunner.runAndCatch(settings.classpathURLs, thingToRun, command.arguments)
65 | case AsScript =>
66 | ScriptRunner.runScriptAndCatch(settings, thingToRun, command.arguments)
67 | case Error =>
68 | Right(false)
69 | case _ =>
70 | Right(new Shell process settings)
71 | }
72 |
73 | /** If -e and -i were both given, we want to execute the -e code after the
74 | * -i files have been included, so they are read into strings and prepended to
75 | * the code given in -e. The -i option is documented to only make sense
76 | * interactively so this is a pretty reasonable assumption.
77 | *
78 | * This all needs a rewrite though.
79 | */
80 | if (isE) {
81 | ScriptRunner.runCommand(settings, combinedCode, thingToRun +: command.arguments)
82 | }
83 | else runTarget() match {
84 | case Left(ex) => errorFn("", Some(ex)) // there must be a useful message of hope to offer here
85 | case Right(b) => b
86 | }
87 | }
88 |
89 | if (!command.ok)
90 | errorFn(f"%n$shortUsageMsg")
91 | else if (shouldStopWithInfo)
92 | errorFn(command getInfoMessage sampleCompiler, isFailure = false)
93 | else
94 | run()
95 | }
96 | }
--------------------------------------------------------------------------------
/shell/src/main/scala/unicorn/shell/Shell.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.shell
18 |
19 | import scala.tools.nsc.interpreter.ILoop
20 |
21 | /** Unicorn shell.
22 | *
23 | * @author Haifeng Li
24 | */
25 | class Shell extends ILoop {
26 | override def prompt = "unicorn> "
27 | override def printWelcome = echo(
28 | raw"""
29 | | . . . .
30 | | ,`,`,`,`,
31 | | . . . . `\`\`\`\;
32 | | `\`\`\`\`, ~|;!;!;\!
33 | | ~\;\;\;\|\ (--,!!!~`! .
34 | | (--,\\\===~\ (--,|||~`! ./
35 | | (--,\\\===~\ `,-,~,=,:. _,//
36 | | (--,\\\==~`\ ~-=~-.---|\;/J, Welcome to the Unicorn Database
37 | | (--,\\\((```==. ~'`~/ a | BigTable, Document and Graph
38 | | (-,.\\('('(`\\. ~'=~| \_. \ Full Text Search
39 | | (,--(,(,(,'\\. ~'=| \\_;>
40 | | (,-( ,(,(,;\\ ~=/ \ Haifeng Li
41 | | (,-/ (.(.(,;\\,/ ) ADP Innovation Lab
42 | | (,--/,;,;,;,\\ ./------.
43 | | (==,-;-'`;' /_,----`. \
44 | | ,.--_,__.-' `--. ` \
45 | | (='~-_,--/ , ,!,___--. \ \_)
46 | | (-/~( | \ ,_- | ) /_|
47 | | (~/((\ )\._, |-' _,/ /
48 | | \\)))) / ./~. | \_\;
49 | | ,__///// / / ) /
50 | | '===~' | | (, <.
51 | | / / \. \
52 | | _/ / \_\
53 | | /_!/ >_\
54 | |
55 | | Welcome to Unicorn Shell; enter ':help' for the list of commands.
56 | | Type ":quit" to leave the Unicorn Shell
57 | | Version ${BuildInfo.version}, Scala ${BuildInfo.scalaVersion}, SBT ${BuildInfo.sbtVersion}, Built at ${BuildInfo.builtAtString}
58 | |===============================================================================
59 | """.stripMargin
60 | )
61 | }
62 |
--------------------------------------------------------------------------------
/shell/src/universal/bin/init.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | import java.util.{Date, UUID}
18 | import unicorn.util._, unicorn.oid._, unicorn.json._
19 | import unicorn.bigtable.hbase.HBase
20 | import unicorn.bigtable.cassandra.Cassandra
21 | import unicorn.bigtable.accumulo.Accumulo
22 | import unicorn.bigtable.rocksdb.RocksDB
23 | import unicorn.unibase._
24 | import unicorn.unibase.graph._
25 | import unicorn.narwhal._
26 | import unicorn.sql._
27 |
28 |
--------------------------------------------------------------------------------
/shell/src/universal/conf/log4j.properties:
--------------------------------------------------------------------------------
1 | # Define some default values that can be overridden by system properties
2 | unicorn.root.logger=DEBUG,console,RFA
3 | unicorn.log.dir=/var/log/unicorn
4 | unicorn.log.file=unicorn.log
5 |
6 | # Define the root logger to the system property "unicorn.root.logger".
7 | log4j.rootLogger=${unicorn.root.logger}
8 |
9 | # Logging Threshold
10 | log4j.threshold=ALL
11 |
12 | #
13 | # Daily Rolling File Appender
14 | #
15 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
16 | log4j.appender.DRFA.File=${unicorn.log.dir}/${unicorn.log.file}
17 |
18 | # Rollover at midnight
19 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
20 |
21 | # 30-day backup
22 | #log4j.appender.DRFA.MaxBackupIndex=30
23 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
24 |
25 | # Pattern format: Date LogLevel LoggerName LogMessage
26 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n
27 |
28 | # Rolling File Appender properties
29 | unicorn.log.maxfilesize=256MB
30 | unicorn.log.maxbackupindex=20
31 |
32 | # Rolling File Appender
33 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender
34 | log4j.appender.RFA.File=${unicorn.log.dir}/${unicorn.log.file}
35 |
36 | log4j.appender.RFA.MaxFileSize=${unicorn.log.maxfilesize}
37 | log4j.appender.RFA.MaxBackupIndex=${unicorn.log.maxbackupindex}
38 |
39 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
40 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n
41 |
42 |
43 | #
44 | # Null Appender
45 | #
46 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
47 |
48 | #
49 | # console
50 | # Add "console" to rootlogger above if you want to use this
51 | #
52 | log4j.appender.console=org.apache.log4j.ConsoleAppender
53 | log4j.appender.console.target=System.err
54 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
55 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n
56 |
57 | # Custom Logging levels
58 | log4j.logger.kafka=INFO
59 | log4j.logger.org.apache.zookeeper=INFO
60 | log4j.logger.com.jayway.jsonpath=INFO
61 |
--------------------------------------------------------------------------------
/shell/src/universal/conf/unicorn.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adplabs/unicorn/48d35ddf17358c711d38fd685ace8e753c50a16c/shell/src/universal/conf/unicorn.conf
--------------------------------------------------------------------------------
/shell/src/universal/conf/unicorn.ini:
--------------------------------------------------------------------------------
1 | # Setting -X directly (-J is stripped)
2 | # -J-X
3 | -J-Xmx4096M
4 | -J-Xms1024M
5 |
6 | # Add additional jvm parameters
7 | -J-server
8 |
9 | # Performance optimization
10 | -J-XX:+AggressiveOpts
11 |
12 | # G1 garbage collector
13 | -J-XX:+UseG1GC
14 |
15 | # Optimize string duplication, which happens a lot when parsing a data file
16 | -J-XX:+UseStringDeduplication
17 |
18 | # Turn on JVM debugging, open at the given port
19 | # -jvm-debug
20 |
21 | # Don't run the java version check
22 | # -no-version-check
23 |
24 | # enabling debug and sending -d as app argument
25 | # the '--' prevents app-parameter swallowing when
26 | # using a reserved parameter. See #184
27 | # -d -- -d
--------------------------------------------------------------------------------
/shell/src/universal/examples/dbpedia.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | // import dbpedia to unibase
6 |
7 | import scala.io.Source
8 | import unicorn.json._
9 | import unicorn.bigtable.rocksdb._
10 | import unicorn.unibase._
11 | import unicorn.unibase.graph._
12 |
13 | val db = Unibase(RocksDB.create("/tmp/unicorn-dbpedia"))
14 | db.createGraph("dbpedia")
15 | val dbpedia = db.graph("dbpedia", new Snowflake(0))
16 |
17 | // Although we can parse .gz files directly, we don't support bz2 compressed files.
18 | // Please download and unzip the file below first, then point rdf() at the local copy.
19 | dbpedia.rdf("http://downloads.dbpedia.org/2015-10/core-i18n/en/page_links_en.ttl.bz2")
--------------------------------------------------------------------------------
/shell/src/universal/examples/gods.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | import java.util._
6 | import unicorn.json._
7 | import unicorn.bigtable._
8 | import unicorn.bigtable.accumulo._
9 | import unicorn.unibase._
10 | import unicorn.unibase.idgen._
11 | import unicorn.unibase.graph._
12 |
13 | val db = Unibase(Accumulo())
14 | db.createGraph("gods")
15 | val gods = db.graph("gods", new Snowflake(0))
16 |
17 | val saturn = gods.addVertex(json"""{"label": "titan", "name": "saturn", "age": 10000}""")
18 | val sky = gods.addVertex(json"""{"label": "location", "name": "sky"}""")
19 | val sea = gods.addVertex(json"""{"label": "location", "name": "sea"}""")
20 | val jupiter = gods.addVertex(json"""{"label": "god", "name": "jupiter", "age": 5000}""")
21 | val neptune = gods.addVertex(json"""{"label": "god", "name": "neptune", "age": 4500}""")
22 | val hercules = gods.addVertex(json"""{"label": "demigod", "name": "hercules", "age": 30}""")
23 | val alcmene = gods.addVertex(json"""{"label": "human", "name": "alcmene", "age": 45}""")
24 | val pluto = gods.addVertex(json"""{"label": "god", "name": "pluto", "age": 4000}""")
25 | val nemean = gods.addVertex(json"""{"label": "monster", "name": "nemean"}""")
26 | val hydra = gods.addVertex(json"""{"label": "monster", "name": "hydra"}""")
27 | val cerberus = gods.addVertex(json"""{"label": "monster", "name": "cerberus"}""")
28 | val tartarus = gods.addVertex(json"""{"label": "location", "name": "tartarus"}""")
29 |
30 | gods.addEdge(jupiter, "father", saturn)
31 | gods.addEdge(jupiter, "lives", sky, json"""{"reason": "loves fresh breezes"}""")
32 | gods.addEdge(jupiter, "brother", neptune)
33 | gods.addEdge(jupiter, "brother", pluto)
34 |
35 | gods.addEdge(neptune, "lives", sea, json"""{"reason": "loves waves"}""")
36 | gods.addEdge(neptune, "brother", jupiter)
37 | gods.addEdge(neptune, "brother", pluto)
38 |
39 | gods.addEdge(hercules, "father", jupiter)
40 | gods.addEdge(hercules, "mother", alcmene)
41 | gods.addEdge(hercules, "battled", nemean, json"""{"time": 1, "place": {"latitude": 38.1, "longitude": 23.7}}""")
42 | gods.addEdge(hercules, "battled", hydra, json"""{"time": 2, "place": {"latitude": 37.7, "longitude": 23.9}}""")
43 | gods.addEdge(hercules, "battled", cerberus, json"""{"time": 12, "place": {"latitude": 39.0, "longitude": 22.0}}""")
44 |
45 | gods.addEdge(pluto, "brother", jupiter)
46 | gods.addEdge(pluto, "brother", neptune)
47 | gods.addEdge(pluto, "lives", tartarus, json"""{"reason": "no fear of death"}""")
48 | gods.addEdge(pluto, "pet", cerberus)
49 |
50 | gods.addEdge(cerberus, "lives", tartarus)
51 |
52 | gods(hydra)
53 | gods(hydra).in("battled")
54 | gods(hydra).out("battled")
55 | gods(neptune, "lives", sea)
56 | gods(neptune, "lives", jupiter)
57 | gods(neptune, "brother", jupiter)
58 |
59 | gods(hydra).label
60 |
61 | // Gremlin
62 | val g = gods.traversal
63 | g.v(saturn).in("father").in("father").name
--------------------------------------------------------------------------------
/shell/src/universal/examples/json.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | import java.util._
6 | import unicorn.json._
7 |
8 | // Basic JSON
9 | val json =
10 | json"""
11 | {
12 | "store": {
13 | "book": [
14 | {
15 | "category": "reference",
16 | "author": "Nigel Rees",
17 | "title": "Sayings of the Century",
18 | "price": 8.95
19 | },
20 | {
21 | "category": "fiction",
22 | "author": "Evelyn Waugh",
23 | "title": "Sword of Honour",
24 | "price": 12.99
25 | },
26 | {
27 | "category": "fiction",
28 | "author": "Herman Melville",
29 | "title": "Moby Dick",
30 | "isbn": "0-553-21311-3",
31 | "price": 8.99
32 | },
33 | {
34 | "category": "fiction",
35 | "author": "J. R. R. Tolkien",
36 | "title": "The Lord of the Rings",
37 | "isbn": "0-395-19395-8",
38 | "price": 22.99
39 | }
40 | ],
41 | "bicycle": {
42 | "color": "red",
43 | "price": 19.95
44 | }
45 | }
46 | }
47 | """
48 |
49 | println(json("store")("bicycle")("color"))
50 | println(json.store.bicycle.color)
51 | println(json.store.book(0).author)
52 |
53 | json.store.bicycle.color = "green"
54 | println(json.store.bicycle.color)
55 |
56 | json("store")("book") remove 0
57 | println(json.store.book)
58 |
59 | val a = JsArray(1, 2, 3, 4)
60 | a += 5
61 | println(a)
62 |
63 | val b: JsArray = Array(1, 2, 3, 4)
64 | b ++= JsArray(5, 6)
65 | println(b)
66 |
67 | val obj = JsObject(
68 | "key1" -> JsObject(
69 | "key11" -> JsObject("tags" -> JsArray("alpha1", "beta1", "gamma1"))
70 | ),
71 | "key2" -> JsObject(
72 | "key21" -> JsObject("tags" -> JsArray("alpha2", "beta2", "gamma2"))
73 | ),
74 | "key3" -> "blabla"
75 | )
76 |
77 | // retrieve 1-level recursive path
78 | println(obj \\ "tags")
79 | // retrieve 2-level recursive path
80 | println(obj \ "key1" \\ "tags")
81 |
82 |
83 | // JsonPath
84 | val jsonPath = JsonPath(
85 | json"""
86 | {
87 | "id": 1,
88 | "name": "Joe",
89 | "tags": ["programmer", "husband", "father", "golfer"],
90 | "address": [
91 | {
92 | "id": 2,
93 | "street": "123 Main St.",
94 | "city": "Springfield",
95 | "state": "PA"
96 | },
97 | {
98 | "id": 3,
99 | "street": "456 Main St.",
100 | "city": "Devon",
101 | "state": "PA",
102 | "work": true
103 | },
104 | {
105 | "id": 4,
106 | "street": "789 Main St.",
107 | "city": "Sea Isle City",
108 | "state": "NJ"
109 | }
110 | ]
111 | }
112 | """)
113 |
114 | // field
115 | println(jsonPath("$.id"))
116 | println(jsonPath("$['id']"))
117 |
118 | // recursive field
119 | println(jsonPath("$..id"))
120 |
121 | // multi fields
122 | println(jsonPath("$['id', 'name']"))
123 |
124 | // any field
125 | println(jsonPath("$.*"))
126 | println(jsonPath("$.tags.*"))
127 | println(jsonPath("$['tags'].*"))
128 |
129 | // recursive any
130 | println(jsonPath("$..*"))
131 |
132 | // array slices
133 | println(jsonPath("$.tags[2]"))
134 | println(jsonPath("$.tags[0:3:2]"))
135 | println(jsonPath("$.tags[-2:]"))
136 | println(jsonPath("$.tags[:-2]"))
137 |
138 | // array random
139 | println(jsonPath("$.tags[0,2]"))
140 | println(jsonPath("$.tags[-1]"))
141 |
142 | // array recursive
143 | println(jsonPath("$.address[*].city"))
144 |
145 |
146 | // has filter
147 | println(jsonPath("$.address[?(@.work)]"))
148 |
149 |
150 | // comparison filter
151 | println(jsonPath("$.address[?(@.id < 3)]"))
152 | println(jsonPath("$.address[?(@.id <= 3)]"))
153 |
154 | println(jsonPath("$.address[?(@.id > 2)]"))
155 | println(jsonPath("$.address[?(@.id >= 2)]"))
156 |
157 | println(jsonPath("$.address[?(@.state == 'PA')]"))
158 | println(jsonPath("$.address[?(@.city == 'Springfield')]"))
159 | println(jsonPath("$.address[?(@.city != 'Devon')]"))
160 |
161 |
162 | // boolean filter
163 | println(jsonPath("$.address[?(@.id > 1 && @.state != 'PA')]"))
164 | println(jsonPath("$.address[?(@.id < 4 && @.state == 'PA')]"))
165 | println(jsonPath("$.address[?(@.id == 4 || @.state == 'PA')]"))
166 | println(jsonPath("$.address[?(@.id == 4 || @.state == 'NJ')]"))
167 |
168 | // update field of nonexistent object
169 | jsonPath("$.person.id") = 10
170 | println(jsonPath("$.person"))
171 | println(jsonPath("$.person.id"))
172 |
173 |
174 | // update multi fields of nonexistent object
175 | jsonPath("$['person']['id', 'name']") = 30
176 | println(jsonPath("$.person"))
177 | println(jsonPath("$['person']['id', 'name']"))
178 |
179 |
180 | // update array slices of nonexistent object
181 | jsonPath("$.person.tags[1:3]") = "father"
182 | println(jsonPath("$.person.tags"))
183 |
184 |
185 | // update array random of nonexistent object
186 | jsonPath("$.person.tags[2]") = "father"
187 | println(jsonPath("$.person.tags"))
188 |
--------------------------------------------------------------------------------
/shell/src/universal/examples/pagerank.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc -J-Xmx8192M "$0" "$@"
3 | !#
4 |
5 | import scala.io.Source
6 |
7 | def triple(line: String): (String, String, String) = {
8 | val tokens = line.split(" ", 3)
9 | val subject = java.net.URLDecoder.decode(tokens(0).replace("<", "").replace(">", ""), "UTF-8")
10 | val predicate = tokens(1)
11 | var obj = tokens(2)
12 |
13 | (subject, predicate, obj)
14 | }
15 |
16 | val pages = scala.collection.mutable.Map[String, Int]()
17 | Source.fromFile("../../data/dbpedia/long_abstracts_en.nt").getLines.foreach { line =>
18 | if (!line.startsWith("#")) {
19 | val nt = triple(line)
20 | if (!pages.contains(nt._1)) {
21 | pages(nt._1) = -1
22 | }
23 | }
24 | }
25 |
26 | var pageIndex = 0
27 | Source.fromFile("../../data/dbpedia/page_links_en.nt").getLines.foreach { line =>
28 | if (!line.startsWith("#")) {
29 | val nt = triple(line)
30 | if (pages.contains(nt._1) && pages(nt._1) == -1) {
31 | pages(nt._1) = pageIndex
32 | pageIndex += 1
33 | }
34 | }
35 | }
36 |
37 | val d = 0.85
38 | val n = pageIndex
39 | val len = 172308908
40 | val colIndex = new Array[Int](n + 1)
41 | val rowIndex = new Array[Int](len)
42 | val value = new Array[Double](len)
43 |
44 | var k = 0
45 | var col = 0
46 | colIndex(0) = 0
47 | Source.fromFile("../../data/dbpedia/page_links_en.nt").getLines.foreach { line =>
48 | if (!line.startsWith("#")) {
49 | val nt = triple(line)
50 | val source = nt._1
51 | val sink = java.net.URLDecoder.decode(nt._3.replace("<", "").replace(">", "").replace(" .", ""), "UTF-8")
52 | if (pages.contains(source) && pages.contains(sink)) {
53 | val j = pages(source)
54 | val i = pages(sink)
55 | if (i != -1 && j != -1) {
56 | value(k) = 1.0
57 | rowIndex(k) = i
58 | if (j < col) {
59 | println("smaller col index", j, col, "skip")
60 | } else {
61 | if (j > col) {
62 | (col+1) to j foreach { idx => colIndex(idx) = k }
63 | col = j
64 | }
65 |
66 | k += 1
67 | if (k % 1000000 == 0) println("build links matrix", k)
68 | }
69 | }
70 | }
71 | }
72 | }
73 | (col+1) to n foreach { idx => colIndex(idx) = k }
74 |
75 | 0 until n foreach { idx =>
76 | val l = colIndex(idx+1) - colIndex(idx)
77 | if (l > 0) colIndex(idx) until colIndex(idx+1) foreach { i => value(i) = value(i) / l}
78 | }
79 |
80 | val matrix = new smile.math.matrix.SparseMatrix(n, n, value.slice(0,k), rowIndex.slice(0,k), colIndex)
81 | val rank = smile.math.matrix.EigenValueDecomposition.pagerank(matrix)
82 |
83 | val pagerank = pages.toSeq.filter(_._2 != -1).map { case (page, index) =>
84 | (page, rank(index))
85 | }.sortBy(-_._2)
86 |
87 | pagerank.foreach { case (page, rank) =>
88 | println(page, rank)
89 | }
90 |
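91 | // A toy illustration of the CSC (compressed sparse column) layout built above,
92 | // using the same SparseMatrix(nrows, ncols, value, rowIndex, colIndex)
93 | // constructor. For the 2x2 column-stochastic link matrix [[0, 1], [1, 0]]:
94 | //   value    = (1.0, 1.0)  -- the nonzeros, in column-major order
95 | //   rowIndex = (1, 0)      -- the row of each nonzero
96 | //   colIndex = (0, 1, 2)   -- column j spans indices colIndex(j) until colIndex(j+1)
97 | val toy = new smile.math.matrix.SparseMatrix(2, 2, Array(1.0, 1.0), Array(1, 0), Array(0, 1, 2))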
--------------------------------------------------------------------------------
/shell/src/universal/examples/rhino.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo -e "PUT"
4 | curl -X PUT -H "Content-Type: application/json" -d '{"_id":"dude","username":"xyz","password":"xyz"}' http://localhost:8080/unicorn_rhino_test
5 |
6 | echo -e "\nPUT again"
7 | curl -X PUT -H "Content-Type: application/json" -d '{"_id":"dude","username":"xyz","password":"xyz"}' http://localhost:8080/unicorn_rhino_test
8 |
9 | echo -e "\nGET"
10 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude
11 |
12 | echo -e "\nPOST"
13 | curl -X POST -H "Content-Type: application/json" -d '{"_id":"dude","username":"dude","password":"xyz"}' http://localhost:8080/unicorn_rhino_test
14 |
15 | echo -e "\nGET"
16 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude
17 |
18 | echo -e "\nPATCH"
19 | curl -X PATCH -H "Content-Type: application/json" -d '{"_id":"dude","$set":{"password":"abc"}}' http://localhost:8080/unicorn_rhino_test
20 |
21 | echo -e "\nGET"
22 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude
23 |
24 | echo -e "\nDELETE"
25 | curl -X DELETE http://localhost:8080/unicorn_rhino_test/dude
26 |
27 | echo -e "\nGET"
28 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude
29 |
30 | echo -e "\nPUT IBM"
31 | curl -X PUT -H "Content-Type: application/json" --header 'tenant: "IBM"' -d '{"_id":"dude","username":"xyz","password":"xyz"}' http://localhost:8080/unicorn_rhino_test
32 |
33 | echo -e "\nGET IBM"
34 | curl -X GET --header 'tenant: "IBM"' http://localhost:8080/unicorn_rhino_test/dude
35 |
36 | echo -e "\nGET MSFT"
37 | curl -X GET --header 'tenant: "MSFT"' http://localhost:8080/unicorn_rhino_test/dude
38 |
39 | echo -e "\nGET NONE"
40 | curl -X GET http://localhost:8080/unicorn_rhino_test/dude
41 |
42 | echo -e "\nDELETE"
43 | curl -X DELETE --header 'tenant: "IBM"' http://localhost:8080/unicorn_rhino_test/dude
44 |
45 | echo -e "\nGET"
46 | curl -X GET --header 'tenant: "IBM"' http://localhost:8080/unicorn_rhino_test/dude
47 |
48 |
--------------------------------------------------------------------------------
/shell/src/universal/examples/spark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | import java.util._
6 | import unicorn.json._
7 | import unicorn.bigtable._
8 | import unicorn.bigtable.hbase._
9 | import unicorn.unibase._
10 | import unicorn.narwhal._
11 | import unicorn.graph._
12 | import org.apache.spark._
13 | import org.apache.spark.graphx._
14 | import org.apache.spark.rdd.RDD
15 |
16 | val conf = new SparkConf().setAppName("unicorn").setMaster("local[4]")
17 | val sc = new SparkContext(conf)
18 | val db = new Narwhal(HBase())
19 |
20 | val table = db("worker")
21 | table.tenant = "IBM"
22 | val rdd = table.rdd(sc)
23 | rdd.count()
24 |
25 | val table = db("narwhal")
26 | val rdd = table.rdd(sc, json"""
27 | {
28 | "$$or": [
29 | {
30 | "age": {"$$gt": 30}
31 | },
32 | {
33 | "state": "NJ"
34 | }
35 | ]
36 | }
37 | """)
38 | rdd.count()
39 |
40 |
41 | val sqlContext = new org.apache.spark.sql.SQLContext(sc)
42 | import sqlContext.implicits._
43 |
44 | case class Worker(name: String, age: Int)
45 | val workers = rdd.map { js => Worker(js.name, js.age) }
46 | val df = sqlContext.createDataFrame(workers)
47 | df.show
48 |
49 | df.registerTempTable("worker")
50 | sqlContext.sql("SELECT * FROM worker WHERE age > 30").show
--------------------------------------------------------------------------------
/shell/src/universal/examples/sql.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | import unicorn.json._
6 | import unicorn.bigtable.hbase.HBase
7 | import unicorn.unibase._
8 | import unicorn.narwhal._, unicorn.sql.hunibase2SQL
9 |
10 | val hbase = Narwhal(HBase())
11 | hbase.createTable("unicorn_sql")
12 | val table = hbase("unicorn_sql")
13 |
14 | val haifeng = JsObject(
15 | "name" -> "Haifeng",
16 | "salary" -> 1
17 | )
18 |
19 | val roberto = JsObject(
20 | "name" -> "Roberto",
21 | "salary" -> 3
22 | )
23 |
24 | val jerome = JsObject(
25 | "name" -> "Jerome",
26 | "salary" -> 2
27 | )
28 |
29 | val keith = JsObject(
30 | "name" -> "Keith",
31 | "salary" -> 2
32 | )
33 |
34 | val amit = JsObject(
35 | "name" -> "Amit",
36 | "salary" -> 3
37 | )
38 |
39 | val stuart = JsObject(
40 | "name" -> "Stuart",
41 | "salary" -> 4
42 | )
43 |
44 | val don = JsObject(
45 | "name" -> "Don",
46 | "salary" -> 4
47 | )
48 |
49 | val carlos = JsObject(
50 | "name" -> "Carlos",
51 | "salary" -> 5
52 | )
53 |
54 | table.upsert(haifeng)
55 | table.upsert(roberto)
56 | table.upsert(jerome)
57 | table.upsert(keith)
58 | table.upsert(amit)
59 | table.upsert(stuart)
60 | table.upsert(don)
61 | table.upsert(carlos)
62 |
63 | hbase.sql("select * from unicorn_sql where name = 'Haifeng'")
64 |
65 | hbase.dropTable("unicorn_sql")
66 |
67 | hbase.sql("select address.state, count(address.state), max(age), avg(salary) as avg_salary from worker group by address.state order by avg_salary")
--------------------------------------------------------------------------------
/shell/src/universal/examples/traversal.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | import unicorn.json._
6 | import unicorn.bigtable.hbase.HBase
7 | import unicorn.unibase._
8 | import unicorn.unibase.Unibase.{$id, $graph}
9 | import unicorn.graph._
10 |
11 | // Google+
12 | val hbase = Unibase(HBase())
13 | val gplus = hbase("gplus")
14 |
15 | def trace(vertex: JsValue, edge: Edge[JsValue, (String, JsValue)], hops: Int) {
16 | if (hops > 0) println(s"111065108889012087599 --$hops--> $vertex")
17 | }
18 |
19 | val dan = JsString("111065108889012087599")
20 |
21 | val visitor = new SimpleUnibaseVisitor(gplus, 2)
22 | visitor.relationships = Some(Set("follows"))
23 | visitor.addVisitHook(trace)
24 |
25 | val danCircle = JsGraph(dan, visitor)
26 | danCircle.topologicalSort
27 | danCircle.dijkstra
28 |
29 |
30 | // Make a small org chart for A* search
31 | hbase.createTable("astar")
32 | val astar = hbase("astar")
33 |
34 | val haifeng = JsObject(
35 | $id -> "Haifeng",
36 | "rank" -> 1
37 | )
38 | graph(haifeng)("works with", "Jerome") = 1
39 | graph(haifeng)("reports to", "Roberto") = 2
40 |
41 | val roberto = JsObject(
42 | $id -> "Roberto",
43 | "rank" -> 3
44 | )
45 | graph(roberto)("works with", "Keith") = 1
46 | graph(roberto)("reports to", "Stuart") = 2
47 |
48 | val jerome = JsObject(
49 | $id -> "Jerome",
50 | "rank" -> 2
51 | )
52 | graph(jerome)("works with", "Roberto") = 1
53 | graph(jerome)("reports to", "Don") = 2
54 |
55 | val keith = JsObject(
56 | $id -> "Keith",
57 | "rank" -> 2
58 | )
59 | graph(keith)("works with", "Roberto") = 1
60 | graph(keith)("works with", "Amit") = 1
61 | graph(keith)("reports to", "Stuart") = 2
62 |
63 | val amit = JsObject(
64 | $id -> "Amit",
65 | "rank" -> 3
66 | )
67 | graph(amit)("works with", "Roberto") = 1
68 | graph(amit)("works with", "Keith") = 1
69 | graph(amit)("reports to", "Stuart") = 2
70 |
71 | val stuart = JsObject(
72 | $id -> "Stuart",
73 | "rank" -> 4
74 | )
75 | graph(stuart)("works with", "Don") = true
76 | graph(stuart)("reports to", "Carlos") = true
77 |
78 | val don = JsObject(
79 | $id -> "Don",
80 | "rank" -> 4
81 | )
82 | graph(don)("works with", "Stuart") = true
83 | graph(don)("reports to", "Carlos") = true
84 |
85 | val carlos = JsObject(
86 | $id -> "Carlos",
87 | "rank" -> 5
88 | )
89 |
90 | astar.insert(haifeng)
91 | astar.insert(roberto)
92 | astar.insert(jerome)
93 | astar.insert(keith)
94 | astar.insert(amit)
95 | astar.insert(stuart)
96 | astar.insert(don)
97 | astar.insert(carlos)
98 |
99 | println(astar.find(where = JsObject("_id" -> JsString("Haifeng"))).next)
100 |
101 | val graphOps = new GraphOps[JsObject, (String, JsValue)]()
102 | val path = graphOps.dijkstra(haifeng, carlos,
103 | (doc: JsObject) => {
104 | doc($graph).asInstanceOf[JsObject].fields.toSeq.flatMap { case (relationship, links) =>
105 | links.asInstanceOf[JsObject].fields.toSeq.map { case (_, link) =>
106 | val vertex = astar(link($id)).get
107 | val edge = (relationship, link($data))
108 | (vertex, edge)
109 | }
110 | }.iterator
111 | }
112 | )
113 |
114 | path.map {
115 | case (doc, Some(edge)) => edge._1 + " --> " + doc($id)
116 | case (doc, None) => doc($id)
117 | }.mkString(" -- ")
118 |
119 | val shortPath = graphOps.astar(haifeng, carlos,
120 | (doc: JsObject) => {
121 | doc($graph).asInstanceOf[JsObject].fields.toSeq.flatMap { case (relationship, links) =>
122 | links.asInstanceOf[JsObject].fields.toSeq.map { case (_, link) =>
123 | val vertex = astar(link($id)).get
124 | val edge = (relationship, link($data))
125 | (vertex, edge)
126 | }
127 | }.iterator
128 | },
129 | (a: JsObject, b: JsObject) => (a.rank, b.rank) match {
130 | case (ar: JsInt, br: JsInt) => Math.abs(ar.value - br.value)
131 | case _ => 100
132 | },
133 | (a: JsObject, b: JsObject, e: (String, JsValue)) => e._2 match {
134 | case JsInt(weight) => weight.toDouble
135 | case _ => 3.0
136 | }
137 | )
138 |
139 | shortPath.map {
140 | case (doc, Some(edge)) => edge._1 + " --> " + doc($id)
141 | case (doc, None) => doc($id)
142 | }.mkString(" -- ")
143 |
144 |
145 | hbase.dropTable("astar")
--------------------------------------------------------------------------------
/shell/src/universal/examples/twitter.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | // import the Twitter social graph to unibase
6 |
7 | import scala.io.Source
8 | import unicorn.json._
9 | import unicorn.bigtable.rocksdb._
10 | import unicorn.unibase._
11 | import unicorn.unibase.graph._
12 |
13 | val db = Unibase(RocksDB.create("/tmp/unicorn-twitter"))
14 | db.createGraph("twitter")
15 | val twitter = db.graph("twitter", new Snowflake(0))
16 |
17 | // Download and unzip the archive first, then point csv at the extracted local file.
18 | twitter.csv("http://an.kaist.ac.kr/~haewoon/release/twitter_social_graph/twitter_rv.zip", longVertexId = true)
--------------------------------------------------------------------------------
/shell/src/universal/examples/wiki.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | // import wikipedia dump to unibase
6 |
7 | import java.util._
8 | import scala.collection.mutable.Stack
9 | import scala.io.Source
10 | import scala.xml.pull._
11 | import unicorn.json._
12 | import unicorn.bigtable._
13 | import unicorn.bigtable.accumulo._
14 | import unicorn.bigtable.hbase._
15 | import unicorn.unibase._
16 |
17 | def wikipedia(bucket: HBaseBucket, files: String*): Unit = {
18 | files.foreach { xmlFile =>
19 | val xml = new XMLEventReader(Source.fromFile(xmlFile))
20 |
21 | var field: String = null
22 | val doc = Stack[JsObject]()
23 | for (event <- xml) {
24 | event match {
25 | case EvElemStart(_, "page", _, _) => {
26 | doc.push(JsObject())
27 | }
28 | case EvElemEnd(_, "page") => {
29 | if (!doc.isEmpty) {
30 | val d = doc.pop
31 | assert(doc.isEmpty)
32 |
33 | d("ns") match {
34 | case JsString(value) if value == "0" =>
35 | val title = d("title") match {
36 | case JsString(value) => value
37 | case _ => ""
38 | }
39 |
40 | if (title != "") {
41 | d($id) = d.id.toString.toInt
42 | println(d($id))
43 | bucket.upsert(d)
44 | }
45 | case _ =>
46 | }
47 | }
48 | }
49 | case e @ EvElemStart(_, tag, _, _) => {
50 | if (!doc.isEmpty) {
51 | if (field != null) {
52 | val child = JsObject()
53 | val parent = doc.top
54 | parent(field) = child
55 | doc.push(child)
56 | }
57 | field = tag
58 | }
59 | }
60 | case e @ EvElemEnd(_, tag) => {
61 | if (field == null) {
62 | if (!doc.isEmpty) doc.pop
63 | }
64 | else field = null
65 | }
66 | case EvText(t) => {
67 | if (!doc.isEmpty && field != null) {
68 | doc.top(field) = t
69 | }
70 | }
71 | case _ => // ignore
72 | }
73 | }
74 | }
75 | }
76 |
77 | val hbase = Unibase(HBase())
78 | hbase.createTable("wiki")
79 | val bucket = hbase("wiki")
80 |
81 | wikipedia(bucket, "../../data/wiki/enwikinews-20140410-pages-articles-multistream.xml")
82 |
--------------------------------------------------------------------------------
/shell/src/universal/examples/worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec unicorn -nc "$0" "$@"
3 | !#
4 |
5 | import java.util._
6 | import unicorn.json._
7 | import unicorn.bigtable._
8 | import unicorn.bigtable.accumulo._
9 | import unicorn.bigtable.hbase._
10 | import unicorn.unibase._
11 |
12 | // measure running time of a function/block
13 | def time[A](f: => A) = {
14 | val s = System.nanoTime
15 | val ret = f
16 | if (ret.isInstanceOf[JsValue]) println(ret.asInstanceOf[JsValue].prettyPrint)
17 | else println(ret)
18 | println("time: " + (System.nanoTime - s)/1e6 + " ms")
19 | ret
20 | }
21 |
22 | // connect to Accumulo mock
23 | val accumulo = Unibase(Accumulo())
24 | time { accumulo.createTable("worker") }
25 | val bucket = accumulo("worker")
26 |
27 | // Read a non-existing row. It is the pure time of round trip.
28 | time { bucket("row1") }
29 |
30 | // Create a document
31 | val person = JsObject(
32 | "name" -> "Haifeng",
33 | "gender" -> "Male",
34 | "salary" -> 1.0,
35 | "address" -> JsObject(
36 | "street" -> "135 W. 18th ST",
37 | "city" -> "New York",
38 | "state" -> "NY",
39 | "zip" -> 10011
40 | ),
41 | "project" -> JsArray("HCM", "Analytics"),
42 | "graph" -> JsObject(
43 | "work with" -> JsObject(
44 | "Jim" -> JsObject(
45 | "_id" -> "Jim",
46 | "data" -> 1
47 | ),
48 | "Mike" -> JsObject(
49 | "id" -> "Mike",
50 | "data" -> 1
51 | )
52 | ),
53 | "report to" -> JsObject(
54 | "Tom" -> JsObject(
55 | "_id" -> "Tome",
56 | "data" -> 1
57 | )
58 | )
59 | )
60 | )
61 |
62 |
63 | // save document into a dataset
64 | val key = time { bucket.upsert(person) }
65 |
66 | val worker = time { bucket(key).get }
67 | worker.prettyPrint
68 |
69 | // Read a document partially (project only the "name" field)
70 | val doc = time { bucket(key, "name").get }
71 | doc.prettyPrint
72 |
73 | val update = JsObject(
74 | "_id" -> key,
75 | "$set" -> JsObject(
76 | "salary" -> 100000.0,
77 | "address.street" -> "5 ADP Blvd"
78 | ),
79 | "$unset" -> JsObject(
80 | "gender" -> JsTrue
81 | )
82 | )
83 |
84 | time { bucket.update(update) }
85 |
86 | val updated = time { bucket(key).get }
87 | updated.prettyPrint
88 |
89 | // HBase
90 | val hbase = Unibase(HBase())
91 |
92 | time { hbase.createTable("worker") }
93 | val hbucket = hbase("worker")
94 |
95 | time { hbucket.upsert(person) }
96 |
97 | val asOfDate = new Date
98 |
99 | time { hbucket.update(update) }
100 |
101 | val old = time { hbucket(asOfDate, key).get }
102 | old.prettyPrint
103 |
104 | val latest = time { hbucket(key).get }
105 | latest.prettyPrint
106 |
107 | val rollback = """
108 | {
109 | "$rollback": {
110 | "salary": 1,
111 | "address.street": 1,
112 | "gender": 1
113 | }
114 | }
115 | """.parseJsObject
116 |
117 | rollback(Unibase.$id) = key
118 | time { hbucket.update(rollback) }
119 |
120 | val yesterdayOnceMore = time { hbucket(key).get }
121 | yesterdayOnceMore.prettyPrint
122 |
123 | // delete the bucket
124 | time { hbase.dropTable("worker") }
125 |
--------------------------------------------------------------------------------
/sql/build.sbt:
--------------------------------------------------------------------------------
1 | name := "unicorn-sql"
2 |
3 | libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.4"
4 |
--------------------------------------------------------------------------------
/sql/src/main/scala/unicorn/sql/package.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn
18 |
19 | import unicorn.narwhal.Narwhal
20 |
21 | package object sql {
22 | implicit def narwhalSQLContext(db: Narwhal) = new SQLContext(db)
23 | }
24 |
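With `import unicorn.sql._` in scope, the implicit above is what lets sql.sh call `hbase.sql(...)` directly on a Narwhal. A minimal sketch with the conversion written out by hand, assuming `SQLContext` exposes the `sql(String)` method used in sql.sh:

    import unicorn.bigtable.hbase.HBase
    import unicorn.narwhal.Narwhal
    import unicorn.sql._

    val db = new Narwhal(HBase())
    // What the compiler inserts for you:
    val ctx = narwhalSQLContext(db)
    ctx.sql("select * from unicorn_sql where name = 'Haifeng'")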
--------------------------------------------------------------------------------
/unibase/build.sbt:
--------------------------------------------------------------------------------
1 | name := "unicorn-unibase"
2 |
3 | libraryDependencies += "org.apache.jena" % "jena-arq" % "3.1.0"
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/DocumentSerializer.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase
18 |
19 | import java.nio.ByteBuffer
20 |
21 | import unicorn.bigtable.{Column, ColumnFamily}
22 | import unicorn.json._
23 |
24 | /** Document serializer. By default, document key size is up to 64KB, column size is up to 10MB.
25 | *
26 | * @author Haifeng Li
27 | */
28 | class DocumentSerializer(
29 | val keySerializer: BsonSerializer = new BsonSerializer(ByteBuffer.allocate(65536)),
30 | val valueSerializer: ColumnarJsonSerializer = new ColumnarJsonSerializer(ByteBuffer.allocate(10485760))) {
31 |
32 | /** Serialize document data. */
33 | def serialize(json: JsObject): Seq[Column] = {
34 | valueSerializer.serialize(json).map { case (path, value) =>
35 | Column(valueSerializer.str2Bytes(path), value)
36 | }.toSeq
37 | }
38 |
39 | /** Serialize document id. */
40 | def serialize(tenant: JsValue, id: JsValue): Array[Byte] = {
41 | keySerializer.clear
42 | keySerializer.put(tenant)
43 | keySerializer.put(id)
44 | keySerializer.toBytes
45 | }
46 |
47 | /** Return the row prefix of a tenant. */
48 | def tenantRowKeyPrefix(tenant: JsValue): Array[Byte] = {
49 | keySerializer.clear
50 | keySerializer.put(tenant)
51 | keySerializer.toBytes
52 | }
53 |
54 | /** Deserialize document key. */
55 | def deserialize(key: Array[Byte]): (JsValue, JsValue) = {
56 | val buffer = ByteBuffer.wrap(key)
57 | val tenant = keySerializer.deserialize(buffer)
58 | val id = keySerializer.deserialize(buffer)
59 | (tenant, id)
60 | }
61 |
62 | /** Assembles the document from multi-column family data. */
63 | def deserialize(data: Seq[ColumnFamily]): Option[JsObject] = {
64 | val objects = data.map { case ColumnFamily(family, columns) =>
65 | val map = columns.map { case Column(qualifier, value, _) =>
66 | (new String(qualifier, valueSerializer.charset), value.bytes)
67 | }.toMap
68 | val json = valueSerializer.deserialize(map)
69 | json.asInstanceOf[JsObject]
70 | }
71 |
72 | if (objects.size == 0)
73 | None
74 | else if (objects.size == 1)
75 | Some(objects(0))
76 | else {
77 | val fold = objects.foldLeft(JsObject()) { (doc, family) =>
78 | doc.fields ++= family.fields
79 | doc
80 | }
81 | Some(fold)
82 | }
83 | }
84 | }
85 |
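A minimal round-trip sketch of the serializer above; the tenant, id, and field values are illustrative, and "doc" matches the DocumentColumnFamily constant in the unibase package object:

    import unicorn.bigtable.ColumnFamily
    import unicorn.json._
    import unicorn.unibase.DocumentSerializer

    val serializer = new DocumentSerializer()
    val doc = JsObject("name" -> "Haifeng", "zip" -> 10011)

    // Body: one Column per JSON path.
    val columns = serializer.serialize(doc)
    // Row key: tenant and id packed back to back.
    val key = serializer.serialize(JsString("IBM"), JsString("dude"))

    val (tenant, id) = serializer.deserialize(key)
    val restored = serializer.deserialize(Seq(ColumnFamily("doc", columns)))  // Some(doc)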
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/graph/Edge.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase.graph
18 |
19 | import scala.language.dynamics
20 | import unicorn.json.JsValue
21 |
22 | /** Graph (directed) edge. For an edge 1 - follows -> 3,
23 | * "1" and "3" are vertex ids, `follows` is the label of edge.
24 | * Vertex 1 is the `out vertex` of edge, and vertex 3 is the `in vertex`.
25 | * Besides the label, an edge may have optional data.
26 | *
27 | * @author Haifeng Li
28 | */
29 | case class Edge(val from: Long, val label: String, val to: Long, val properties: JsValue) extends Dynamic {
30 |
31 | override def toString = s"($from - [$label] -> $to) = ${properties.prettyPrint}"
32 |
33 | def apply(property: String): JsValue = properties.apply(property)
34 |
35 | def applyDynamic(property: String): JsValue = apply(property)
36 |
37 | def selectDynamic(property: String): JsValue = apply(property)
38 | }
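Since Edge mixes in Dynamic, edge properties read like plain fields; a small sketch with illustrative values:

    import unicorn.json._
    import unicorn.unibase.graph.Edge

    val e = Edge(1L, "follows", 3L, JsObject("since" -> 2010))
    e.since     // JsInt(2010), via selectDynamic
    e("since")  // the same value, via apply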
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/graph/GraphSerializer.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase.graph
18 |
19 | import java.nio.ByteBuffer
20 | import unicorn.bigtable.{Column, Row}
21 | import unicorn.json._
22 | import unicorn.util._
23 |
24 | /** Graph serializer. By default, edge label size is up to 256,
25 | * vertex property size is up to 64KB, overall data size of each edge is up to 10MB.
26 | *
27 | * @author Haifeng Li
28 | */
29 | class GraphSerializer(
30 | val buffer: ByteBuffer = ByteBuffer.allocate(1024),
31 | val vertexSerializer: ColumnarJsonSerializer = new ColumnarJsonSerializer(ByteBuffer.allocate(65536)),
32 | val edgeSerializer: BsonSerializer = new BsonSerializer(ByteBuffer.allocate(10485760))) extends Logging {
33 |
34 | /** Serializes vertex id. */
35 | def serialize(id: Long): Array[Byte] = {
36 | buffer.clear
37 | buffer.putLong(id)
38 | buffer
39 | }
40 |
41 | /** Serializes the document vertex lookup table row key. */
42 | def serialize(table: String, tenant: JsValue, key: JsValue): Array[Byte] = {
43 | buffer.clear
44 | edgeSerializer.serialize(buffer, table)
45 | edgeSerializer.serialize(buffer, tenant)
46 | edgeSerializer.serialize(buffer, key)
47 | buffer
48 | }
49 |
50 | /** Serializes vertex property data. */
51 | def serializeVertex(json: JsObject): Seq[Column] = {
52 | vertexSerializer.serialize(json).map { case (path, value) =>
53 | Column(vertexSerializer.str2Bytes(path), value)
54 | }.toSeq
55 | }
56 |
57 | def deserializeVertex(row: Row): Vertex = {
58 | val vertex = deserializeVertexId(row.key)
59 | val families = row.families
60 |
61 | val properties = families.find(_.family == GraphVertexColumnFamily).map { family =>
62 | deserializeVertexProperties(family.columns)
63 | }
64 |
65 | val in = families.find(_.family == GraphInEdgeColumnFamily).map { family =>
66 | family.columns.map { column =>
67 | val (label, source) = deserializeEdgeColumnQualifier(column.qualifier)
68 | val properties = deserializeEdgeProperties(column.value)
69 | Edge(source, label, vertex, properties)
70 | }
71 | }.getOrElse(Seq.empty)
72 |
73 | val out = families.find(_.family == GraphOutEdgeColumnFamily).map { family =>
74 | family.columns.map { column =>
75 | val (label, target) = deserializeEdgeColumnQualifier(column.qualifier)
76 | val properties = deserializeEdgeProperties(column.value)
77 | Edge(vertex, label, target, properties)
78 | }
79 | }.getOrElse(Seq.empty)
80 |
81 | val edges = (in.size, out.size) match {
82 | case (0, _) => out
83 | case (_, 0) => in
84 | case _ => out ++ in
85 | }
86 |
87 | Vertex(vertex, properties.getOrElse(JsObject("id" -> JsLong(vertex))), edges)
88 | }
89 |
90 | /** Deserializes vertex property data. */
91 | def deserializeVertexProperties(columns: Seq[Column]): JsObject = {
92 | val map = columns.map { case Column(qualifier, value, _) =>
93 | (new String(qualifier, vertexSerializer.charset), value.bytes)
94 | }.toMap
95 | vertexSerializer.deserialize(map).asInstanceOf[JsObject]
96 | }
97 |
98 | /** Serializes an edge column qualifier. */
99 | def serializeEdgeColumnQualifier(label: Array[Byte], vertex: Long): Array[Byte] = {
100 | buffer.clear
101 | buffer.put(label)
102 | buffer.put(0.toByte)
103 | buffer.putLong(vertex)
104 | buffer
105 | }
106 |
107 | /** Deserializes an edge column qualifier. */
108 | def deserializeEdgeColumnQualifier(bytes: Array[Byte]): (String, Long) = {
109 | val buffer = ByteBuffer.wrap(bytes)
110 | val label = edgeSerializer.cstring(buffer)
111 | val vertex = buffer.getLong
112 | (label, vertex)
113 | }
114 |
115 | /** Serializes edge property data. */
116 | def serializeEdge(json: JsValue): Array[Byte] = {
117 | edgeSerializer.clear
118 | edgeSerializer.put(json)
119 | edgeSerializer.toBytes
120 | }
121 |
122 | /** Deserializes vertex id. */
123 | def deserializeVertexId(bytes: Array[Byte]): Long = {
124 | require(bytes.length == 8, s"vertex id bytes size is not 8: ${bytes.length}")
125 | ByteBuffer.wrap(bytes).getLong
126 | }
127 |
128 | /** Deserializes edge property data. */
129 | def deserializeEdgeProperties(bytes: Array[Byte]): JsValue = {
130 | edgeSerializer.deserialize(bytes)
131 | }
132 | }
133 |
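The edge column qualifier above is laid out as the label bytes, a NUL terminator, then the 8-byte vertex id; a round-trip sketch:

    import unicorn.unibase.graph.GraphSerializer
    import unicorn.util._

    val serializer = new GraphSerializer()
    val qualifier: Array[Byte] =
      serializer.serializeEdgeColumnQualifier("follows".getBytes(utf8), 3L)

    val (label, vertex) = serializer.deserializeEdgeColumnQualifier(qualifier)
    // label == "follows", vertex == 3L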
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/graph/SimpleTraveler.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase.graph
18 |
19 | import VertexColor._
20 | import Direction._
21 |
22 | /** Simple graph traveler with cache management.
23 | * For DFS and BFS, the user should create a subclass overriding
24 | * the `apply` method, which is a no-op by default.
25 | *
26 | * @param graph The graph to visit.
27 | * @param relationships Relationship of interest. Only neighbors with given
28 | * relationship will be visited. Empty set means all
29 | * relationships.
30 | * @param maxHops Maximum number of hops during graph traversal.
31 | * @param direction Edges to follow in the traversal.
32 | *
33 | * @author Haifeng Li
34 | */
35 | class SimpleTraveler(val graph: ReadOnlyGraph, val relationships: Set[String] = Set.empty, val maxHops: Int = 3, val direction: Direction = Outgoing) extends Traveler {
36 | /** The color mark if a vertex was already visited. */
37 | private val mark = collection.mutable.Map[Long, VertexColor]().withDefaultValue(White)
38 |
39 | /** The cache of vertices. */
40 | private val cache = collection.mutable.Map[Long, Vertex]()
41 |
42 | /** User defined vertex visit function. The default implementation is nop.
43 | * The user should create a sub class overriding this method.
44 | *
45 | * @param vertex the vertex on visiting.
46 | * @param edge the incoming arc (None for starting vertex).
47 | * @param hops the number of hops from the starting vertex to this vertex.
48 | */
49 | def apply(vertex: Vertex, edge: Option[Edge], hops: Int): Unit = {
50 |
51 | }
52 |
53 | /** Resets the vertex color to unvisited and clean up the cache. */
54 | def reset: Unit = {
55 | mark.clear
56 | cache.clear
57 | }
58 |
59 | override def vertex(id: Long): Vertex = {
60 | cache.get(id) match {
61 | case Some(node) => node
62 | case None =>
63 | val node = graph(id, direction)
64 | cache(id) = node
65 | node
66 | }
67 | }
68 |
69 | override def vertex(key: String): Vertex = {
70 | val _id = id(key)
71 | require(_id.isDefined, s"Vertex $key doesn't exist")
72 | vertex(_id.get)
73 | }
74 |
75 | /** Translates a vertex string key to 64 bit id. */
76 | override def id(key: String): Option[Long] = {
77 | graph.id(key)
78 | }
79 |
80 | override def color(id: Long): VertexColor = mark(id)
81 |
82 | override def visit(vertex: Vertex, edge: Option[Edge], hops: Int): Unit = {
83 | apply(vertex, edge, hops)
84 |
85 | val black = vertex.neighbors.forall { neighbor =>
86 | mark.contains(neighbor)
87 | }
88 |
89 | mark(vertex.id) = if (black) Black else Gray
90 | }
91 |
92 | override def neighbors(vertex: Vertex, hops: Int): Iterator[(Long, Edge)] = {
93 | if (hops >= maxHops) return Seq.empty.iterator
94 |
95 | val edges = if (relationships.isEmpty) vertex.edges
96 | else vertex.edges.filter { edge =>
97 | relationships.contains(edge.label)
98 | }
99 |
100 | edges.map { edge =>
101 | val neighbor = if (edge.to != vertex.id) edge.to else edge.from
102 | (neighbor, edge)
103 | }.iterator
104 | }
105 | }
106 |
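A minimal sketch of a custom traveler that just logs each visit; how the traveler is handed to a traversal is graph-API specific and not shown here:

    import unicorn.unibase.graph._

    class PrintTraveler(g: ReadOnlyGraph)
      extends SimpleTraveler(g, relationships = Set("follows"), maxHops = 2) {

      // Called once per visited vertex; edge is None for the starting vertex.
      override def apply(vertex: Vertex, edge: Option[Edge], hops: Int): Unit =
        println(s"hop $hops: ${vertex.id} via ${edge.map(_.label).getOrElse("start")}")
    }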
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/graph/Traveler.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase.graph
18 |
19 | import unicorn.json._
20 |
21 | /** Vertex color mark in a graph traversal. */
22 | object VertexColor extends Enumeration {
23 | type VertexColor = Value
24 |
25 | /** White marks vertices that have yet to be discovered. */
26 | val White = Value
27 |
28 | /** Gray marks a vertex that is discovered but still
29 | * has vertices adjacent to it that are undiscovered. */
30 | val Gray = Value
31 |
32 | /** A black vertex is a discovered vertex that is not
33 | * adjacent to any white vertices.
34 | */
35 | val Black = Value
36 | }
37 |
38 | /** The edges to follow in a graph traversal. */
39 | object Direction extends Enumeration {
40 | type Direction = Value
41 |
42 | /** Outgoing edges. */
43 | val Outgoing = Value
44 |
45 | /** Incoming edges. */
46 | val Incoming = Value
47 |
48 | /** Both directions. */
49 | val Both = Value
50 | }
51 |
52 | import VertexColor._
53 |
54 | /** A graph traveler is a proxy to the graph during
55 | * graph traversal. Beyond the visitor design pattern
56 | * that processes a vertex during the traversal,
57 | * the traveler also provides methods to access
58 | * graph vertices, the neighbors of a vertex to explore,
59 | * and the weight of an edge.
60 | *
61 | * @author Haifeng Li
62 | */
63 | trait Traveler {
64 | /** Translates a vertex string key to 64 bit id. */
65 | def id(key: String): Option[Long]
66 |
67 | /** Returns the vertex of given ID. */
68 | def vertex(id: Long): Vertex
69 |
70 | /** Returns the vertex of given string key. */
71 | def vertex(key: String): Vertex
72 |
73 | /** The color mark if a vertex was already visited. */
74 | def color(id: Long): VertexColor
75 |
76 | /** Visit a vertex during graph traversal.
77 | *
78 | * @param vertex the vertex on visiting.
79 | * @param edge the incoming arc (None for starting vertex).
80 | * @param hops the number of hops from the starting vertex to this vertex.
81 | */
82 | def visit(vertex: Vertex, edge: Option[Edge], hops: Int): Unit
83 |
84 | /** Returns an iterator of the neighbors and associated edges of a vertex.
85 | *
86 | * @param vertex the vertex on visiting.
87 | * @param hops the number of hops from starting vertex, which may be used for early termination.
88 | * @return an iterator of the outgoing edges
89 | */
90 | def neighbors(vertex: Vertex, hops: Int): Iterator[(Long, Edge)]
91 |
92 | /** The weight of an edge (e.g. for shortest path search). */
93 | def weight(edge: Edge): Double = edge.properties match {
94 | case JsInt(x) => x
95 | case JsCounter(x) => x
96 | case JsLong(x) => x
97 | case _ => 1.0
98 | }
99 | }
100 |
101 | /** Traveler for A* searcher. */
102 | trait AstarTraveler extends Traveler {
103 | /** The future path-cost function, which is an admissible
104 | * "heuristic estimate" of the distance from the current vertex to the goal.
105 | * Note that the heuristic function must be monotonic.
106 | */
107 | def h(v1: Long, v2: Long): Double
108 | }
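A sketch of an AstarTraveler built on SimpleTraveler, using the rank gap as the heuristic; that this stays admissible (and monotonic) is an assumption about the data, not something the API enforces:

    import unicorn.json._
    import unicorn.unibase.graph._

    class RankTraveler(g: ReadOnlyGraph) extends SimpleTraveler(g) with AstarTraveler {
      override def h(v1: Long, v2: Long): Double =
        (vertex(v1)("rank"), vertex(v2)("rank")) match {
          case (JsInt(a), JsInt(b)) => math.abs(a - b)
          case _ => 0.0  // unknown ranks: fall back to a trivially admissible estimate
        }
    }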
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/graph/Vertex.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase.graph
18 |
19 | import scala.language.dynamics
20 | import unicorn.json._
21 |
22 | /** Graph vertex.
23 | *
24 | * @author Haifeng Li
25 | */
26 | case class Vertex(val id: Long, val properties: JsObject, val edges: Seq[Edge]) extends Dynamic {
27 |
28 | /** In vertices of outgoing edges. */
29 | @transient lazy val in: Map[String, Seq[Long]] = {
30 | edges.filter(_.from == id).groupBy(_.label).mapValues(_.map(_.to))
31 | }
32 |
33 | /** Out vertices of incoming edges. */
34 | @transient lazy val out: Map[String, Seq[Long]] = {
35 | edges.filter(_.to == id).groupBy(_.label).mapValues(_.map(_.from))
36 | }
37 |
38 | /** Incoming arcs. */
39 | @transient lazy val inE: Map[String, Seq[Edge]] = {
40 | edges.filter(_.to == id).groupBy(_.label)
41 | }
42 |
43 | /** Outgoing arcs. */
44 | @transient lazy val outE: Map[String, Seq[Edge]] = {
45 | edges.filter(_.from == id).groupBy(_.label)
46 | }
47 |
48 | /** Neighbor vertices. */
49 | @transient lazy val neighbors: Seq[Long] = {
50 | edges.map { case Edge(from, _, to, _) =>
51 | if (from == id) to else from
52 | }
53 | }
54 |
55 | override def toString = s"Vertex[$id] = ${properties.prettyPrint}"
56 |
57 | def apply(property: String): JsValue = properties.apply(property)
58 |
59 | def applyDynamic(property: String): JsValue = apply(property)
60 |
61 | def selectDynamic(property: String): JsValue = apply(property)
62 | }
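A small sketch of the derived views, matching the Edge terminology above (values illustrative):

    import unicorn.json._
    import unicorn.unibase.graph.{Edge, Vertex}

    val v = Vertex(1L, JsObject("name" -> "dan"), Seq(
      Edge(1L, "follows", 3L, JsObject()),  // outgoing: 1 -> 3
      Edge(2L, "follows", 1L, JsObject())   // incoming: 2 -> 1
    ))

    v.in         // Map(follows -> Seq(3)): in vertices of outgoing edges
    v.out        // Map(follows -> Seq(2)): out vertices of incoming edges
    v.neighbors  // Seq(3, 2)
    v.name       // JsString("dan"), via selectDynamic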
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/graph/package.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase
18 |
19 | /**
20 | * @author Haifeng Li
21 | */
22 | package object graph {
23 | val $doc = "_doc"
24 | val $table = "_table"
25 |
26 | private[unicorn] val GraphDocumentVertexTable = "unicorn_graph_doc_vertex"
27 | private[unicorn] val GraphVertexColumnFamily = "vertex"
28 | private[unicorn] val GraphInEdgeColumnFamily = "in"
29 | private[unicorn] val GraphOutEdgeColumnFamily = "out"
30 | }
31 |
--------------------------------------------------------------------------------
/unibase/src/main/scala/unicorn/unibase/package.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn
18 |
19 | /**
20 | * @author Haifeng Li
21 | */
22 | package object unibase {
23 | val $id = "_id"
24 | val $tenant = "_tenant"
25 |
26 | private[unibase] val DocumentColumnFamily = "doc"
27 |
28 | // Originally we used "." as delimiter in table name.
29 | // However, "." cannot be part of table name in Accumulo.
30 | // So we switch to "_".
31 | private[unibase] val MetaTableName = "unicorn_meta_table"
32 | private[unibase] val MetaTableColumnFamily = "meta"
33 |
34 | private[unibase] val DefaultLocalityField = "default_locality"
35 | }
36 |
--------------------------------------------------------------------------------
/unibase/src/test/scala/unicorn/unibase/UnibaseSpec.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.unibase
18 |
19 | import org.specs2.mutable._
20 | import unicorn.bigtable.accumulo.Accumulo
21 |
22 | /**
23 | * @author Haifeng Li
24 | */
25 | class UnibaseSpec extends Specification {
26 | // Make sure to run the test cases one by one.
27 | // Otherwise, tests touching the same columns will fail due to concurrency.
28 | sequential
29 | val bigtable = Accumulo()
30 | val db = new Unibase(bigtable)
31 | val tableName = "unicorn_unibase_test"
32 |
33 | "Unibase" should {
34 | "create table" in {
35 | db.createTable(tableName)
36 | bigtable.tableExists(tableName) === true
37 |
38 | db.dropTable(tableName)
39 | bigtable.tableExists(tableName) === false
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/unicorn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | sbt stage
4 | shell/target/universal/stage/bin/unicorn -v
5 |
--------------------------------------------------------------------------------
/util/build.sbt:
--------------------------------------------------------------------------------
1 | name := "unicorn-util"
2 |
3 | libraryDependencies += "org.slf4j" % "slf4j-api" % "1.7.21"
4 |
5 | libraryDependencies += "com.typesafe" % "config" % "1.2.1"
6 |
--------------------------------------------------------------------------------
/util/src/main/scala/unicorn/util/ByteArray.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.util
18 |
19 | /**
20 | * Pimped byte array.
21 | *
22 | * @author Haifeng Li
23 | */
24 | case class ByteArray(bytes: Array[Byte]) extends Ordered[ByteArray] {
25 | /** Flip each bit of a byte string */
26 | def unary_~ = ByteArray(bytes.map { b => (~b).toByte })
27 |
28 | /** Hexadecimal string representation */
29 | def hex = bytes2Hex(bytes)
30 |
32 | /** Convert UTF-8 bytes back to a string */
32 | override def toString = new String(bytes, utf8)
33 |
34 | override def compare(that: ByteArray): Int = compareByteArray(bytes, that.bytes)
35 |
36 | override def compareTo(that: ByteArray): Int = compareByteArray(bytes, that.bytes)
37 |
38 | override def equals(that: Any): Boolean = {
39 | if (!that.isInstanceOf[ByteArray]) return false
40 | compareTo(that.asInstanceOf[ByteArray]) == 0
41 | }
42 |
43 | override def hashCode: Int = {
44 | var hash = 7
45 | bytes.foreach { i => hash = 31 * hash + i }
46 | hash
47 | }
48 | }
49 |
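A quick sketch of the wrapper in action (the implicits come from the unicorn.util package object):

    import unicorn.util._

    val a: ByteArray = "abc"  // via string2ByteArray
    val b: ByteArray = "abd"

    a < b       // true: unsigned lexicographic order
    a.hex       // "616263"
    (~a).hex    // "9E9D9C", every bit flipped
    a.toString  // "abc", UTF-8 round trip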
--------------------------------------------------------------------------------
/util/src/main/scala/unicorn/util/Config.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.util
18 |
19 | import com.typesafe.config.ConfigFactory
20 |
21 | /**
22 | * @author Haifeng Li
23 | */
24 | object Config {
25 |
26 | def config = {
27 | val configNamespace = "unicorn"
28 | ConfigFactory.load().getConfig(configNamespace)
29 | }
30 | }
31 |
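Typical usage; the key below is hypothetical, standing in for whatever lives under the `unicorn` block of application.conf:

    import unicorn.util.Config

    val conf = Config.config                       // the "unicorn" namespace
    val zk = conf.getString("accumulo.zookeeper")  // hypothetical key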
--------------------------------------------------------------------------------
/util/src/main/scala/unicorn/util/Logging.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.util
18 |
19 | import org.slf4j.LoggerFactory
20 |
21 | /**
22 | * @author Haifeng Li
23 | */
24 | trait Logging {
25 |
26 | lazy val log = LoggerFactory.getLogger(getClass)
27 |
28 | }
29 |
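Typical use of the mixin (class name illustrative); slf4j names the logger after the concrete class:

    import unicorn.util.Logging

    class ImportJob extends Logging {
      def run(): Unit = log.info("import started")
    }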
--------------------------------------------------------------------------------
/util/src/main/scala/unicorn/util/Using.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn.util
18 |
19 | import java.io.Closeable
20 |
21 | /**
22 | * @author Haifeng Li
23 | */
24 | object Using {
25 | def apply[S <: Closeable, T](resource: S)(use: S => T): T = {
26 | try {
27 | use(resource)
28 | }
29 | finally {
30 | if (resource != null) resource.close
31 | }
32 | }
33 | }
34 |
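The loan pattern above in use; the resource is closed even if the body throws:

    import java.io.FileInputStream
    import unicorn.util.Using

    val firstByte = Using(new FileInputStream("/etc/hosts")) { in =>
      in.read()
    }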
--------------------------------------------------------------------------------
/util/src/main/scala/unicorn/util/package.scala:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * (C) Copyright 2015 ADP, LLC.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *******************************************************************************/
16 |
17 | package unicorn
18 |
19 | import java.nio.ByteBuffer
20 | import java.nio.charset.Charset
21 | import java.time.format.DateTimeFormatter
22 |
23 | /**
24 | * Utility functions.
25 | *
26 | * @author Haifeng Li
27 | */
28 | package object util {
29 |
30 | val iso8601DateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
31 | val iso8601DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss[.SSS]['Z']")
32 |
33 | val utf8 = Charset.forName("UTF-8")
34 |
35 | implicit def boxByteArray(x: Array[Byte]) = new ByteArray(x)
36 | implicit def unboxByteArray(x: ByteArray) = x.bytes
37 | implicit def string2Bytes(x: String) = x.getBytes(utf8)
38 | implicit def string2ByteArray(x: String) = new ByteArray(x.getBytes(utf8))
39 | implicit def bytesSeq2ByteArray(x: Seq[Array[Byte]]) = x.map { bytes => new ByteArray(bytes) }
40 | implicit def stringSeq2ByteArray(x: Seq[String]) = x.map { s => new ByteArray(s.getBytes(utf8)) }
41 |
42 | /** Measure running time of a function/block. */
43 | def time[A](f: => A) = {
44 | val s = System.nanoTime
45 | val ret = f
46 | println("time: " + (System.nanoTime - s)/1e6 + " ms")
47 | ret
48 | }
49 |
50 | /** Helper function to convert a ByteBuffer to Array[Byte]. */
51 | implicit def byteBuffer2ArrayByte(buffer: ByteBuffer): Array[Byte] = {
52 | val bytes = new Array[Byte](buffer.position)
53 | buffer.position(0)
54 | buffer.get(bytes)
55 | bytes
56 | }
57 |
58 | /** Helper function to convert a ByteBuffer to ByteArray. */
59 | implicit def byteBuffer2ByteArray(buffer: ByteBuffer): ByteArray = ByteArray(byteBuffer2ArrayByte(buffer))
60 |
61 | /** Byte array to hexadecimal string. */
62 | def bytes2Hex(bytes: Array[Byte]): String = {
63 | bytes.map("%02X" format _).mkString
64 | }
65 |
66 | /** Hexadecimal string to byte array. */
67 | def hex2Bytes(s: String): Array[Byte] = {
68 | require(s.length % 2 == 0, "Hexadecimal string must contain an even number of characters")
69 |
70 | val bytes = new Array[Byte](s.length / 2)
71 | for (i <- 0 until s.length by 2) {
72 | bytes(i/2) = java.lang.Integer.parseInt(s.substring(i, i+2), 16).toByte
73 | }
74 | bytes
75 | }
76 |
77 | val md5Encoder = java.security.MessageDigest.getInstance("MD5")
78 |
79 | /** MD5 hash function. Note that MessageDigest is stateful, so this shared instance is not thread safe. */
80 | def md5(bytes: Array[Byte]) = md5Encoder.digest(bytes)
81 |
82 | /** Byte array ordering */
83 | def compareByteArray(x: Array[Byte], y: Array[Byte]): Int = {
84 | val n = Math.min(x.length, y.length)
85 | for (i <- 0 until n) {
86 | val a: Int = x(i) & 0xFF
87 | val b: Int = y(i) & 0xFF
88 | if (a != b) return a - b
89 | }
90 | x.length - y.length
91 | }
92 |
93 | /** Left pad a String with a specified character.
94 | *
95 | * @param str the String to pad out, may be null
96 | * @param size the size to pad to
97 | * @param padChar the character to pad with
98 | * @return left padded String or original String if no padding is necessary,
99 | * null if null String input
100 | */
101 | def leftPad(str: String, size: Int, padChar: Char = ' '): String = {
102 | if (str == null)
103 | return null
104 |
105 | val pads = size - str.length
106 | if (pads <= 0)
107 | return str // returns original String when possible
108 |
109 | return (String.valueOf(padChar) * pads).concat(str)
110 | }
111 |
112 | /** Right pad a String with a specified character.
113 | *
114 | * @param str the String to pad out, may be null
115 | * @param size the size to pad to
116 | * @param padChar the character to pad with
117 | * @return right padded String or original String if no padding is necessary,
118 | * null if null String input
119 | */
120 | def rightPad(str: String, size: Int, padChar: Char = ' '): String = {
121 | if (str == null)
122 | return null
123 |
124 | val pads = size - str.length
125 | if (pads <= 0)
126 | return str // returns original String when possible
127 |
128 | return str.concat(String.valueOf(padChar) * pads)
129 | }
130 | }
131 |
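A few of the helpers above at the REPL, with values easy to check by hand:

    import unicorn.util._

    bytes2Hex(Array[Byte](1, 10, 15))  // "010A0F"
    hex2Bytes("010A0F")                // Array(1, 10, 15)
    leftPad("42", 5, '0')              // "00042"
    rightPad("ab", 4, '.')             // "ab.."
    md5("unicorn".getBytes(utf8)).hex  // 16-byte digest, hex via ByteArray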
--------------------------------------------------------------------------------