├── data └── files │ ├── users.txt │ ├── test1.txt │ ├── clicks.txt │ ├── create_nested_type.txt │ └── kv3.txt ├── src ├── main │ ├── resources │ │ ├── dashboard │ │ │ ├── README │ │ │ └── dashboard.css │ │ └── tablerdd │ │ │ ├── generator_utils.py │ │ │ ├── SharkContext_sqlRdd_generator.py │ │ │ ├── rddtable_generator.py │ │ │ └── TableRDDGenerated_generator.py │ ├── scala │ │ └── shark │ │ │ ├── api │ │ │ ├── ClassTags.scala │ │ │ ├── DataType.java │ │ │ ├── QueryExecutionException.scala │ │ │ ├── ColumnDesc.scala │ │ │ ├── ResultSet.scala │ │ │ ├── PythonTableRDD.scala │ │ │ ├── TableRDD.scala │ │ │ ├── JavaTableRDD.scala │ │ │ └── RDDTableFunctions.scala │ │ │ ├── server │ │ │ ├── SharkSessionManager.scala │ │ │ ├── SharkOperationManager.scala │ │ │ ├── SharkCLIService.scala │ │ │ ├── SharkExecuteStatementOperation.scala │ │ │ └── SharkSQLOperation.scala │ │ │ ├── tachyon │ │ │ └── TachyonException.scala │ │ │ ├── memstore2 │ │ │ ├── column │ │ │ │ ├── MemoryStoreException.scala │ │ │ │ ├── NullableColumnIterator.scala │ │ │ │ ├── NullableColumnBuilder.scala │ │ │ │ ├── ColumnIterators.scala │ │ │ │ └── ColumnBuilders.scala │ │ │ ├── TablePartitionStats.scala │ │ │ ├── ColumnarStruct.scala │ │ │ ├── LazySimpleSerDeWrapper.scala │ │ │ ├── Table.scala │ │ │ ├── TablePartitionIterator.scala │ │ │ ├── TablePartitionBuilder.scala │ │ │ ├── CacheType.scala │ │ │ ├── TableRecovery.scala │ │ │ ├── SharkTblProperties.scala │ │ │ ├── MemoryTable.scala │ │ │ ├── ColumnarStructObjectInspector.scala │ │ │ └── TablePartition.scala │ │ │ ├── parse │ │ │ ├── QueryContext.scala │ │ │ ├── SharkSemanticAnalyzerFactory.scala │ │ │ ├── QueryBlock.scala │ │ │ ├── SharkExplainSemanticAnalyzer.scala │ │ │ └── SharkLoadSemanticAnalyzer.scala │ │ │ ├── execution │ │ │ ├── LateralViewForwardOperator.scala │ │ │ ├── ForwardOperator.scala │ │ │ ├── MapSplitPruningHelper.scala │ │ │ ├── serialization │ │ │ │ ├── JavaSerializer.scala │ │ │ │ ├── KryoSerializer.scala │ │ │ │ ├── HiveStructSerializer.scala │ │ │ │ ├── HiveConfPersistenceDelegate.scala │ │ │ │ ├── KryoSerializationWrapper.scala │ │ │ │ ├── SerializableWritable.scala │ │ │ │ ├── HiveStructDeserializer.scala │ │ │ │ ├── OperatorSerializationWrapper.scala │ │ │ │ └── XmlSerializer.scala │ │ │ ├── package.scala │ │ │ ├── ReduceSinkTableDesc.scala │ │ │ ├── GroupByOperator.scala │ │ │ ├── LimitOperator.scala │ │ │ ├── ScriptOperatorHelper.scala │ │ │ ├── FilterOperator.scala │ │ │ ├── TerminalOperator.scala │ │ │ ├── SelectOperator.scala │ │ │ ├── UDTFOperator.scala │ │ │ ├── JoinUtil.scala │ │ │ └── SharkExplainTask.scala │ │ │ ├── repl │ │ │ ├── Main.scala │ │ │ └── SharkILoop.scala │ │ │ ├── util │ │ │ └── QueryRewriteUtils.scala │ │ │ ├── SharkServer2.scala │ │ │ ├── LogHelper.scala │ │ │ ├── optimizer │ │ │ ├── SharkMapJoinProcessor.scala │ │ │ └── SharkOptimizer.scala │ │ │ └── KryoRegistrator.scala │ └── java │ │ └── shark │ │ └── tgf │ │ └── Schema.java ├── test │ ├── 0.20S-exclude.txt │ ├── scala │ │ └── shark │ │ │ ├── util │ │ │ └── BloomFilterSuite.scala │ │ │ ├── SortSuite.scala │ │ │ ├── UtilsSuite.scala │ │ │ ├── CliSuite.scala │ │ │ ├── execution │ │ │ └── HiveStructSerializerSuite.scala │ │ │ ├── memstore2 │ │ │ └── column │ │ │ │ ├── NullableColumnIteratorSuite.scala │ │ │ │ └── ColumnTypeSuite.scala │ │ │ └── SharkServerSuite.scala │ ├── README.md │ └── 0.20S-include.txt └── tachyon_enabled │ └── scala │ └── shark │ └── tachyon │ └── TachyonOffHeapTableWriter.scala ├── lib ├── pyrolite.jar └── JavaEWAH-0.4.2.jar ├── README.md ├── conf ├── 
log4j.properties.template └── shark-env.sh.template ├── .gitignore ├── bin ├── shark-shell ├── shark-withinfo ├── shark-withdebug ├── beeline ├── ext │ ├── cli.sh │ ├── sharkserver.sh │ ├── beeline.sh │ └── sharkserver2.sh ├── dev │ ├── release_cleanup.sh │ ├── clear-buffer-cache.py │ ├── build_test.xml │ └── test └── shark ├── project ├── build.properties └── plugins.sbt └── run /data/files/users.txt: -------------------------------------------------------------------------------- 1 | 1 A 2 | 2 B 3 | 3 A 4 | -------------------------------------------------------------------------------- /data/files/test1.txt: -------------------------------------------------------------------------------- 1 | 1 012 2 | 2 345 3 | 3 678 4 | -------------------------------------------------------------------------------- /data/files/clicks.txt: -------------------------------------------------------------------------------- 1 | 1 0 2 | 2 1 3 | 1 1 4 | 2 0 5 | 1 1 6 | 7 | -------------------------------------------------------------------------------- /src/main/resources/dashboard/README: -------------------------------------------------------------------------------- 1 | Place static files here. 2 | -------------------------------------------------------------------------------- /lib/pyrolite.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amplab/shark/HEAD/lib/pyrolite.jar -------------------------------------------------------------------------------- /lib/JavaEWAH-0.4.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amplab/shark/HEAD/lib/JavaEWAH-0.4.2.jar -------------------------------------------------------------------------------- /data/files/create_nested_type.txt: -------------------------------------------------------------------------------- 1 | a0b00b01c001C001c002C002c011\Nc012C012d01d011d012d02d021d022 2 | a1b10c001C001c002C002d01d011d012d02\N 3 | a2c001\Nc002C002c011C011c012C012d01\Nd012d02d021d022 4 | a3\N\N\N 5 | -------------------------------------------------------------------------------- /src/test/0.20S-exclude.txt: -------------------------------------------------------------------------------- 1 | testCliDriver_archive_excludeHadoop20 2 | testCliDriver_auto_join14 3 | testCliDriver_combine2 4 | testCliDriver_ctas 5 | testCliDriver_input12 6 | testCliDriver_input39 7 | testCliDriver_join14 8 | testCliDriver_loadpart_err 9 | testCliDriver_sample_islocalmode_hook -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Shark (Hive on Spark) 2 | 3 | 4 | Development in Shark has been ended and subsumed by [Spark SQL](http://spark.apache.org/sql/). Please see [this blog post](http://databricks.com/blog/2014/07/01/shark-spark-sql-hive-on-spark-and-the-future-of-sql-on-spark.html) for more information. 
5 | -------------------------------------------------------------------------------- /data/files/kv3.txt: -------------------------------------------------------------------------------- 1 | 238val_238 2 |  3 | 311val_311 4 | val_27 5 | val_165 6 | val_409 7 | 255val_255 8 | 278val_278 9 | 98val_98 10 | val_484 11 | val_265 12 | val_193 13 | 401val_401 14 | 150val_150 15 | 273val_273 16 | 224 17 | 369 18 | 66val_66 19 | 128 20 | 213val_213 21 | 146val_146 22 | 406val_406 23 |  24 |  25 |  26 | -------------------------------------------------------------------------------- /conf/log4j.properties.template: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=INFO, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 6 | 7 | # Ignore messages below warning level from Jetty, because it's a bit verbose 8 | log4j.logger.org.eclipse.jetty=WARN 9 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/ClassTags.scala: -------------------------------------------------------------------------------- 1 | package shark.api 2 | 3 | import scala.reflect.classTag 4 | 5 | object ClassTags { 6 | // List of primitive ClassTags. 7 | val jBoolean = classTag[java.lang.Boolean] 8 | val jByte = classTag[java.lang.Byte] 9 | val jShort = classTag[java.lang.Short] 10 | val jInt = classTag[java.lang.Integer] 11 | val jLong = classTag[java.lang.Long] 12 | val jFloat = classTag[java.lang.Float] 13 | val jDouble = classTag[java.lang.Double] 14 | } 15 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/DataType.java: -------------------------------------------------------------------------------- 1 | package shark.api; 2 | 3 | import java.io.Serializable; 4 | 5 | 6 | public class DataType implements Serializable { 7 | 8 | public final String name; 9 | public final String hiveName; 10 | public final boolean isPrimitive; 11 | 12 | DataType(String name, String hiveName, boolean isPrimitive) { 13 | this.name = name; 14 | this.hiveName = hiveName; 15 | this.isPrimitive = isPrimitive; 16 | } 17 | 18 | @Override 19 | public String toString() { 20 | return name; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/shark/server/SharkSessionManager.scala: -------------------------------------------------------------------------------- 1 | package shark.server 2 | 3 | import org.apache.hadoop.hive.conf.HiveConf 4 | import org.apache.hive.service.cli.session.SessionManager 5 | import shark.Utils 6 | 7 | class SharkSessionManager extends SessionManager { 8 | override def init(hiveConf : HiveConf) { 9 | this.synchronized { 10 | val sharkOpManager = new SharkOperationManager 11 | Utils.setSuperField("operationManager", sharkOpManager, this) 12 | addService(sharkOpManager) 13 | sharkInit(hiveConf) 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/resources/tablerdd/generator_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | 4 | # e.g. 
createList(1,3, "T[", "]", ",") gives T[1],T[2],T[3] 5 | def createList(start, stop, prefix, suffix="", sep = ",", newlineAfter = 70, indent = 0): 6 | res = "" 7 | oneLine = res 8 | for y in range(start,stop+1): 9 | res += prefix + str(y) + suffix 10 | oneLine += prefix + str(y) + suffix 11 | if y != stop: 12 | res += sep 13 | oneLine += sep 14 | if len(oneLine) > newlineAfter: 15 | res += "\n" + " "*indent 16 | oneLine = "" 17 | return res 18 | 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | build/ 3 | metastore_db/ 4 | project/boot 5 | lib_managed/ 6 | TempStatsStore 7 | work/ 8 | run-tests-from-scratch-workspace/ 9 | sbt/*.jar 10 | conf/shark-env.sh 11 | 12 | # Compiled Source 13 | *.class 14 | 15 | # Packages 16 | #*.jar 17 | 18 | # Log Files 19 | *.log 20 | 21 | # Eclipse project files 22 | .classpath 23 | .project 24 | .settings 25 | 26 | # emacs backup 27 | *~ 28 | 29 | # tmp files 30 | *.swp 31 | .cache 32 | 33 | # mac os file 34 | *.DS_Store 35 | 36 | # latex files 37 | paper.pdf 38 | paper.blg 39 | paper.bbl 40 | paper.aux 41 | 42 | # IntelliJ IDE files 43 | .idea 44 | *.iml 45 | 46 | # Test Reports 47 | TEST*.xml 48 | test_warehouses 49 | 50 | # Ensime files for emacs 51 | .ensime 52 | .ensime_lucene 53 | /eclipse_bin 54 | /.scala_dependencies 55 | -------------------------------------------------------------------------------- /src/main/resources/tablerdd/SharkContext_sqlRdd_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from string import Template 3 | import sys 4 | 5 | from generator_utils import * 6 | 7 | ## This script generates functions sqlRdd for SharkContext.scala 8 | 9 | p = sys.stdout 10 | 11 | # The SharkContext declarations 12 | for x in range(2,23): 13 | sqlRddFun = Template( 14 | """ 15 | def sqlRdd[$list1](cmd: String): 16 | RDD[Tuple$num[$list2]] = { 17 | new TableRDD$num[$list2](sql2rdd(cmd), 18 | Seq($list3)) 19 | } 20 | """).substitute(num = x, 21 | list1 = createList(1, x, "T", ": M", ", ", 80, 4), 22 | list2 = createList(1, x, "T", sep=", ", indent = 4), 23 | list3 = createList(1, x, "m[T", "]", sep=", ", indent = 10)) 24 | p.write(sqlRddFun) 25 | -------------------------------------------------------------------------------- /bin/shark-shell: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | BINDIR="`dirname $0`" 19 | FWDIR="`dirname $BINDIR`" 20 | exec $FWDIR/run shark.repl.Main "$@" 21 | -------------------------------------------------------------------------------- /src/main/scala/shark/tachyon/TachyonException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.tachyon 19 | 20 | class TachyonException(msg: String) extends Exception(msg) 21 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/QueryExecutionException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.api 19 | 20 | 21 | class QueryExecutionException(message: String) extends Exception(message) 22 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | sbt.version=0.13.1 18 | 19 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/column/MemoryStoreException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2.column 19 | 20 | 21 | class MemoryStoreException(message: String) extends Exception(message) 22 | -------------------------------------------------------------------------------- /src/main/scala/shark/server/SharkOperationManager.scala: -------------------------------------------------------------------------------- 1 | package shark.server 2 | 3 | import java.util.{Map => JMap} 4 | import org.apache.hive.service.cli.operation.{ExecuteStatementOperation, OperationManager} 5 | import org.apache.hive.service.cli.session.HiveSession 6 | 7 | class SharkOperationManager extends OperationManager { 8 | override def newExecuteStatementOperation(parentSession: HiveSession, 9 | statement: String, confOverlay: 10 | JMap[String, String]) 11 | : ExecuteStatementOperation = { 12 | val executeStatementOperation = SharkExecuteStatementOperation 13 | .newExecuteStatementOperation(parentSession, statement, confOverlay) 14 | val castOp = executeStatementOperation.asInstanceOf[ExecuteStatementOperation] 15 | addOperation(castOp) 16 | castOp 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /bin/shark-withinfo: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This is really just a wrapper around bin/shark to pipe INFO log to console. 19 | # Very handy for debugging. 20 | 21 | BINDIR="`dirname $0`" 22 | exec $BINDIR/shark -hiveconf hive.root.logger=INFO,console "$@" 23 | -------------------------------------------------------------------------------- /bin/shark-withdebug: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 
5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This is really just a wrapper around bin/shark to pipe INFO log to console. 19 | # Very handy for debugging. 20 | 21 | BINDIR="`dirname $0`" 22 | exec $BINDIR/shark -hiveconf hive.root.logger=DEBUG,console "$@" 23 | 24 | -------------------------------------------------------------------------------- /bin/beeline: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | bin=`dirname "$0"` 19 | bin=`cd "$bin"; pwd` 20 | 21 | . "$bin"/shark --service beeline "$@" 22 | -------------------------------------------------------------------------------- /bin/ext/cli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
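# Service registration: each script in bin/ext appends its THISSERVICE name to SERVICE_LIST and defines a matching <service>() entry function plus a <service>_help() function. bin/shark sources every bin/ext/*.sh script and dispatches to the function selected by --service (defaulting to "cli").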
17 | 18 | THISSERVICE=cli 19 | export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " 20 | 21 | cli() { 22 | echo "Starting the Shark Command Line Client" 23 | exec $FWDIR/run shark.SharkCliDriver "$@" 24 | } 25 | 26 | cli_help() { 27 | echo "usage ./shark --service cli" 28 | } 29 | -------------------------------------------------------------------------------- /src/test/scala/shark/util/BloomFilterSuite.scala: -------------------------------------------------------------------------------- 1 | package shark.util 2 | 3 | import org.scalatest.FunSuite 4 | 5 | class BloomFilterSuite extends FunSuite{ 6 | 7 | test("Integer") { 8 | val bf = new BloomFilter(0.03, 1000000) 9 | Range(0, 1000000).foreach { 10 | i => bf.add(i) 11 | } 12 | assert(bf.contains(333)) 13 | assert(bf.contains(678)) 14 | assert(!bf.contains(1200000)) 15 | } 16 | 17 | test("Integer FP") { 18 | val bf = new BloomFilter(0.03,1000) 19 | Range(0,700).foreach { 20 | i => bf.add(i) 21 | } 22 | assert(bf.contains(333)) 23 | assert(bf.contains(678)) 24 | //is the fraction of false positives in line with what we expect ? 25 | val e = Range(0, 100).map { 26 | i => bf.contains(i*10) 27 | } 28 | val s = e.groupBy(x => x).map(x => (x._1, x._2.size)) 29 | val t = s(true) 30 | val f = s(false) 31 | assert(f > 25 && f < 35) 32 | assert(t < 75 && t > 65) 33 | // expect false positive to be < 3 % and no false negatives 34 | } 35 | } -------------------------------------------------------------------------------- /bin/ext/sharkserver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | THISSERVICE=sharkserver 19 | export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " 20 | 21 | sharkserver() { 22 | echo "Starting the Shark Server" 23 | exec $FWDIR/run shark.SharkServer "$@" 24 | } 25 | 26 | sharkserver_help() { 27 | echo "usage SHARK_PORT=xxxx ./shark --service sharkserver" 28 | echo "SHARK_PORT : Specify the server port" 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/shark/parse/QueryContext.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.parse 19 | 20 | import org.apache.hadoop.conf.Configuration 21 | import org.apache.hadoop.hive.ql.Context 22 | 23 | /** 24 | * Shark's query context. Adds Shark-specific information to Hive's Context. 25 | */ 26 | class QueryContext(conf: Configuration, val useTableRddSink: Boolean) extends Context(conf) 27 | -------------------------------------------------------------------------------- /src/test/README.md: -------------------------------------------------------------------------------- 1 | ### Hive Compatibility Test Warnings 2 | 3 | #### Test results that rely on tables with `timestamp` fields may differ across JVM versions. 4 | For example, these tests: 5 | * udf5 6 | * timestamp_1, timestamp_2, timestamp_udf 7 | 8 | Pass when running with this JVM: 9 | (Mac 10.9, AMPLab Jenkins) 10 | java version "1.7.0_25" 11 | Java(TM) SE Runtime Environment (build 1.7.0_25-b15) 12 | Java HotSpot(TM) 64-Bit Server VM (build 23.25-b01, mixed mode) 13 | 14 | But fail on EC2 when run with this JVM: 15 | (EC2 c2.2xlarge) 16 | java version "1.7.0_45" 17 | OpenJDK Runtime Environment (amzn-2.4.3.2.32.amzn1-x86_64 u45-b15) 18 | OpenJDK 64-Bit Server VM (build 24.45-b08, mixed mode) 19 | 20 | 21 | A few more tests from test_pass.txt that fall into this category: 22 | TestCliDriver_input_part8 23 | TestSharkCliDriver: testCliDriver_timestamp_1 24 | TestSharkCliDriver: testCliDriver_timestamp_2 25 | TestSharkCliDriver: testCliDriver_timestamp_3 26 | TestSharkCliDriver: testCliDriver_timestamp_udf 27 | TestSharkCliDriver: testCliDriver_udf_to_unix_timestamp 28 | TestSharkCliDriver: testCliDriver_udf5 29 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/LateralViewForwardOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution 19 | 20 | import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc 21 | 22 | import org.apache.spark.rdd.RDD 23 | 24 | 25 | class LateralViewForwardOperator extends UnaryOperator[LateralViewForwardDesc] { 26 | 27 | override def execute(): RDD[_] = executeParents().head._2 28 | 29 | override def processPartition(split: Int, iter: Iterator[_]) = iter 30 | 31 | } 32 | 33 | -------------------------------------------------------------------------------- /bin/ext/beeline.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Need arguments [host [port [db]]] 17 | THISSERVICE=beeline 18 | export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " 19 | 20 | beeline () { 21 | CLASS=org.apache.hive.beeline.BeeLine; 22 | exec $FWDIR/run $CLASS "$@" 23 | } 24 | 25 | beeline_help () { 26 | CLASS=org.apache.hive.beeline.BeeLine; 27 | exec $FWDIR/run "--help" 28 | } 29 | 30 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/ForwardOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution 19 | 20 | import org.apache.spark.rdd.RDD 21 | import org.apache.hadoop.hive.ql.plan.ForwardDesc 22 | 23 | 24 | class ForwardOperator extends UnaryOperator[ForwardDesc] { 25 | 26 | override def execute(): RDD[_] = executeParents().head._2 27 | 28 | override def processPartition(split: Int, iter: Iterator[_]) = 29 | throw new UnsupportedOperationException("ForwardOperator.processPartition()") 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/shark/tgf/Schema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.tgf; 19 | 20 | import java.lang.annotation.Retention; 21 | import java.lang.annotation.RetentionPolicy; 22 | import java.lang.annotation.ElementType; 23 | import java.lang.annotation.Target; 24 | 25 | 26 | /** 27 | * Schema annotation for TGFs, example syntax: @Schema(spec = "name string, age int") 28 | */ 29 | @Retention(RetentionPolicy.RUNTIME) 30 | @Target(ElementType.METHOD) 31 | public @interface Schema { 32 | String spec(); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/resources/dashboard/dashboard.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | body { 19 | background-color : #ffffff; 20 | font-family : sans-serif; 21 | } 22 | 23 | th { 24 | padding-bottom : 10px; 25 | padding-top : 10px; 26 | padding-left : 10px; 27 | padding-right : 10px; 28 | } 29 | 30 | td.node { 31 | padding-bottom : 8px; 32 | padding-top : 8px; 33 | padding-left : 8px; 34 | padding-right : 8px; 35 | } 36 | 37 | table.percent_bar { 38 | width: 200px; 39 | height: 15px; 40 | } 41 | 42 | td.percent_used { 43 | background: #AAAAFF; 44 | } 45 | 46 | -------------------------------------------------------------------------------- /bin/ext/sharkserver2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | THISSERVICE=sharkserver2 19 | export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " 20 | 21 | # Use Java to launch Shark otherwise the unit tests cannot properly kill 22 | # the server process. 
23 | export SHARK_LAUNCH_WITH_JAVA=1 24 | 25 | sharkserver2() { 26 | echo "Starting the Shark Server" 27 | exec $FWDIR/run shark.SharkServer2 "$@" 28 | } 29 | 30 | sharkserver2_help() { 31 | echo "usage HIVE_SERVER2_THRIFT_PORT=xxxx ./shark --service sharkserver2" 32 | echo "HIVE_SERVER2_THRIFT_PORT : Specify the server port" 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/MapSplitPruningHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hive.serde2.objectinspector 19 | 20 | import org.apache.hadoop.hive.serde2.objectinspector.UnionStructObjectInspector.MyField 21 | 22 | 23 | object MapSplitPruningHelper { 24 | 25 | /** 26 | * Extract the UnionStructObjectInspector.MyField's `structField` reference, which is 27 | * package-private. 28 | */ 29 | def getStructFieldFromUnionOIField(unionOIMyField: MyField): StructField = { 30 | unionOIMyField.structField 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/TablePartitionStats.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import shark.memstore2.column.ColumnStats 21 | 22 | 23 | /** 24 | * Stores column statistics for a table partition. 25 | */ 26 | class TablePartitionStats(val stats: Array[ColumnStats[_]], val numRows: Long) 27 | extends Serializable { 28 | 29 | override def toString = 30 | numRows + " rows\n" + 31 | stats.zipWithIndex.map { case (column, index) => 32 | " column " + index + " " + 33 | { if (column != null) column.toString else "no column statistics" } 34 | }.mkString("\n") 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/JavaSerializer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution.serialization 19 | 20 | import java.nio.ByteBuffer 21 | 22 | import org.apache.spark.SparkEnv 23 | import org.apache.spark.serializer.{JavaSerializer => SparkJavaSerializer} 24 | 25 | 26 | object JavaSerializer { 27 | @transient val ser = new SparkJavaSerializer(SparkEnv.get.conf) 28 | 29 | def serialize[T](o: T): Array[Byte] = { 30 | ser.newInstance().serialize(o).array() 31 | } 32 | 33 | def deserialize[T](bytes: Array[Byte]): T = { 34 | ser.newInstance().deserialize[T](ByteBuffer.wrap(bytes)) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /bin/dev/release_cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | DEVDIR="`dirname $0`" 19 | BINDIR="`dirname $DEVDIR`" 20 | FWDIR="`dirname $BINDIR`" 21 | 22 | rm -rf $FWDIR/run-tests-from-scratch-workspace 23 | rm -rf $FWDIR/test_warehouses 24 | 25 | rm -rf $FWDIR/conf/shark-env.sh 26 | 27 | rm -rf $FWDIR/metastore_db 28 | rm -rf $FWDIR/derby.log 29 | 30 | rm -rf $FWDIR/project/target $FWDIR/project/project/target 31 | 32 | rm -rf $FWDIR/target/resolution-cache 33 | rm -rf $FWDIR/target/streams 34 | rm -rf $FWDIR/target/scala-*/cache 35 | rm -rf $FWDIR/target/scala-*/classes 36 | rm -rf $FWDIR/target/scala-*/test-classes 37 | 38 | find $FWDIR -name ".DS_Store" -exec rm {} \; 39 | find $FWDIR -name ".history" -exec rm {} \; 40 | 41 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark 19 | 20 | import scala.language.implicitConversions 21 | 22 | import org.apache.hadoop.hive.ql.plan.OperatorDesc 23 | 24 | import shark.execution.serialization.KryoSerializationWrapper 25 | import shark.execution.serialization.OperatorSerializationWrapper 26 | 27 | package object execution { 28 | 29 | type HiveDesc = OperatorDesc // XXXDesc in Hive is the subclass of Serializable 30 | 31 | implicit def opSerWrapper2op[T <: Operator[_ <: HiveDesc]]( 32 | wrapper: OperatorSerializationWrapper[T]): T = wrapper.value 33 | 34 | implicit def kryoWrapper2object[T](wrapper: KryoSerializationWrapper[T]): T = wrapper.value 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/ReduceSinkTableDesc.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution 19 | 20 | import org.apache.hadoop.hive.ql.plan.TableDesc 21 | import shark.LogHelper 22 | 23 | 24 | trait ReduceSinkTableDesc extends LogHelper { 25 | self: Operator[_ <: HiveDesc] => 26 | 27 | // Seq(tag, (Key TableDesc, Value TableDesc)) 28 | def keyValueDescs(): Seq[(Int, (TableDesc, TableDesc))] = { 29 | // get the parent ReduceSinkOperator and sort it by tag 30 | val reduceSinkOps = 31 | for (op <- self.parentOperators.toSeq if op.isInstanceOf[ReduceSinkOperator]) 32 | yield op.asInstanceOf[ReduceSinkOperator] 33 | 34 | reduceSinkOps.map(f => (f.getTag, f.getKeyValueTableDescs)) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/ColumnarStruct.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.util.{List => JList, ArrayList => JArrayList} 21 | 22 | import shark.memstore2.column.ColumnIterator 23 | 24 | 25 | /** 26 | * A struct returned by the TablePartitionIterator. 
It contains references to the same set of 27 | * ColumnIterators and use those to return individual fields back to the object inspectors. 28 | */ 29 | class ColumnarStruct(columnIterators: Array[ColumnIterator]) { 30 | 31 | def getField(columnId: Int): Object = columnIterators(columnId).current 32 | 33 | def getFieldsAsList(): JList[Object] = { 34 | val list = new JArrayList[Object](columnIterators.length) 35 | var i = 0 36 | while (i < columnIterators.length) { 37 | list.add(columnIterators(i).current) 38 | i += 1 39 | } 40 | list 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2012 The Regents of The University California. 2 | // All rights reserved. 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | scalaVersion := "2.10.3" 16 | 17 | resolvers += Resolver.url( 18 | "sbt-plugin-releases", 19 | new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns) 20 | 21 | resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/" 22 | 23 | addSbtPlugin("org.ensime" % "ensime-sbt-cmd" % "0.1.2") 24 | 25 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0") 26 | 27 | addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0") 28 | 29 | addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1") 30 | 31 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") 32 | 33 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.10.2") 34 | 35 | addSbtPlugin("com.typesafe.sbt" % "sbt-pgp" % "0.8.3") 36 | 37 | addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0") 38 | 39 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/GroupByOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.execution 19 | 20 | import org.apache.hadoop.hive.ql.exec.{GroupByOperator => HiveGroupByOperator} 21 | import org.apache.hadoop.hive.ql.exec.{ReduceSinkOperator => HiveReduceSinkOperator} 22 | 23 | 24 | /** 25 | * Unlike Hive, group by in Shark is split into two different operators: 26 | * GroupByPostShuffleOperator and GroupByPreShuffleOperator. The pre-shuffle one 27 | * serves as a combiner on each map partition. 28 | * 29 | * These two classes are defined in the org.apache.hadoop.hive.ql.exec package 30 | * (scala files) to get around the problem that some Hive classes are only 31 | * visible within that package. 32 | */ 33 | object GroupByOperator { 34 | 35 | def isPostShuffle(op: HiveGroupByOperator): Boolean = { 36 | op.getParentOperators().get(0).isInstanceOf[HiveReduceSinkOperator] 37 | } 38 | 39 | } 40 | 41 | -------------------------------------------------------------------------------- /bin/dev/clear-buffer-cache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Clear OS buffer cache for mesos clusters on EC2. 19 | 20 | import os 21 | import thread 22 | import time 23 | 24 | machinesFile = "/root/spark-ec2/slaves" 25 | machs = open(machinesFile).readlines() 26 | machs = map(lambda s: s.strip(),machs) 27 | machCount = len(machs) 28 | machID = 0 29 | cmd = "sync; echo 3 > /proc/sys/vm/drop_caches" 30 | done = {} 31 | 32 | def dropCachesThread( mach, myID, *args ): 33 | print "SSH to machine %i" % (myID) 34 | os.system("ssh %s '%s'" % (mach, cmd)) 35 | done[mach] = "done" 36 | 37 | for mach in ( machs ): 38 | thread.start_new_thread(dropCachesThread, (mach, machID)) 39 | machID = machID + 1 40 | time.sleep(0.2) 41 | 42 | while (len(done.keys()) < machCount): 43 | print "waiting for %d tasks to finish..." % (machCount - len(done.keys())) 44 | time.sleep(1) 45 | 46 | print "Done with %i threads" % (len(done.keys())) 47 | 48 | -------------------------------------------------------------------------------- /src/main/scala/shark/repl/Main.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.repl 19 | 20 | import org.apache.hadoop.hive.common.LogUtils 21 | import org.apache.hadoop.hive.common.LogUtils.LogInitializationException 22 | 23 | 24 | /** 25 | * Shark's REPL entry point. 26 | */ 27 | object Main { 28 | 29 | try { 30 | LogUtils.initHiveLog4j() 31 | } catch { 32 | case e: LogInitializationException => // Ignore the error. 33 | } 34 | 35 | private var _interp: SharkILoop = null 36 | 37 | def interp = _interp 38 | 39 | private def interp_=(i: SharkILoop) { _interp = i } 40 | 41 | def main(args: Array[String]) { 42 | 43 | _interp = new SharkILoop 44 | 45 | // We need to set spark.repl.InterpAccessor.interp since it is used 46 | // everywhere in spark.repl code. 47 | org.apache.spark.repl.Main.interp = _interp 48 | 49 | // Start an infinite loop ... 50 | _interp.process(args) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/shark/server/SharkCLIService.scala: -------------------------------------------------------------------------------- 1 | package shark.server 2 | 3 | import org.apache.hive.service.cli.CLIService 4 | import org.apache.hadoop.hive.conf.HiveConf 5 | import org.apache.hadoop.hive.shims.ShimLoader 6 | import org.apache.hive.service.auth.HiveAuthFactory 7 | import java.io.IOException 8 | import org.apache.hive.service.ServiceException 9 | import javax.security.auth.login.LoginException 10 | import org.apache.spark.SparkEnv 11 | import shark.{SharkServer, Utils} 12 | 13 | class SharkCLIService extends CLIService { 14 | override def init(hiveConf: HiveConf) { 15 | this.synchronized { 16 | Utils.setSuperField("hiveConf", hiveConf, this) 17 | val sharkSM = new SharkSessionManager 18 | Utils.setSuperField("sessionManager", sharkSM, this) 19 | addService(sharkSM) 20 | try { 21 | HiveAuthFactory.loginFromKeytab(hiveConf) 22 | val serverUserName = ShimLoader.getHadoopShims 23 | .getShortUserName(ShimLoader.getHadoopShims.getUGIForConf(hiveConf)) 24 | Utils.setSuperField("serverUserName", serverUserName, this) 25 | } catch { 26 | case e: IOException => { 27 | throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) 28 | } 29 | case e: LoginException => { 30 | throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) 31 | } 32 | } 33 | // Make sure the ThreadLocal SparkEnv reference is the same for all threads. 34 | SparkEnv.set(SharkServer.sparkEnv) 35 | sharkInit(hiveConf) 36 | } 37 | } 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /bin/shark: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
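# Dispatch logic: parse --service <name> and --help, source every script in bin/ext to build SERVICE_LIST, then run the matching <service>() function (or <service>_help() when --help is given) with the remaining arguments. When no --service is given, the "cli" service is used; an unknown service prints the available services and exits with status 7.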
17 | 18 | bin="`dirname $0`" 19 | bin=`cd "$bin"; pwd` 20 | export FWDIR="`dirname $bin`" 21 | 22 | SERVICE="" 23 | HELP="" 24 | while [ $# -gt 0 ];do 25 | case "$1" in 26 | --service) 27 | shift 28 | SERVICE=$1 29 | shift 30 | ;; 31 | --help) 32 | HELP=_help 33 | shift 34 | ;; 35 | *) 36 | break 37 | ;; 38 | esac 39 | done 40 | 41 | if [ "$SERVICE" = "" ] ; then 42 | if [ "$HELP" = "_help" ] ; then 43 | SERVICE="help" 44 | else 45 | SERVICE="cli" 46 | fi 47 | fi 48 | SERVICE_LIST="" 49 | 50 | for i in "$bin"/ext/*.sh ; do 51 | . $i 52 | done 53 | 54 | TORUN="" 55 | for j in $SERVICE_LIST ; do 56 | if [ "$j" = "$SERVICE" ] ; then 57 | TORUN=${j}$HELP 58 | fi 59 | done 60 | echo "$@" 61 | if [ "$TORUN" = "" ] ; then 62 | echo "Service $SERVICE not found" 63 | echo "Available Services: $SERVICE_LIST" 64 | exit 7 65 | else 66 | $TORUN "$@" 67 | fi 68 | 69 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/LimitOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution 19 | 20 | import org.apache.hadoop.hive.ql.plan.LimitDesc 21 | 22 | import org.apache.spark.rdd.{EmptyRDD, RDD} 23 | 24 | import shark.SharkEnv 25 | 26 | 27 | class LimitOperator extends UnaryOperator[LimitDesc] { 28 | 29 | // Only works on the master program. 30 | def limit = desc.getLimit() 31 | 32 | override def execute(): RDD[_] = { 33 | 34 | val limitNum = desc.getLimit() 35 | 36 | if (limitNum > 0) { 37 | // Take limit on each partition. 38 | val inputRdd = executeParents().head._2 39 | inputRdd.mapPartitions({ iter => iter.take(limitNum) }, preservesPartitioning = true) 40 | } else { 41 | new EmptyRDD(SharkEnv.sc) 42 | } 43 | } 44 | 45 | override def processPartition(split: Int, iter: Iterator[_]) = { 46 | throw new UnsupportedOperationException("LimitOperator.processPartition()") 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/KryoSerializer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution.serialization 19 | 20 | import java.nio.ByteBuffer 21 | 22 | import org.apache.spark.{SparkConf, SparkEnv} 23 | import org.apache.spark.serializer.{KryoSerializer => SparkKryoSerializer} 24 | 25 | import shark.SharkContext 26 | 27 | /** 28 | * Java object serialization using Kryo. This is much more efficient, but Kryo 29 | * sometimes is buggy to use. We use this mainly to serialize the object 30 | * inspectors. 31 | */ 32 | object KryoSerializer { 33 | 34 | @transient lazy val ser: SparkKryoSerializer = { 35 | val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf()) 36 | new SparkKryoSerializer(sparkConf) 37 | } 38 | 39 | def serialize[T](o: T): Array[Byte] = { 40 | ser.newInstance().serialize(o).array() 41 | } 42 | 43 | def deserialize[T](bytes: Array[Byte]): T = { 44 | ser.newInstance().deserialize[T](ByteBuffer.wrap(bytes)) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/ScriptOperatorHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hive.ql.exec 19 | // Put this file in Hive's exec package to access package level visible fields and methods. 20 | 21 | import java.util.{Map => JMap} 22 | 23 | import org.apache.hadoop.conf.Configuration 24 | 25 | 26 | /** 27 | * A helper class that gets us PathFinder and alias in ScriptOperator. 28 | * This is needed since PathFinder inner class is not declared as 29 | * static/public. 30 | */ 31 | class ScriptOperatorHelper(val op: ScriptOperator) extends ScriptOperator { 32 | 33 | def newPathFinderInstance(envpath: String): op.PathFinder = { 34 | new op.PathFinder(envpath) 35 | } 36 | 37 | def getAlias: String = op.alias 38 | 39 | override def addJobConfToEnvironment(conf: Configuration, env: JMap[String, String]) { 40 | op.addJobConfToEnvironment(conf, env) 41 | } 42 | 43 | override def safeEnvVarName(variable: String): String = { 44 | op.safeEnvVarName(variable) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/ColumnDesc.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.api 19 | 20 | import java.util.{List => JList} 21 | 22 | import scala.collection.JavaConversions._ 23 | 24 | import org.apache.hadoop.hive.metastore.api.FieldSchema 25 | import org.apache.hadoop.hive.metastore.api.Schema 26 | 27 | 28 | class ColumnDesc(val name: String, val dataType: DataType) extends Serializable { 29 | 30 | private[shark] def this(hiveSchema: FieldSchema) { 31 | this(hiveSchema.getName, DataTypes.fromHiveType(hiveSchema.getType)) 32 | } 33 | 34 | override def toString = "ColumnDesc(name: %s, type: %s)".format(name, dataType.name) 35 | } 36 | 37 | 38 | object ColumnDesc { 39 | 40 | def createSchema(fieldSchemas: JList[FieldSchema]): Array[ColumnDesc] = { 41 | if (fieldSchemas == null) Array.empty else fieldSchemas.map(new ColumnDesc(_)).toArray 42 | } 43 | 44 | def createSchema(schema: Schema): Array[ColumnDesc] = { 45 | if (schema == null) Array.empty else createSchema(schema.getFieldSchemas) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/test/0.20S-include.txt: -------------------------------------------------------------------------------- 1 | testCliDriver_archive 2 | testCliDriver_archive_corrupt 3 | testCliDriver_infer_bucket_sort_list_bucket 4 | testCliDriver_list_bucket_dml_1 5 | testCliDriver_list_bucket_dml_11 6 | testCliDriver_list_bucket_dml_12 7 | testCliDriver_list_bucket_dml_13 8 | testCliDriver_list_bucket_dml_2 9 | testCliDriver_list_bucket_dml_3 10 | testCliDriver_list_bucket_dml_4 11 | testCliDriver_list_bucket_dml_5 12 | testCliDriver_list_bucket_dml_6 13 | testCliDriver_list_bucket_dml_7 14 | testCliDriver_list_bucket_dml_8 15 | testCliDriver_list_bucket_dml_9 16 | testCliDriver_list_bucket_query_multiskew_1 17 | testCliDriver_list_bucket_query_multiskew_2 18 | testCliDriver_list_bucket_query_multiskew_3 19 | testCliDriver_list_bucket_query_oneskew_1 20 | testCliDriver_list_bucket_query_oneskew_2 21 | testCliDriver_list_bucket_query_oneskew_3 22 | testCliDriver_recursive_dir 23 | testCliDriver_skewjoin_union_remove_1 24 | testCliDriver_skewjoin_union_remove_2 25 | testCliDriver_split_sample 26 | testCliDriver_union_remove_1 27 | testCliDriver_union_remove_10 28 | testCliDriver_union_remove_11 29 | testCliDriver_union_remove_12 30 | testCliDriver_union_remove_13 31 | testCliDriver_union_remove_14 32 | testCliDriver_union_remove_15 33 | testCliDriver_union_remove_16 34 | testCliDriver_union_remove_17 35 | testCliDriver_union_remove_18 36 | testCliDriver_union_remove_19 37 | testCliDriver_union_remove_2 38 | testCliDriver_union_remove_20 39 | testCliDriver_union_remove_21 40 | testCliDriver_union_remove_22 41 | testCliDriver_union_remove_23 42 | testCliDriver_union_remove_24 43 | testCliDriver_union_remove_3 44 | testCliDriver_union_remove_4 45 | testCliDriver_union_remove_5 46 | testCliDriver_union_remove_7 47 | testCliDriver_union_remove_8 48 | testCliDriver_union_remove_9 -------------------------------------------------------------------------------- /src/main/scala/shark/api/ResultSet.scala: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.api 19 | 20 | import java.util.{Arrays, Collections, List => JList} 21 | 22 | 23 | class ResultSet private[shark](_schema: Array[ColumnDesc], _results: Array[Array[Object]]) { 24 | 25 | /** 26 | * The schema for the query results, for use in Scala. 27 | */ 28 | def schema: Seq[ColumnDesc] = _schema.toSeq 29 | 30 | /** 31 | * Query results, for use in Scala. 32 | */ 33 | def results: Seq[Array[Object]] = _results.toSeq 34 | 35 | /** 36 | * Get the schema for the query results as an immutable list, for use in Java. 37 | */ 38 | def getSchema: JList[ColumnDesc] = Collections.unmodifiableList(Arrays.asList(_schema : _*)) 39 | 40 | /** 41 | * Get the query results as an immutable list, for use in Java. 42 | */ 43 | def getResults: JList[Array[Object]] = Collections.unmodifiableList(Arrays.asList(_results : _*)) 44 | 45 | override def toString: String = { 46 | "ResultSet(" + _schema.map(c => c.name + " " + c.dataType).mkString("\t") + ")\n" + 47 | _results.map(row => row.mkString("\t")).mkString("\n") 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/shark/parse/SharkSemanticAnalyzerFactory.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.parse 19 | 20 | import org.apache.hadoop.hive.conf.HiveConf 21 | import org.apache.hadoop.hive.ql.parse.{ASTNode, BaseSemanticAnalyzer, DDLSemanticAnalyzer, 22 | ExplainSemanticAnalyzer, LoadSemanticAnalyzer, SemanticAnalyzerFactory, SemanticAnalyzer} 23 | 24 | import shark.SharkConfVars 25 | 26 | 27 | object SharkSemanticAnalyzerFactory { 28 | 29 | /** 30 | * Return a semantic analyzer for the given ASTNode. 
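   * Hive's SemanticAnalyzerFactory makes the initial choice; this factory then substitutes
   * the Shark-specific analyzer where one exists: SharkSemanticAnalyzer for ordinary queries
   * and CTAS, SharkExplainSemanticAnalyzer when the Shark explain mode is set to "shark",
   * and the Shark DDL and LOAD analyzers. Any other analyzer is returned unchanged.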
31 | */ 32 | def get(conf: HiveConf, tree:ASTNode): BaseSemanticAnalyzer = { 33 | val explainMode = SharkConfVars.getVar(conf, SharkConfVars.EXPLAIN_MODE) == "shark" 34 | 35 | SemanticAnalyzerFactory.get(conf, tree) match { 36 | case _: SemanticAnalyzer => 37 | new SharkSemanticAnalyzer(conf) 38 | case _: ExplainSemanticAnalyzer if explainMode => 39 | new SharkExplainSemanticAnalyzer(conf) 40 | case _: DDLSemanticAnalyzer => 41 | new SharkDDLSemanticAnalyzer(conf) 42 | case _: LoadSemanticAnalyzer => 43 | new SharkLoadSemanticAnalyzer(conf) 44 | case sem: BaseSemanticAnalyzer => 45 | sem 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/shark/server/SharkExecuteStatementOperation.scala: -------------------------------------------------------------------------------- 1 | package shark.server 2 | 3 | import java.lang.reflect.Constructor 4 | import java.util.{Map => JMap} 5 | import org.apache.hive.service.cli.session.HiveSession 6 | 7 | object SharkExecuteStatementOperation { 8 | def newExecuteStatementOperation(parentSession: HiveSession, 9 | statement: String, 10 | confOverlay: JMap[String, String]) 11 | : Any = { 12 | val tokens = statement.trim().split("\\s+") 13 | val command = tokens{0}.toLowerCase 14 | command match { 15 | case "set" => { 16 | val ctor = accessCtor("org.apache.hive.service.cli.operation.SetOperation") 17 | ctor.newInstance(parentSession, statement, confOverlay) 18 | } 19 | case "dfs" => { 20 | val ctor = accessCtor("org.apache.hive.service.cli.operation.DfsOperation") 21 | ctor.newInstance(parentSession, statement, confOverlay) 22 | } 23 | case "add" => { 24 | val ctor = accessCtor("org.apache.hive.service.cli.operation.AddResourceOperation") 25 | ctor.newInstance(parentSession, statement, confOverlay) 26 | } 27 | case "delete" => { 28 | val ctor = accessCtor("org.apache.hive.service.cli.operation.DeleteResourceOperation") 29 | ctor.newInstance(parentSession, statement, confOverlay) 30 | } 31 | case _ => { 32 | new SharkSQLOperation(parentSession, statement, confOverlay) 33 | } 34 | } 35 | } 36 | 37 | def accessCtor(className : String) : Constructor[_] = { 38 | val setClass = Class.forName(className) 39 | val setConst = 40 | setClass.getDeclaredConstructor( 41 | classOf[HiveSession], 42 | classOf[String], 43 | classOf[JMap[String, String]]) 44 | setConst.setAccessible(true) 45 | setConst 46 | } 47 | } -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/LazySimpleSerDeWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.util.{List => JList, Properties} 21 | 22 | import org.apache.hadoop.conf.Configuration 23 | import org.apache.hadoop.hive.serde2.{SerDe, SerDeStats} 24 | import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe 25 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 26 | import org.apache.hadoop.io.Writable 27 | 28 | 29 | class LazySimpleSerDeWrapper extends SerDe { 30 | 31 | val _lazySimpleSerDe = new LazySimpleSerDe() 32 | 33 | override def initialize(conf: Configuration, tbl: Properties) { 34 | _lazySimpleSerDe.initialize(conf, tbl) 35 | } 36 | 37 | override def deserialize(blob: Writable): Object = _lazySimpleSerDe.deserialize(blob) 38 | 39 | override def getSerDeStats(): SerDeStats = _lazySimpleSerDe.getSerDeStats() 40 | 41 | override def getObjectInspector: ObjectInspector = _lazySimpleSerDe.getObjectInspector 42 | 43 | override def getSerializedClass: Class[_ <: Writable] = _lazySimpleSerDe.getSerializedClass 44 | 45 | override def serialize(obj: Object, objInspector: ObjectInspector): Writable = { 46 | _lazySimpleSerDe.serialize(obj, objInspector) 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/test/scala/shark/SortSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark 19 | 20 | import org.apache.hadoop.io.BytesWritable 21 | 22 | import org.scalatest.FunSuite 23 | 24 | import org.apache.spark.SparkContext 25 | import org.apache.spark.rdd.RDD 26 | 27 | import shark.execution.{ReduceKey, ReduceKeyMapSide, ReduceKeyReduceSide, RDDUtils} 28 | 29 | 30 | class SortSuite extends FunSuite { 31 | 32 | TestUtils.init() 33 | 34 | var sc: SparkContext = SharkRunner.init() 35 | 36 | test("order by limit") { 37 | val data = Array((4, 14), (1, 11), (7, 17), (0, 10)) 38 | val expected = data.sortWith(_._1 < _._1).toSeq 39 | val rdd: RDD[(ReduceKey, BytesWritable)] = sc.parallelize(data, 50).map { x => 40 | (new ReduceKeyMapSide(new BytesWritable(Array[Byte](x._1.toByte))), 41 | new BytesWritable(Array[Byte](x._2.toByte))) 42 | } 43 | for (k <- 0 to 5) { 44 | val sortedRdd = RDDUtils.topK(rdd, k).asInstanceOf[RDD[(ReduceKeyReduceSide, Array[Byte])]] 45 | val output = sortedRdd.map { case(k, v) => 46 | (k.byteArray(0).toInt, v(0).toInt) 47 | }.collect().toSeq 48 | assert(output.size === math.min(k, 4)) 49 | assert(output === expected.take(math.min(k, 4))) 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/shark/parse/QueryBlock.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.parse 19 | 20 | import org.apache.hadoop.hive.ql.parse.{QB => HiveQueryBlock} 21 | import org.apache.hadoop.hive.ql.plan.CreateTableDesc 22 | import org.apache.hadoop.hive.ql.plan.TableDesc 23 | 24 | import shark.memstore2.CacheType 25 | import shark.memstore2.CacheType._ 26 | 27 | 28 | /** 29 | * A container for flags and table metadata. Used in SharkSemanticAnalyzer while parsing 30 | * and analyzing ASTs (e.g. in SharkSemanticAnalyzer#analyzeCreateTable()). 31 | */ 32 | class QueryBlock(outerID: String, alias: String, isSubQuery: Boolean) 33 | extends HiveQueryBlock(outerID, alias, isSubQuery) { 34 | 35 | // The CacheType for the table that will be created from CREATE TABLE/CTAS, or updated for an 36 | // INSERT. 37 | var cacheMode = CacheType.NONE 38 | 39 | // Descriptor for the table being updated by an INSERT. 40 | var targetTableDesc: TableDesc = _ 41 | 42 | // Hive's QB uses `tableDesc` to refer to the CreateTableDesc. A direct `createTableDesc` 43 | // makes it easier to differentiate from `_targetTableDesc`. 44 | def createTableDesc: CreateTableDesc = super.getTableDesc 45 | 46 | def createTableDesc_= (desc: CreateTableDesc) = super.setTableDesc(desc) 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/shark/util/QueryRewriteUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.util 19 | 20 | import org.apache.hadoop.hive.ql.parse.SemanticException 21 | 22 | object QueryRewriteUtils { 23 | 24 | def cacheToAlterTable(cmd: String): String = { 25 | val CACHE_TABLE_DEFAULT = "(?i)CACHE ([^ ]+)".r 26 | val CACHE_TABLE_IN = "(?i)CACHE ([^ ]+) IN ([^ ]+)".r 27 | 28 | cmd match { 29 | case CACHE_TABLE_DEFAULT(tableName) => 30 | s"ALTER TABLE $tableName SET TBLPROPERTIES ('shark.cache' = 'memory')" 31 | case CACHE_TABLE_IN(tableName, cacheType) => 32 | s"ALTER TABLE $tableName SET TBLPROPERTIES ('shark.cache' = '$cacheType')" 33 | case _ => 34 | throw new SemanticException( 35 | s"CACHE accepts a single table name: 'CACHE <table name> [IN <cache type>]'" + 36 | s" (received command: '$cmd')") 37 | } 38 | } 39 | 40 | def uncacheToAlterTable(cmd: String): String = { 41 | val cmdSplit = cmd.split(' ') 42 | if (cmdSplit.size == 2) { 43 | val tableName = cmdSplit(1) 44 | "ALTER TABLE %s SET TBLPROPERTIES ('shark.cache' = 'false')".format(tableName) 45 | } else { 46 | throw new SemanticException( 47 | s"UNCACHE accepts a single table name: 'UNCACHE <table name>
' (received command: '$cmd')") 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/HiveStructSerializer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hive.serde2.binarysortable 19 | 20 | // Putting it in this package so it can access the package level visible function 21 | // static void BinarySortableSerDe.serialize(OutputByteBuffer, Object, ObjectInspector, boolean) 22 | 23 | import java.util.{List => JList} 24 | 25 | import org.apache.hadoop.hive.serde2.objectinspector.{StructField, StructObjectInspector} 26 | 27 | 28 | /** 29 | * Used to serialize a row of data. It needs to be initialized with an object inspector 30 | * for the row. 31 | */ 32 | class HiveStructSerializer(val rowObjectInspector: StructObjectInspector) { 33 | 34 | def serialize(obj: Object): Array[Byte] = { 35 | outputByteBuffer.reset() 36 | var i = 0 37 | while (i < fields.size) { 38 | BinarySortableSerDe.serialize( 39 | outputByteBuffer, 40 | rowObjectInspector.getStructFieldData(obj, fields.get(i)), 41 | fields.get(i).getFieldObjectInspector(), 42 | false) 43 | i += 1 44 | } 45 | val bytes = new Array[Byte](outputByteBuffer.length) 46 | System.arraycopy(outputByteBuffer.getData(), 0, bytes, 0, outputByteBuffer.length) 47 | bytes 48 | } 49 | 50 | private val outputByteBuffer = new OutputByteBuffer 51 | private val fields: JList[_ <: StructField] = rowObjectInspector.getAllStructFieldRefs 52 | } 53 | -------------------------------------------------------------------------------- /conf/shark-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # (Required) Amount of memory used per slave node. This should be in the same 19 | # format as the JVM's -Xmx option, e.g. 300m or 1g. 20 | export SPARK_MEM=1g 21 | 22 | # (Required) Set the master program's memory 23 | export SHARK_MASTER_MEM=1g 24 | 25 | # (Optional) Specify the location of Hive's configuration directory. 
By default, 26 | # Shark run scripts will point it to $SHARK_HOME/conf 27 | #export HIVE_CONF_DIR="" 28 | 29 | # For running Shark in distributed mode, set the following: 30 | #export HADOOP_HOME="" 31 | #export SPARK_HOME="" 32 | #export MASTER="" 33 | # Only required if using Mesos: 34 | #export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so 35 | 36 | # Only required if run shark with spark on yarn 37 | #export SHARK_EXEC_MODE=yarn 38 | #export SPARK_ASSEMBLY_JAR= 39 | #export SHARK_ASSEMBLY_JAR= 40 | 41 | # (Optional) Extra classpath 42 | #export SPARK_LIBRARY_PATH="" 43 | 44 | # Java options 45 | # On EC2, change the local.dir to /mnt/tmp 46 | SPARK_JAVA_OPTS=" -Dspark.local.dir=/tmp " 47 | SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 " 48 | SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps " 49 | export SPARK_JAVA_OPTS 50 | 51 | # (Optional) Tachyon Related Configuration 52 | #export TACHYON_MASTER="" # e.g. "localhost:19998" 53 | #export TACHYON_WAREHOUSE_PATH=/sharktables # Could be any valid path name 54 | 55 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/PythonTableRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.api 19 | 20 | import scala.collection.JavaConversions._ 21 | 22 | import net.razorvine.pickle.Pickler 23 | 24 | import org.apache.spark.api.java.JavaRDD 25 | 26 | class PythonTableRDD( 27 | tableRDD: JavaTableRDD) 28 | extends JavaRDD[Array[Byte]](tableRDD.rdd.mapPartitions(PythonTableRDD.javaRowToPythonRow)) { 29 | val schema: java.util.Map[String, Int] = tableRDD.first.colname2indexMap 30 | } 31 | 32 | /* 33 | * These static methods are to be called by Python to run SQL queries. sql2rdd runs the query and 34 | * attempts to convert the JavaTableRDD to a Python compatible RDD (an RDD of ByteArrays 35 | * that are pickled Python objects). We map the pickle serializer per partition to convert the Java 36 | * objects to python objects, and we return the resulting PythonTableRDD to the caller (presumably 37 | * a Python process). 
38 | */ 39 | object PythonTableRDD { 40 | 41 | def sql2rdd(sc: JavaSharkContext, cmd: String): PythonTableRDD = { 42 | new PythonTableRDD(sc.sql2rdd(cmd)) 43 | } 44 | 45 | // Pickle a row of java objects to a row of pickled python objects (byte arrays) 46 | def javaRowToPythonRow(rows: Iterator[Row]): Iterator[Array[Byte]] = { 47 | // Pickler is not threadsafe, so we use 1 per partition 48 | val pickle = new Pickler 49 | rows.map { r => 50 | pickle.dumps(r.toSeq.toArray) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/scala/shark/UtilsSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark 19 | 20 | import java.util.{HashMap => JHashMap} 21 | 22 | import org.apache.hadoop.conf.Configuration 23 | 24 | import org.scalatest.{BeforeAndAfter, FunSuite} 25 | 26 | 27 | class UtilsSuite extends FunSuite { 28 | 29 | test("set aws credentials") { 30 | var conf = new Configuration 31 | var map = new JHashMap[String, String]() 32 | Utils.setAwsCredentials(conf, map) 33 | assert(conf.get("fs.s3n.awsAccessKeyId") === null) 34 | assert(conf.get("fs.s3n.awsSecretAccessKey") === null) 35 | assert(conf.get("fs.s3.awsAccessKeyId") === null) 36 | assert(conf.get("fs.s3.awsSecretAccessKey") === null) 37 | 38 | map.put("AWS_ACCESS_KEY_ID", "id") 39 | conf = new Configuration 40 | Utils.setAwsCredentials(conf, map) 41 | assert(conf.get("fs.s3n.awsAccessKeyId") === null) 42 | assert(conf.get("fs.s3n.awsSecretAccessKey") === null) 43 | assert(conf.get("fs.s3.awsAccessKeyId") === null) 44 | assert(conf.get("fs.s3.awsSecretAccessKey") === null) 45 | 46 | map.put("AWS_SECRET_ACCESS_KEY", "key") 47 | conf = new Configuration 48 | Utils.setAwsCredentials(conf, map) 49 | assert(conf.get("fs.s3n.awsAccessKeyId") === "id") 50 | assert(conf.get("fs.s3n.awsSecretAccessKey") === "key") 51 | assert(conf.get("fs.s3.awsAccessKeyId") === "id") 52 | assert(conf.get("fs.s3.awsSecretAccessKey") === "key") 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/HiveConfPersistenceDelegate.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package shark.execution.serialization 18 | 19 | import java.beans.{Statement, Encoder, DefaultPersistenceDelegate} 20 | import scala.collection.JavaConversions._ 21 | import org.apache.hadoop.hive.conf.HiveConf 22 | import org.apache.commons.lang.ObjectUtils 23 | 24 | class HiveConfPersistenceDelegate extends DefaultPersistenceDelegate { 25 | override protected def initialize(clazz: Class[_], oldInst: AnyRef, newInst: AnyRef, out: Encoder) 26 | { 27 | val oldConf = oldInst.asInstanceOf[HiveConf] 28 | val newConf = newInst.asInstanceOf[HiveConf] 29 | 30 | if (!ObjectUtils.equals(oldConf.getAuxJars, newConf.getAuxJars)) { 31 | out.writeStatement(new Statement(oldInst, "setAuxJars", Array(oldConf.getAuxJars))) 32 | } 33 | 34 | val oldConfProps = oldConf.getAllProperties 35 | val newConfProps = newConf.getAllProperties 36 | 37 | val propsToDelete = newConfProps.filter { case(k, v) => !oldConfProps.containsKey(k) } 38 | val propsToAdd = oldConf.getAllProperties.filter { case(k, v) => 39 | !newConfProps.containsKey(k) || !ObjectUtils.equals(newConfProps.get(k), v) 40 | } 41 | 42 | propsToDelete.foreach { case(k, v) => 43 | out.writeStatement(new Statement(oldInst, "unset", Array(k))) 44 | } 45 | propsToAdd.foreach { case(k, v) => 46 | out.writeStatement(new Statement(oldInst, "set", Array(k, v))) 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/FilterOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.execution 19 | 20 | import scala.collection.Iterator 21 | import scala.reflect.BeanProperty 22 | 23 | import org.apache.hadoop.hive.ql.exec.{ExprNodeEvaluator, ExprNodeEvaluatorFactory} 24 | import org.apache.hadoop.hive.ql.metadata.HiveException 25 | import org.apache.hadoop.hive.ql.plan.FilterDesc 26 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector 27 | 28 | 29 | class FilterOperator extends UnaryOperator[FilterDesc] { 30 | 31 | @transient var conditionEvaluator: ExprNodeEvaluator = _ 32 | @transient var conditionInspector: PrimitiveObjectInspector = _ 33 | 34 | @BeanProperty var conf: FilterDesc = _ 35 | 36 | override def initializeOnMaster() { 37 | super.initializeOnMaster() 38 | 39 | conf = desc 40 | } 41 | 42 | override def initializeOnSlave() { 43 | try { 44 | conditionEvaluator = ExprNodeEvaluatorFactory.get(conf.getPredicate()) 45 | 46 | conditionInspector = conditionEvaluator.initialize(objectInspector) 47 | .asInstanceOf[PrimitiveObjectInspector] 48 | } catch { 49 | case e: Throwable => throw new HiveException(e) 50 | } 51 | } 52 | 53 | override def processPartition(split: Int, iter: Iterator[_]) = { 54 | iter.filter { row => 55 | java.lang.Boolean.TRUE.equals( 56 | conditionInspector.getPrimitiveJavaObject(conditionEvaluator.evaluate(row))) 57 | } 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/KryoSerializationWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution.serialization 19 | 20 | /** 21 | * A wrapper around some unserializable objects that make them both Java 22 | * serializable. Internally, Kryo is used for serialization. 23 | * 24 | * Use KryoSerializationWrapper(value) to create a wrapper. 25 | */ 26 | class KryoSerializationWrapper[T] extends Serializable { 27 | 28 | @transient var value: T = _ 29 | 30 | private var valueSerialized: Array[Byte] = _ 31 | 32 | // The getter and setter for valueSerialized is used for XML serialization. 33 | def getValueSerialized(): Array[Byte] = { 34 | valueSerialized = KryoSerializer.serialize(value) 35 | valueSerialized 36 | } 37 | 38 | def setValueSerialized(bytes: Array[Byte]) = { 39 | valueSerialized = bytes 40 | value = KryoSerializer.deserialize[T](valueSerialized) 41 | } 42 | 43 | // Used for Java serialization. 
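  // These are the standard Java serialization hooks: writeObject refreshes
  // `valueSerialized` from `value` via Kryo before the default write, and readObject
  // restores `value` from the deserialized bytes, so only the byte array ever travels
  // through Java serialization.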
44 | private def writeObject(out: java.io.ObjectOutputStream) { 45 | getValueSerialized() 46 | out.defaultWriteObject() 47 | } 48 | 49 | private def readObject(in: java.io.ObjectInputStream) { 50 | in.defaultReadObject() 51 | setValueSerialized(valueSerialized) 52 | } 53 | } 54 | 55 | 56 | object KryoSerializationWrapper { 57 | def apply[T](value: T): KryoSerializationWrapper[T] = { 58 | val wrapper = new KryoSerializationWrapper[T] 59 | wrapper.value = value 60 | wrapper 61 | } 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/main/scala/shark/SharkServer2.scala: -------------------------------------------------------------------------------- 1 | package shark 2 | 3 | import org.apache.commons.logging.LogFactory 4 | import org.apache.hadoop.hive.common.LogUtils 5 | import org.apache.hadoop.hive.common.LogUtils.LogInitializationException 6 | import org.apache.hadoop.hive.conf.HiveConf 7 | import org.apache.hive.service.cli.thrift.ThriftCLIService 8 | import org.apache.hive.service.server.{HiveServer2, ServerOptionsProcessor} 9 | import org.apache.spark.SparkEnv 10 | import shark.server.SharkCLIService 11 | 12 | object SharkServer2 extends LogHelper { 13 | SharkEnv.init() 14 | var sparkEnv: SparkEnv = SparkEnv.get 15 | var LOG = LogFactory.getLog(classOf[SharkServer2]) 16 | 17 | def main(args: Array[String]) { 18 | try { 19 | LogUtils.initHiveLog4j() 20 | } catch { 21 | case e: LogInitializationException => { 22 | LOG.warn(e.getMessage) 23 | } 24 | } 25 | val optproc = new ServerOptionsProcessor("sharkserver2") //TODO: include load RDDs 26 | 27 | if (!optproc.process(args)) { 28 | LOG.fatal("Error starting SharkServer2 with given arguments") 29 | System.exit(-1) 30 | } 31 | 32 | Runtime.getRuntime.addShutdownHook( 33 | new Thread() { 34 | override def run() { 35 | SharkEnv.stop() 36 | } 37 | } 38 | ) 39 | } 40 | 41 | try { 42 | val hiveConf = new HiveConf 43 | SharkConfVars.initializeWithDefaults(hiveConf) 44 | val server = new SharkServer2 45 | server.init(hiveConf) 46 | server.start() 47 | logInfo("SharkServer2 started") 48 | } catch { 49 | case t: Throwable => { 50 | LOG.fatal("Error starting SharkServer2", t) 51 | System.exit(-1) 52 | } 53 | } 54 | } 55 | 56 | class SharkServer2 extends HiveServer2 { 57 | override def init(hiveConf: HiveConf) { 58 | this.synchronized { 59 | val sharkCLIService = new SharkCLIService 60 | Utils.setSuperField("cliService", sharkCLIService, this) 61 | addService(sharkCLIService) 62 | val sthriftCLIService = new ThriftCLIService(sharkCLIService) 63 | Utils.setSuperField("thriftCLIService", sthriftCLIService, this) 64 | addService(sthriftCLIService) 65 | sharkInit(hiveConf) 66 | } 67 | } 68 | } 69 | 70 | 71 | -------------------------------------------------------------------------------- /src/main/scala/shark/LogHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark 19 | 20 | import java.io.PrintStream 21 | 22 | import org.apache.commons.lang.StringUtils 23 | import org.apache.hadoop.hive.ql.session.SessionState 24 | 25 | import org.apache.spark.Logging 26 | 27 | /** 28 | * Utility trait for classes that want to log data. This wraps around Spark's 29 | * Logging trait. It creates a SLF4J logger for the class and allows logging 30 | * messages at different levels using methods that only evaluate parameters 31 | * lazily if the log level is enabled. 32 | * 33 | * It differs from the Spark's Logging trait in that it can print out the 34 | * error to the specified console of the Hive session. 35 | */ 36 | trait LogHelper extends Logging { 37 | 38 | override def logError(msg: => String) = { 39 | errStream().println(msg) 40 | super.logError(msg) 41 | } 42 | 43 | def logError(msg: String, detail: String) = { 44 | errStream().println(msg) 45 | super.logError(msg + StringUtils.defaultString(detail)) 46 | } 47 | 48 | def logError(msg: String, exception: Throwable) = { 49 | val err = errStream() 50 | err.println(msg) 51 | exception.printStackTrace(err) 52 | super.logError(msg, exception) 53 | } 54 | 55 | def outStream(): PrintStream = { 56 | val ss = SessionState.get() 57 | if (ss != null && ss.out != null) ss.out else System.out 58 | } 59 | 60 | def errStream(): PrintStream = { 61 | val ss = SessionState.get(); 62 | if (ss != null && ss.err != null) ss.err else System.err 63 | } 64 | 65 | 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/TerminalOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution 19 | 20 | import java.util.Date 21 | 22 | import scala.reflect.BeanProperty 23 | 24 | import org.apache.hadoop.hive.conf.HiveConf 25 | import org.apache.hadoop.hive.ql.exec.{FileSinkOperator => HiveFileSinkOperator} 26 | import org.apache.hadoop.hive.ql.plan.FileSinkDesc 27 | 28 | 29 | /** 30 | * File sink operator. It can accomplish one of the three things: 31 | * - write query output to disk 32 | * - cache query output 33 | * - return query as RDD directly (without materializing it) 34 | */ 35 | class TerminalOperator extends UnaryOperator[FileSinkDesc] { 36 | 37 | // Create a local copy of hconf and hiveSinkOp so we can XML serialize it. 38 | @BeanProperty var localHiveOp: HiveFileSinkOperator = _ 39 | @BeanProperty var localHconf: HiveConf = _ 40 | @BeanProperty val now = new Date() 41 | 42 | override def initializeOnMaster() { 43 | super.initializeOnMaster() 44 | localHconf = super.hconf 45 | // Set parent to null so we won't serialize the entire query plan. 
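    // The Hive FileSinkOperator holds references to its parent and child operators and to
    // its input object inspectors; clearing them keeps the serialized operator small. The
    // inspector is supplied again when initializeOnSlave() runs on the slaves.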
46 | localHiveOp.setParentOperators(null) 47 | localHiveOp.setChildOperators(null) 48 | localHiveOp.setInputObjInspectors(null) 49 | } 50 | 51 | override def initializeOnSlave() { 52 | localHiveOp.initialize(localHconf, Array(objectInspector)) 53 | } 54 | 55 | override def processPartition(split: Int, iter: Iterator[_]): Iterator[_] = iter 56 | } 57 | 58 | 59 | /** 60 | * Collect the output as a TableRDD. 61 | */ 62 | class TableRddSinkOperator extends TerminalOperator {} 63 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/Table.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import scala.collection.mutable.ArrayBuffer 21 | 22 | import org.apache.spark.rdd.RDD 23 | 24 | import scala.collection.mutable.Buffer 25 | 26 | 27 | /** 28 | * A container for table metadata managed by Shark and Spark. Subclasses are responsible for 29 | * how RDDs are set, stored, and accessed. 30 | * 31 | * @param databaseName Namespace for this table. 32 | * @param tableName Name of this table. 33 | * @param cacheMode Type of memory storage used for the table (e.g., the Spark block manager). 34 | */ 35 | private[shark] abstract class Table( 36 | var databaseName: String, 37 | var tableName: String, 38 | var cacheMode: CacheType.CacheType) { 39 | 40 | /** 41 | * A mutable wrapper for an RDD and stats for its partitions. 42 | */ 43 | class RDDValue( 44 | var rdd: RDD[TablePartition], 45 | var stats: collection.Map[Int, TablePartitionStats]) { 46 | 47 | def toTuple = (rdd, stats) 48 | } 49 | } 50 | 51 | object Table { 52 | 53 | /** 54 | * Merges contents of `otherStatsMaps` into `targetStatsMap`. 55 | */ 56 | def mergeStats( 57 | targetStatsMap: Buffer[(Int, TablePartitionStats)], 58 | otherStatsMap: Iterable[(Int, TablePartitionStats)] 59 | ): Buffer[(Int, TablePartitionStats)] = { 60 | val targetStatsMapSize = targetStatsMap.size 61 | for ((otherIndex, tableStats) <- otherStatsMap) { 62 | targetStatsMap.append((otherIndex + targetStatsMapSize, tableStats)) 63 | } 64 | targetStatsMap 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/SerializableWritable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution.serialization 19 | 20 | import java.io._ 21 | import org.apache.hadoop.io.ObjectWritable 22 | import org.apache.hadoop.io.Writable 23 | import org.apache.hadoop.mapred.JobConf 24 | import org.apache.hadoop.io.NullWritable 25 | 26 | object SerializableWritable { 27 | val conf = new JobConf() 28 | } 29 | 30 | 31 | class SerializableWritable[T <: Writable](@transient var t: T) extends Serializable { 32 | def value = t 33 | 34 | override def toString = if(null == t) "null" else t.toString 35 | 36 | private def writeObject(out: ObjectOutputStream) { 37 | out.defaultWriteObject() 38 | new ObjectWritable(if (t == null) NullWritable.get() else t).write(out) 39 | } 40 | 41 | private def readObject(in: ObjectInputStream) { 42 | in.defaultReadObject() 43 | val ow = new ObjectWritable() 44 | ow.setConf(SerializableWritable.conf) 45 | ow.readFields(in) 46 | val s = ow.get 47 | if (s == null || s.isInstanceOf[NullWritable]) { 48 | t = null.asInstanceOf[T] 49 | } else { 50 | t = s.asInstanceOf[T] 51 | } 52 | } 53 | 54 | override def hashCode(): Int = if(t == null) 0 else t.hashCode 55 | 56 | override def equals(other: Any) = { 57 | if(other.isInstanceOf[SerializableWritable[_]].unary_!) { 58 | false 59 | } else { 60 | val other_t = other.asInstanceOf[SerializableWritable[_]].t 61 | if (t == null) { 62 | other_t == null 63 | } else { 64 | t.equals(other_t) 65 | } 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/TablePartitionIterator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.util.BitSet 21 | import shark.memstore2.column.ColumnIterator 22 | 23 | 24 | /** 25 | * An iterator for a partition of data. Each element returns a ColumnarStruct 26 | * that can be read by a ColumnarStructObjectInspector. 27 | * 28 | * @param numRows: total number of rows in this partition. 29 | * @param columnIterators: iterators for all columns. 30 | @ @param columnUsed: an optional bitmap indicating whether a column is used. 
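 * Columns whose bit is cleared in columnUsed are never advanced by next(), so only the
 * columns actually read by a query pay the cost of iteration.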
31 | */ 32 | class TablePartitionIterator( 33 | val numRows: Long, 34 | val columnIterators: Array[ColumnIterator], 35 | val columnUsed: BitSet) 36 | extends Iterator[ColumnarStruct] { 37 | 38 | def this(numRows: Long, 39 | columnIterators: Array[ColumnIterator]) { 40 | this(numRows, columnIterators, TablePartitionIterator.newBitSet(columnIterators.size)) 41 | } 42 | 43 | private val _struct = new ColumnarStruct(columnIterators) 44 | 45 | private var _position: Long = 0 46 | 47 | def hasNext: Boolean = _position < numRows 48 | 49 | def next(): ColumnarStruct = { 50 | _position += 1 51 | var i = columnUsed.nextSetBit(0) 52 | while (i > -1) { 53 | columnIterators(i).next() 54 | i = columnUsed.nextSetBit(i + 1) 55 | } 56 | _struct 57 | } 58 | } 59 | 60 | object TablePartitionIterator { 61 | 62 | def newBitSet(numCols: Int): BitSet = { 63 | val b = new BitSet(numCols) 64 | var i = numCols 65 | while (i > 0) { 66 | i -= 1 67 | b.set(i, true) 68 | } 69 | b 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /bin/dev/build_test.xml: -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /src/main/scala/shark/repl/SharkILoop.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.repl 19 | 20 | import java.io.PrintWriter 21 | 22 | import org.apache.spark.{SparkContext, SparkEnv} 23 | import org.apache.spark.repl.SparkILoop 24 | 25 | import shark.{SharkContext, SharkEnv} 26 | 27 | 28 | /** 29 | * Add more Shark specific initializations. 30 | */ 31 | class SharkILoop extends SparkILoop(None, new PrintWriter(Console.out, true), None) { 32 | 33 | override def initializeSpark() { 34 | // Note: shark.SharkEnv.initWithSharkContext must be invoked after spark.repl.Main.interp 35 | // is used because the slaves' executors depend on the environmental variable 36 | // "spark.repl.class.uri" set to invoke Spark's ExecutorClassLoader.
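    // The quoted block below is evaluated inside the interpreter itself: it creates the
    // SharkContext, exposes it to the session as `sparkContext` and `sc`, and aliases `s`
    // to sql2console so SQL can be issued directly from the prompt.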
37 | intp.beQuietDuring { 38 | command(""" 39 | org.apache.spark.repl.Main.interp.out.println("Creating SparkContext..."); 40 | org.apache.spark.repl.Main.interp.out.flush(); 41 | shark.SharkEnv.initWithSharkContext("shark-shell"); 42 | @transient val sparkContext = shark.SharkEnv.sc; 43 | org.apache.spark.repl.Main.interp.sparkContext = sparkContext; 44 | @transient val sc = sparkContext.asInstanceOf[shark.SharkContext]; 45 | org.apache.spark.repl.Main.interp.out.println("Shark context available as sc."); 46 | import sc._; 47 | def s = sql2console _; 48 | org.apache.spark.repl.Main.interp.out.flush(); 49 | """) 50 | command("import org.apache.spark.SparkContext._"); 51 | } 52 | Console.println("Type in expressions to have them evaluated.") 53 | Console.println("Type :help for more information.") 54 | Console.flush() 55 | } 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/column/NullableColumnIterator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2.column 19 | 20 | import java.nio.ByteBuffer 21 | import java.nio.ByteOrder 22 | 23 | /** 24 | * Reads a nullable column. Expects the byte buffer to contain as first element 25 | * the null count, followed by the null indices, and finally the non nulls. 26 | * Reading of non nulls is delegated by setting the buffer position to the first 27 | * non null. 28 | */ 29 | class NullableColumnIterator(buffer: ByteBuffer) extends ColumnIterator { 30 | private var _d: ByteBuffer = _ 31 | private var _nullCount: Int = _ 32 | private var _nulls = 0 33 | 34 | private var _isNull = false 35 | private var _currentNullIndex: Int = _ 36 | private var _pos = 0 37 | 38 | private var _delegate: ColumnIterator = _ 39 | 40 | override def init() { 41 | _d = buffer.duplicate() 42 | _d.order(ByteOrder.nativeOrder()) 43 | _nullCount = _d.getInt() 44 | _currentNullIndex = if (_nullCount > 0) _d.getInt() else Integer.MAX_VALUE 45 | _pos = 0 46 | 47 | // Move the buffer position to the non-null region. 
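    // The header is a 4-byte null count followed by `_nullCount` 4-byte null positions,
    // so skipping 4 + _nullCount * 4 bytes lands the delegate iterator on the first
    // non-null value (see NullableColumnBuilder for the matching layout).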
48 | buffer.position(buffer.position() + 4 + _nullCount * 4) 49 | _delegate = ColumnIterator.newNonNullIterator(buffer) 50 | } 51 | 52 | override def next() { 53 | if (_pos == _currentNullIndex) { 54 | _nulls += 1 55 | if (_nulls < _nullCount) { 56 | _currentNullIndex = _d.getInt() 57 | } 58 | _isNull = true 59 | } else { 60 | _isNull = false 61 | _delegate.next() 62 | } 63 | _pos += 1 64 | } 65 | 66 | override def hasNext: Boolean = (_nulls < _nullCount) || _delegate.hasNext 67 | 68 | def current: Object = if (_isNull) null else _delegate.current 69 | } 70 | -------------------------------------------------------------------------------- /src/test/scala/shark/CliSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark 19 | 20 | import java.io.{BufferedReader, File, InputStreamReader, PrintWriter} 21 | import org.scalatest.{BeforeAndAfterAll, FunSuite} 22 | 23 | 24 | /** 25 | * Test the Shark CLI. 26 | */ 27 | class CliSuite extends FunSuite with BeforeAndAfterAll with TestUtils { 28 | 29 | val WAREHOUSE_PATH = TestUtils.getWarehousePath("cli") 30 | val METASTORE_PATH = TestUtils.getMetastorePath("cli") 31 | 32 | override def beforeAll() { 33 | val pb = new ProcessBuilder( 34 | "./bin/shark", 35 | "-hiveconf", 36 | "javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=" + METASTORE_PATH + ";create=true", 37 | "-hiveconf", 38 | "hive.metastore.warehouse.dir=" + WAREHOUSE_PATH) 39 | 40 | process = pb.start() 41 | outputWriter = new PrintWriter(process.getOutputStream, true) 42 | inputReader = new BufferedReader(new InputStreamReader(process.getInputStream)) 43 | errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream)) 44 | waitForOutput(inputReader, "shark>") 45 | } 46 | 47 | override def afterAll() { 48 | process.destroy() 49 | process.waitFor() 50 | } 51 | 52 | test("simple select") { 53 | val dataFilePath = TestUtils.dataFilePath + "/kv1.txt" 54 | executeQuery("create table shark_test1(key int, val string);") 55 | executeQuery("load data local inpath '" + dataFilePath+ "' overwrite into table shark_test1;") 56 | executeQuery("""create table shark_test1_cached TBLPROPERTIES ("shark.cache" = "true") as 57 | select * from shark_test1;""") 58 | val out = executeQuery("select * from shark_test1_cached where key = 407;") 59 | assert(out.contains("val_407")) 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/shark/parse/SharkExplainSemanticAnalyzer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.parse 19 | 20 | import java.io.Serializable 21 | import java.util.ArrayList 22 | 23 | import org.apache.hadoop.fs.Path 24 | import org.apache.hadoop.hive.conf.HiveConf 25 | import org.apache.hadoop.hive.ql.exec._ 26 | import org.apache.hadoop.hive.ql.parse._ 27 | 28 | import shark.execution.SharkExplainWork 29 | 30 | 31 | class SharkExplainSemanticAnalyzer(conf: HiveConf) extends ExplainSemanticAnalyzer(conf) { 32 | 33 | var sem: BaseSemanticAnalyzer = null 34 | 35 | /** 36 | * This is basically the same as Hive's except we invoke 37 | * SharkSemanticAnalyzerFactory. We need to do this to get 38 | * SharkSemanticAnalyzer for SELECT and CTAS queries. 39 | */ 40 | override def analyzeInternal(ast: ASTNode): Unit = { 41 | ctx.setExplain(true) 42 | 43 | // Create a semantic analyzer for the query 44 | val childNode = ast.getChild(0).asInstanceOf[ASTNode] 45 | sem = SharkSemanticAnalyzerFactory.get(conf, childNode) 46 | sem.analyze(childNode, ctx) 47 | 48 | val extended = (ast.getChildCount() > 1) 49 | 50 | ctx.setResFile(new Path(ctx.getLocalTmpFileURI())) 51 | var tasks = sem.getRootTasks() 52 | val fetchTask = sem.getFetchTask() 53 | if (tasks == null) { 54 | if (fetchTask != null) { 55 | tasks = new ArrayList[Task[_ <: Serializable]](); 56 | tasks.add(fetchTask) 57 | } 58 | } else if (fetchTask != null) { 59 | tasks.add(fetchTask) 60 | } 61 | 62 | val task = TaskFactory.get( 63 | new SharkExplainWork(ctx.getResFile().toString(), tasks, childNode.toStringTree(), 64 | sem.getInputs(), extended), conf) 65 | 66 | rootTasks.add(task) 67 | } 68 | } 69 | 70 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/column/NullableColumnBuilder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2.column 19 | 20 | import java.nio.ByteBuffer 21 | import java.nio.ByteOrder 22 | 23 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 24 | 25 | 26 | /** 27 | * Builds a nullable column. 
The byte buffer of a nullable column contains: 28 | * - 4 bytes for the null count (number of nulls) 29 | * - positions for each null, in ascending order 30 | * - the non-null data (column data type, compression type, data...) 31 | */ 32 | trait NullableColumnBuilder[T] extends ColumnBuilder[T] { 33 | 34 | private var _nulls: ByteBuffer = _ 35 | 36 | private var _pos: Int = _ 37 | private var _nullCount: Int = _ 38 | 39 | override def initialize(initialSize: Int, cName: String): ByteBuffer = { 40 | _nulls = ByteBuffer.allocate(1024) 41 | _nulls.order(ByteOrder.nativeOrder()) 42 | _pos = 0 43 | _nullCount = 0 44 | super.initialize(initialSize, cName) 45 | } 46 | 47 | override def append(o: Object, oi: ObjectInspector) { 48 | if (o == null) { 49 | _nulls = growIfNeeded(_nulls, 4) 50 | _nulls.putInt(_pos) 51 | _nullCount += 1 52 | } else { 53 | super.append(o, oi) 54 | } 55 | _pos += 1 56 | } 57 | 58 | override def build(): ByteBuffer = { 59 | val nonNulls = super.build() 60 | val nullDataLen = _nulls.position() 61 | _nulls.limit(nullDataLen) 62 | _nulls.rewind() 63 | 64 | // 4 bytes for null count + null positions + non nulls 65 | val newBuffer = ByteBuffer.allocate(4 + nullDataLen + nonNulls.limit) 66 | newBuffer.order(ByteOrder.nativeOrder()) 67 | newBuffer.putInt(_nullCount).put(_nulls).put(nonNulls) 68 | newBuffer.rewind() 69 | newBuffer 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/scala/shark/server/SharkSQLOperation.scala: -------------------------------------------------------------------------------- 1 | package shark.server 2 | 3 | import java.util.{Map => JMap} 4 | import org.apache.hadoop.hive.ql.parse.VariableSubstitution 5 | import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse 6 | import org.apache.hive.service.cli.{HiveSQLException, OperationState, TableSchema} 7 | import org.apache.hive.service.cli.operation.SQLOperation 8 | import org.apache.hive.service.cli.session.HiveSession 9 | import shark.{SharkDriver, Utils} 10 | 11 | class SharkSQLOperation( 12 | parentSession: HiveSession, 13 | statement: String, 14 | confOverlay: JMap[String, String]) 15 | extends SQLOperation(parentSession, statement, confOverlay) { 16 | 17 | private val sdriver = { 18 | val d = new SharkDriver(getParentSession.getHiveConf) 19 | d.init() 20 | d 21 | } 22 | 23 | override def run() { 24 | setState(OperationState.RUNNING) 25 | Utils.setSuperField("driver", sdriver, this) 26 | var response: Option[CommandProcessorResponse] = None 27 | sdriver.setTryCount(Integer.MAX_VALUE) //maybe useless? 
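    // Matches Hive's own SQLOperation, which raises the Driver's try count before
    // running the statement; kept here for parity and possibly redundant in Shark.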
28 | var subStatement = "" 29 | try { 30 | //duplicate: this is also done when Driver compiles command 31 | subStatement = new VariableSubstitution().substitute(getParentSession.getHiveConf, statement) 32 | } catch { 33 | case e: IllegalStateException => { 34 | setState(OperationState.ERROR) 35 | throw new HiveSQLException 36 | } 37 | } 38 | 39 | response = Option(sdriver.run(subStatement)) 40 | response match { 41 | case Some(resp: CommandProcessorResponse) => { 42 | val code = resp.getResponseCode 43 | if (code != 0) { 44 | setState(OperationState.ERROR) 45 | throw new HiveSQLException("Error while processing statement: " 46 | + resp.getErrorMessage, resp.getSQLState, code) 47 | } 48 | } 49 | case None => { 50 | setState(OperationState.ERROR) 51 | throw new HiveSQLException 52 | } 53 | } 54 | 55 | val mResultSchema = sdriver.getSchema 56 | Utils.setSuperField("mResultSchema", mResultSchema, this) 57 | if (mResultSchema != null && mResultSchema.isSetFieldSchemas) { 58 | val resultSchema = new TableSchema(mResultSchema) 59 | Utils.setSuperField("resultSchema", resultSchema, this) 60 | setHasResultSet(true) 61 | } else { 62 | setHasResultSet(false) 63 | } 64 | setState(OperationState.FINISHED) 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/TablePartitionBuilder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.io.{DataInput, DataOutput} 21 | 22 | import scala.collection.JavaConversions._ 23 | 24 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 25 | import org.apache.hadoop.hive.serde2.objectinspector.StructField 26 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector 27 | import org.apache.hadoop.io.Writable 28 | 29 | import shark.memstore2.column.ColumnBuilder 30 | 31 | 32 | /** 33 | * Used to build a TablePartition. This is used in the serializer to convert a 34 | * partition of data into columnar format and to generate a TablePartition. 
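 *
 * A rough usage sketch (names and sizes below are illustrative, not taken from this file):
 *
 *   val fields = rowOi.getAllStructFieldRefs
 *   val builder = new TablePartitionBuilder(rowOi, initialColumnSize = 1000000)
 *   rows.foreach { row =>
 *     (0 until fields.size).foreach { i =>
 *       builder.append(i, rowOi.getStructFieldData(row, fields.get(i)), fields.get(i).getFieldObjectInspector)
 *     }
 *     builder.incrementRowCount()
 *   }
 *   val partition = builder.build()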
35 | */ 36 | class TablePartitionBuilder( 37 | oi: StructObjectInspector, 38 | initialColumnSize: Int, 39 | shouldCompress: Boolean = true) 40 | extends Writable { 41 | 42 | private var numRows: Long = 0 43 | val fields: Seq[_ <: StructField] = oi.getAllStructFieldRefs 44 | 45 | val columnBuilders = Array.tabulate[ColumnBuilder[_]](fields.size) { i => 46 | val columnBuilder = ColumnBuilder.create(fields(i), shouldCompress) 47 | columnBuilder.initialize(initialColumnSize, fields(i).getFieldName) 48 | columnBuilder 49 | } 50 | 51 | def incrementRowCount() { 52 | numRows += 1 53 | } 54 | 55 | def append(columnIndex: Int, o: Object, oi: ObjectInspector) { 56 | columnBuilders(columnIndex).append(o, oi) 57 | } 58 | 59 | def stats: TablePartitionStats = new TablePartitionStats(columnBuilders.map(_.stats), numRows) 60 | 61 | def build(): TablePartition = new TablePartition(numRows, columnBuilders.map(_.build())) 62 | 63 | // We don't use these, but want to maintain Writable interface for SerDe 64 | override def write(out: DataOutput) {} 65 | override def readFields(in: DataInput) {} 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/HiveStructDeserializer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.hadoop.hive.serde2.binarysortable 19 | 20 | // Putting it in this package so it can access the package level visible function 21 | // static void BinarySortableSerDe.serialize(OutputByteBuffer, Object, ObjectInspector, boolean) 22 | 23 | import java.io.IOException 24 | import java.util.{ArrayList => JArrayList} 25 | 26 | import org.apache.hadoop.hive.serde2.SerDeException 27 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector 28 | import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfo, TypeInfoUtils} 29 | 30 | 31 | /** 32 | * Used to deserialize a row of data. It needs to be initialized with an object inspector 33 | * for the row. 
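 *
 * Typically paired with a HiveStructSerializer built from the same inspector; a minimal
 * round-trip sketch (`rowOi` and `row` are assumed to be defined elsewhere):
 *
 *   val ser = new HiveStructSerializer(rowOi)
 *   val deser = new HiveStructDeserializer(rowOi)
 *   val roundTripped = deser.deserialize(ser.serialize(row))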
34 | */ 35 | class HiveStructDeserializer(val rowObjectInspector: StructObjectInspector) { 36 | 37 | def deserialize(bytes: Array[Byte]): JArrayList[Object] = { 38 | inputByteBuffer.reset(bytes, 0, bytes.length) 39 | try { 40 | var i = 0 41 | while (i < types.size) { 42 | reusedRow.set(i, 43 | BinarySortableSerDe.deserialize(inputByteBuffer, types(i), false, reusedRow.get(i))) 44 | i += 1 45 | } 46 | } catch{ 47 | case e: IOException => throw new SerDeException(e) 48 | } 49 | reusedRow 50 | } 51 | 52 | private val inputByteBuffer = new InputByteBuffer 53 | private val types = Array.tabulate[TypeInfo](rowObjectInspector.getAllStructFieldRefs.size) { i => 54 | TypeInfoUtils.getTypeInfoFromObjectInspector( 55 | rowObjectInspector.getAllStructFieldRefs.get(i).getFieldObjectInspector) 56 | } 57 | 58 | private val reusedRow: JArrayList[Object] = { 59 | val row = new JArrayList[Object](rowObjectInspector.getAllStructFieldRefs.size()) 60 | (0 until rowObjectInspector.getAllStructFieldRefs.size).foreach(i => row.add(null)) 61 | row 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/shark/optimizer/SharkMapJoinProcessor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.optimizer 19 | 20 | import java.util.{LinkedHashMap => JavaLinkedHashMap} 21 | 22 | import org.apache.hadoop.hive.ql.exec.{MapJoinOperator, JoinOperator, Operator} 23 | import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor 24 | import org.apache.hadoop.hive.ql.parse.{ParseContext, QBJoinTree, OpParseContext} 25 | import org.apache.hadoop.hive.ql.plan.OperatorDesc 26 | import org.apache.hadoop.hive.conf.HiveConf 27 | 28 | class SharkMapJoinProcessor extends MapJoinProcessor { 29 | 30 | /** 31 | * Override generateMapJoinOperator to bypass the step of validating Map Join hints int Hive. 32 | */ 33 | override def generateMapJoinOperator( 34 | pctx: ParseContext, 35 | op: JoinOperator, 36 | joinTree: QBJoinTree, 37 | mapJoinPos: Int): MapJoinOperator = { 38 | val hiveConf: HiveConf = pctx.getConf 39 | val noCheckOuterJoin: Boolean = 40 | HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN) && 41 | HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN) 42 | 43 | val opParseCtxMap: JavaLinkedHashMap[Operator[_ <: OperatorDesc], OpParseContext] = 44 | pctx.getOpParseCtx 45 | 46 | // Explicitly set validateMapJoinTree to false to bypass the step of validating 47 | // Map Join hints in Hive. 
48 | val validateMapJoinTree = false 49 | val mapJoinOp: MapJoinOperator = 50 | MapJoinProcessor.convertMapJoin( 51 | opParseCtxMap, op, joinTree, mapJoinPos, noCheckOuterJoin, validateMapJoinTree) 52 | 53 | // Hive originally uses genSelectPlan to insert an dummy select after the MapJoinOperator. 54 | // We should not need this step. 55 | // create a dummy select to select all columns 56 | // MapJoinProcessor.genSelectPlan(pctx, mapJoinOp) 57 | 58 | return mapJoinOp 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/SelectOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution 19 | 20 | import scala.collection.JavaConversions._ 21 | import scala.reflect.BeanProperty 22 | 23 | import org.apache.hadoop.hive.ql.exec.{ExprNodeEvaluator, ExprNodeEvaluatorFactory} 24 | import org.apache.hadoop.hive.ql.plan.SelectDesc 25 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 26 | 27 | 28 | /** 29 | * An operator that does projection, i.e. selecting certain columns and 30 | * filtering out others. 31 | */ 32 | class SelectOperator extends UnaryOperator[SelectDesc] { 33 | 34 | @BeanProperty var conf: SelectDesc = _ 35 | 36 | @transient var evals: Array[ExprNodeEvaluator] = _ 37 | 38 | override def initializeOnMaster() { 39 | super.initializeOnMaster() 40 | conf = desc 41 | initializeEvals(false) 42 | } 43 | 44 | def initializeEvals(initializeEval: Boolean) { 45 | if (!conf.isSelStarNoCompute) { 46 | evals = conf.getColList().map(ExprNodeEvaluatorFactory.get(_)).toArray 47 | if (initializeEval) { 48 | evals.foreach(_.initialize(objectInspector)) 49 | } 50 | } 51 | } 52 | 53 | override def initializeOnSlave() { 54 | initializeEvals(true) 55 | } 56 | 57 | override def processPartition(split: Int, iter: Iterator[_]) = { 58 | if (conf.isSelStarNoCompute) { 59 | iter 60 | } else { 61 | val reusedRow = new Array[Object](evals.length) 62 | iter.map { row => 63 | var i = 0 64 | while (i < evals.length) { 65 | reusedRow(i) = evals(i).evaluate(row) 66 | i += 1 67 | } 68 | reusedRow 69 | } 70 | } 71 | } 72 | 73 | override def outputObjectInspector(): ObjectInspector = { 74 | if (conf.isSelStarNoCompute()) { 75 | super.outputObjectInspector() 76 | } else { 77 | initEvaluatorsAndReturnStruct(evals, conf.getOutputColumnNames(), objectInspector) 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/CacheType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import shark.LogHelper 21 | 22 | 23 | /* 24 | * Enumerations and static helper functions for caches supported by Shark. 25 | */ 26 | object CacheType extends Enumeration with LogHelper { 27 | 28 | /* 29 | * The CacheTypes: 30 | * - MEMORY: Stored in memory and on disk (i.e., cache is write-through). Persistent across Shark 31 | * sessions. By default, all such tables are reloaded into memory on restart. 32 | * - MEMORY_ONLY: Stored only in memory and dropped at the end of each Shark session. 33 | * - OFFHEAP: Stored in an off-heap data storage format, specified by the System property 34 | * 'shark.offheap.clientFactory'. Defaults to TachyonStorageClientFactory. 35 | * - NONE: Stored on disk (e.g., HDFS) and managed by Hive. 36 | */ 37 | type CacheType = Value 38 | val MEMORY, MEMORY_ONLY, OFFHEAP, NONE = Value 39 | 40 | def shouldCache(c: CacheType): Boolean = (c != NONE) 41 | 42 | /** Get the cache type object from a string representation. */ 43 | def fromString(name: String): CacheType = Option(name).map(_.toUpperCase) match { 44 | case None | Some("") | Some("FALSE") => NONE 45 | case Some("TRUE") => MEMORY 46 | case Some("HEAP") => 47 | logWarning("The 'HEAP' cache type name is deprecated. Use 'MEMORY' instead.") 48 | MEMORY 49 | case Some("TACHYON") => 50 | logWarning("The 'TACHYON' cache type name is deprecated. Use 'OFFHEAP' instead.") 51 | OFFHEAP 52 | case _ => { 53 | try { 54 | // Try to use Scala's Enumeration::withName() to interpret 'name'. 55 | withName(name.toUpperCase) 56 | } catch { 57 | case e: java.util.NoSuchElementException => throw new InvalidCacheTypeException(name) 58 | } 59 | } 60 | } 61 | 62 | class InvalidCacheTypeException(name: String) 63 | extends Exception("Invalid string representation of cache type: '%s'".format(name)) 64 | } 65 | -------------------------------------------------------------------------------- /src/test/scala/shark/execution/HiveStructSerializerSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.execution 19 | 20 | import java.util.{ArrayList => JArrayList} 21 | 22 | import scala.collection.JavaConversions._ 23 | 24 | import org.apache.hadoop.hive.serde2.binarysortable.{HiveStructSerializer, HiveStructDeserializer} 25 | import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, 26 | ObjectInspectorFactory, StandardListObjectInspector, StandardMapObjectInspector, 27 | StructObjectInspector} 28 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.{PrimitiveObjectInspectorUtils, 29 | PrimitiveObjectInspectorFactory} 30 | import org.apache.hadoop.io.{IntWritable, LongWritable, Text} 31 | 32 | import org.scalatest.FunSuite 33 | 34 | 35 | class HiveStructSerializerSuite extends FunSuite { 36 | 37 | test("Testing serializing a simple row") { 38 | val row1 = createRow(1, "test1") 39 | val row2 = createRow(2, "test2") 40 | val ser = new HiveStructSerializer(createObjectInspector) 41 | val deser = new HiveStructDeserializer(createObjectInspector) 42 | val deserRow1 = deser.deserialize(ser.serialize(row1)) 43 | assert(row1.get(0).equals(deserRow1.get(0))) 44 | assert(row1.get(1).equals(deserRow1.get(1))) 45 | } 46 | 47 | def createObjectInspector(): StructObjectInspector = { 48 | val names = List("a", "b") 49 | val ois = List( 50 | createPrimitiveOi(classOf[java.lang.Integer]), 51 | createPrimitiveOi(classOf[String])) 52 | ObjectInspectorFactory.getStandardStructObjectInspector(names, ois) 53 | } 54 | 55 | def createRow(v1: Int, v2: String): JArrayList[Object] = { 56 | val row = new JArrayList[Object](2) 57 | row.add(new IntWritable(v1)) 58 | row.add(new Text(v2)) 59 | row 60 | } 61 | 62 | def createPrimitiveOi(javaClass: Class[_]): PrimitiveObjectInspector = 63 | PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( 64 | PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(javaClass).primitiveCategory) 65 | } 66 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/OperatorSerializationWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution.serialization 19 | 20 | import shark.execution.HiveDesc 21 | import shark.execution.Operator 22 | 23 | 24 | /** 25 | * A wrapper around our operators so they can be serialized by standard Java 26 | * serialization. This really just delegates the serialization of the operators 27 | * to XML, and that of object inspectors to Kryo. 28 | * 29 | * Use OperatorSerializationWrapper(operator) to create a wrapper. 30 | */ 31 | class OperatorSerializationWrapper[T <: Operator[_ <: HiveDesc]] 32 | extends Serializable with shark.LogHelper { 33 | 34 | /** The operator we are going to serialize. 
*/ 35 | @transient var _value: T = _ 36 | 37 | /** The operator serialized by the XMLEncoder, minus the object inspectors. */ 38 | var opSerialized: Array[Byte] = _ 39 | 40 | /** The object inspectors, serialized by Kryo. */ 41 | var objectInspectorsSerialized: Array[Byte] = _ 42 | 43 | def value: T = { 44 | if (_value == null) { 45 | assert(opSerialized != null) 46 | assert(opSerialized.length > 0) 47 | assert(objectInspectorsSerialized != null) 48 | assert(objectInspectorsSerialized.length > 0) 49 | _value = XmlSerializer.deserialize[T](opSerialized) 50 | _value.objectInspectors = KryoSerializer.deserialize(objectInspectorsSerialized) 51 | } 52 | _value 53 | } 54 | 55 | def value_= (v: T):Unit = { 56 | _value = v 57 | opSerialized = XmlSerializer.serialize(value, v.hconf) 58 | objectInspectorsSerialized = KryoSerializer.serialize(value.objectInspectors) 59 | } 60 | 61 | override def toString(): String = { 62 | if (value != null) { 63 | "OperatorSerializationWrapper[ " + value.toString() + " ]" 64 | } else { 65 | super.toString() 66 | } 67 | } 68 | } 69 | 70 | 71 | object OperatorSerializationWrapper { 72 | def apply[T <: Operator[_ <: HiveDesc]](value: T): OperatorSerializationWrapper[T] = { 73 | val wrapper = new OperatorSerializationWrapper[T] 74 | wrapper.value = value 75 | wrapper 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/TableRecovery.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.util.{HashMap => JavaHashMap} 21 | 22 | import scala.collection.JavaConversions.asScalaBuffer 23 | 24 | import org.apache.hadoop.hive.ql.metadata.Hive 25 | import org.apache.hadoop.hive.ql.session.SessionState 26 | 27 | import shark.{LogHelper, SharkEnv} 28 | import shark.util.QueryRewriteUtils 29 | 30 | /** 31 | * Singleton used to reload RDDs upon server restarts. 32 | */ 33 | object TableRecovery extends LogHelper { 34 | 35 | val db = Hive.get() 36 | 37 | /** 38 | * Loads any cached tables with MEMORY as its `shark.cache` property. 39 | * @param cmdRunner The runner that is responsible for taking a cached table query and 40 | * a) Creating the table metadata in Hive Meta Store 41 | * b) Loading the table as an RDD in memory 42 | * @see SharkServer for an example usage. 43 | * @param console Optional SessionState.LogHelper used, if present, to log information about 44 | the tables that get reloaded. 45 | */ 46 | def reloadRdds(cmdRunner: String => Unit, console: Option[SessionState.LogHelper] = None) { 47 | // Filter for tables that should be reloaded into the cache. 
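    // Remember the active database so it can be restored after the per-database
    // USE statements issued below.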
48 | val currentDbName = db.getCurrentDatabase() 49 | for (databaseName <- db.getAllDatabases(); tableName <- db.getAllTables(databaseName)) { 50 | val hiveTable = db.getTable(databaseName, tableName) 51 | val tblProps = hiveTable.getParameters 52 | val cacheMode = CacheType.fromString(tblProps.get(SharkTblProperties.CACHE_FLAG.varname)) 53 | if (cacheMode == CacheType.MEMORY) { 54 | val logMessage = "Reloading %s.%s into memory.".format(databaseName, tableName) 55 | if (console.isDefined) { 56 | console.get.printInfo(logMessage) 57 | } else { 58 | logInfo(logMessage) 59 | } 60 | val cmd = QueryRewriteUtils.cacheToAlterTable("CACHE %s".format(tableName)) 61 | cmdRunner(s"use $databaseName") 62 | cmdRunner(cmd) 63 | } 64 | } 65 | db.setCurrentDatabase(currentDbName) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/TableRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.api 19 | 20 | import java.util.{List => JList} 21 | 22 | import org.apache.hadoop.hive.metastore.api.FieldSchema 23 | import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, StructObjectInspector} 24 | 25 | import shark.execution.serialization.KryoSerializer 26 | 27 | import org.apache.spark.{Partition, TaskContext} 28 | import org.apache.spark.rdd.RDD 29 | 30 | 31 | class TableRDD( 32 | prev: RDD[Any], 33 | val schema: Array[ColumnDesc], 34 | @transient oi: ObjectInspector, 35 | val limit: Int = -1) 36 | extends RDD[Row](prev) { 37 | 38 | private[shark] 39 | def this(prev: RDD[Any], schema: JList[FieldSchema], oi: ObjectInspector, limit: Int) { 40 | this(prev, ColumnDesc.createSchema(schema), oi, limit) 41 | } 42 | 43 | override def getPartitions = firstParent[Any].partitions 44 | 45 | override def compute(split: Partition, context: TaskContext): Iterator[Row] = { 46 | val structOi = initObjectInspector() 47 | firstParent[Any].iterator(split, context).map { rowData => 48 | new Row(rowData, colname2indexMap, structOi) 49 | } 50 | } 51 | 52 | /** 53 | * ObjectInspector is not Java serializable. We serialize it using Kryo and 54 | * and save it as a byte array. On slave nodes, we deserialize this byte 55 | * array to obtain the ObjectInspector object. 56 | */ 57 | private val serializedObjectInspector: Array[Byte] = KryoSerializer.serialize(oi) 58 | 59 | /** 60 | * Maps the column name to column index. 61 | */ 62 | private val colname2indexMap: Map[String, Int] = 63 | collection.immutable.Map() ++ schema.zipWithIndex.map { case(column, index) => 64 | (column.name, index) 65 | } 66 | 67 | /** 68 | * Initialize object inspector from the serializedObjectInspector. 
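   * Called from compute() on the slaves; the deserialized inspector must be a
   * StructObjectInspector.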
69 | */ 70 | private def initObjectInspector(): StructObjectInspector = { 71 | val oi = KryoSerializer.deserialize[ObjectInspector](serializedObjectInspector) 72 | oi match { 73 | case soi: StructObjectInspector => soi 74 | case _ => throw new Exception("Only basic StructObjectInspector is supposed.") 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/scala/shark/optimizer/SharkOptimizer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.optimizer 19 | 20 | import java.util.{List => JavaList} 21 | 22 | import org.apache.hadoop.hive.ql.optimizer.JoinReorder 23 | import org.apache.hadoop.hive.ql.optimizer.{Optimizer => HiveOptimizer, 24 | SimpleFetchOptimizer, Transform, MapJoinProcessor => HiveMapJoinProcessor} 25 | import org.apache.hadoop.hive.ql.parse.ParseContext 26 | import shark.LogHelper 27 | 28 | class SharkOptimizer extends HiveOptimizer with LogHelper { 29 | 30 | /** 31 | * Override Hive optimizer to skip SimpleFetchOptimizer, which is designed 32 | * to let Hive avoid launching MR jobs on simple queries, but rewrites the 33 | * query plan in a way that is inconvenient for Shark (replaces the FS operator 34 | * with a non-terminal ListSink operator). 35 | */ 36 | override def optimize(): ParseContext = { 37 | 38 | // Use reflection to make some private members accessible. 39 | val transformationsField = classOf[HiveOptimizer].getDeclaredField("transformations") 40 | val pctxField = classOf[HiveOptimizer].getDeclaredField("pctx") 41 | pctxField.setAccessible(true) 42 | transformationsField.setAccessible(true) 43 | val transformations = transformationsField.get(this).asInstanceOf[JavaList[Transform]] 44 | var pctx = pctxField.get(this).asInstanceOf[ParseContext] 45 | 46 | // Invoke each optimizer transformation 47 | val it = transformations.iterator 48 | while (it.hasNext()) { 49 | val transformation = it.next() 50 | transformation match { 51 | case _: SimpleFetchOptimizer => {} 52 | case _: JoinReorder => {} 53 | case _: HiveMapJoinProcessor => { 54 | // Use SharkMapJoinProcessor to bypass the step of validating Map Join hints 55 | // in Hive. So, we can use hints to mark tables that will be considered as small 56 | // tables (like Hive 0.9). 57 | val sharkMapJoinProcessor = new SharkMapJoinProcessor 58 | pctx = sharkMapJoinProcessor.transform(pctx) 59 | } 60 | case _ => { 61 | pctx = transformation.transform(pctx) 62 | } 63 | } 64 | } 65 | pctx 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/JavaTableRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 
3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.api 19 | 20 | import scala.reflect.ClassTag 21 | 22 | import org.apache.spark.api.java.function.{Function => JFunction} 23 | import org.apache.spark.api.java.JavaRDDLike 24 | import org.apache.spark.rdd.RDD 25 | import org.apache.spark.storage.StorageLevel 26 | 27 | 28 | class JavaTableRDD(val rdd: RDD[Row], val schema: Array[ColumnDesc]) 29 | extends JavaRDDLike[Row, JavaTableRDD] { 30 | 31 | override def wrapRDD(rdd: RDD[Row]): JavaTableRDD = new JavaTableRDD(rdd, schema) 32 | 33 | // Common RDD functions 34 | override val classTag: ClassTag[Row] = implicitly[ClassTag[Row]] 35 | 36 | // This shouldn't be necessary, but we seem to need this to get first() to return Row 37 | // instead of Object; possibly a compiler bug? 38 | override def first(): Row = rdd.first() 39 | 40 | /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ 41 | def cache(): JavaTableRDD = wrapRDD(rdd.cache()) 42 | 43 | /** 44 | * Set this RDD's storage level to persist its values across operations after the first time 45 | * it is computed. Can only be called once on each RDD. 46 | */ 47 | def persist(newLevel: StorageLevel): JavaTableRDD = wrapRDD(rdd.persist(newLevel)) 48 | 49 | // Transformations (return a new RDD) 50 | 51 | // Note: we didn't implement distinct() because equals() and hashCode() are not defined for Row. 52 | 53 | /** 54 | * Return a new RDD containing only the elements that satisfy a predicate. 55 | */ 56 | def filter(f: JFunction[Row, java.lang.Boolean]): JavaTableRDD = 57 | wrapRDD(rdd.filter((x => f(x).booleanValue()))) 58 | 59 | /** 60 | * Return a sampled subset of this RDD. 61 | */ 62 | def sample(withReplacement: Boolean, fraction: Double, seed: Int): JavaTableRDD = 63 | wrapRDD(rdd.sample(withReplacement, fraction, seed)) 64 | 65 | /** 66 | * Return the union of this RDD and another one. Any identical elements will appear multiple 67 | * times (use `.distinct()` to eliminate them). 68 | * 69 | * Note: the `schema` of a union is this RDD's schema. 70 | */ 71 | def union(other: JavaTableRDD): JavaTableRDD = wrapRDD(rdd.union(other.rdd)) 72 | 73 | } 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/column/ColumnIterators.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2.column 19 | 20 | import java.nio.ByteBuffer 21 | import org.apache.hadoop.hive.serde2.`lazy`.LazyObject 22 | import org.apache.hadoop.hive.serde2.`lazy`.LazyFactory 23 | import org.apache.hadoop.hive.serde2.`lazy`.ByteArrayRef 24 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 25 | 26 | import shark.execution.serialization.KryoSerializer 27 | 28 | 29 | class IntColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, INT) 30 | 31 | class FloatColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, FLOAT) 32 | 33 | class LongColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, LONG) 34 | 35 | class DoubleColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, DOUBLE) 36 | 37 | class BooleanColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, BOOLEAN) 38 | 39 | class ByteColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, BYTE) 40 | 41 | class ShortColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, SHORT) 42 | 43 | class NullColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, VOID) 44 | 45 | class TimestampColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, TIMESTAMP) 46 | 47 | class BinaryColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, BINARY) 48 | 49 | class StringColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, STRING) 50 | 51 | class GenericColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, GENERIC) { 52 | 53 | private var _obj: LazyObject[_] = _ 54 | 55 | override def init() { 56 | super.init() 57 | val oiSize = buffer.getInt() 58 | val oiSerialized = new Array[Byte](oiSize) 59 | buffer.get(oiSerialized, 0, oiSize) 60 | val oi = KryoSerializer.deserialize[ObjectInspector](oiSerialized) 61 | _obj = LazyFactory.createLazyObject(oi) 62 | } 63 | 64 | override def current = { 65 | val v = super.current.asInstanceOf[ByteArrayRef] 66 | _obj.init(v, 0, v.getData().length) 67 | _obj 68 | } 69 | } 70 | 71 | class VoidColumnIterator(buffer: ByteBuffer) extends DefaultColumnIterator(buffer, VOID) 72 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/column/ColumnBuilders.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2.column 19 | 20 | import java.nio.ByteBuffer 21 | import java.sql.Timestamp 22 | 23 | import org.apache.hadoop.hive.serde2.ByteStream 24 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 25 | import org.apache.hadoop.io.BytesWritable 26 | import org.apache.hadoop.io.Text 27 | 28 | 29 | import shark.execution.serialization.KryoSerializer 30 | import shark.memstore2.column.ColumnStats._ 31 | 32 | 33 | class BooleanColumnBuilder extends DefaultColumnBuilder[Boolean](new BooleanColumnStats(), BOOLEAN) 34 | 35 | class IntColumnBuilder extends DefaultColumnBuilder[Int](new IntColumnStats(), INT) 36 | 37 | class LongColumnBuilder extends DefaultColumnBuilder[Long](new LongColumnStats(), LONG) 38 | 39 | class FloatColumnBuilder extends DefaultColumnBuilder[Float](new FloatColumnStats(), FLOAT) 40 | 41 | class DoubleColumnBuilder extends DefaultColumnBuilder[Double](new DoubleColumnStats(), DOUBLE) 42 | 43 | class StringColumnBuilder extends DefaultColumnBuilder[Text](new StringColumnStats(), STRING) 44 | 45 | class ByteColumnBuilder extends DefaultColumnBuilder[Byte](new ByteColumnStats(), BYTE) 46 | 47 | class ShortColumnBuilder extends DefaultColumnBuilder[Short](new ShortColumnStats(), SHORT) 48 | 49 | class TimestampColumnBuilder 50 | extends DefaultColumnBuilder[Timestamp](new TimestampColumnStats(), TIMESTAMP) 51 | 52 | class BinaryColumnBuilder extends DefaultColumnBuilder[BytesWritable](new NoOpStats(), BINARY) 53 | 54 | class VoidColumnBuilder extends DefaultColumnBuilder[Void](new NoOpStats(), VOID) 55 | 56 | /** 57 | * Generic columns that we can serialize, including maps, structs, and other complex types. 58 | */ 59 | class GenericColumnBuilder(oi: ObjectInspector) 60 | extends DefaultColumnBuilder[ByteStream.Output](new NoOpStats(), GENERIC) { 61 | 62 | // Complex data types cannot be null. Override the initialize in NullableColumnBuilder. 63 | override def initialize(initialSize: Int, columnName: String): ByteBuffer = { 64 | val buffer = super.initialize(initialSize, columnName) 65 | val objectInspectorSerialized = KryoSerializer.serialize(oi) 66 | buffer.putInt(objectInspectorSerialized.size) 67 | buffer.put(objectInspectorSerialized) 68 | buffer 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /bin/dev/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2012 The Regents of The University California. 4 | # All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
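# Runs Shark's Hive CLI driver tests (shark.TestSharkCliDriver, or via ant when
# TEST_WITH_ANT=1) against a Hive development tree. Example invocation (paths are
# illustrative):
#   export HIVE_DEV_HOME=/path/to/hive
#   export HADOOP_HOME=/path/to/hadoop
#   ./bin/dev/test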
17 | 18 | get_abs_path() { 19 | local PARENT_DIR=$(dirname "$1") 20 | cd "$PARENT_DIR" 21 | local ABS_PATH="$(pwd)"/"$(basename $1)" 22 | cd - >/dev/null 23 | echo $ABS_PATH 24 | } 25 | 26 | CURRENTFILE=`get_abs_path $0` 27 | BINDIR="`dirname $CURRENTFILE`" 28 | FWDIR="`dirname $BINDIR`/.." 29 | 30 | # Load environment variables from conf/shark-env.sh, if it exists 31 | if [ -e $FWDIR/conf/shark-env.sh ] ; then 32 | . $FWDIR/conf/shark-env.sh 33 | fi 34 | 35 | # Hive related section. 36 | if [ -z $HIVE_DEV_HOME ] ; then 37 | echo "No HIVE_DEV_HOME specified. Please set HIVE_DEV_HOME" 38 | exit 1 39 | fi 40 | 41 | # Hive related section. 42 | if [ -z $HADOOP_HOME ] ; then 43 | echo "No HADOOP_HOME specified. Please set HADOOP_HOME" 44 | exit 1 45 | fi 46 | 47 | if [ -n "$TEST_FILE" ] ; then 48 | TEST_FILE=`get_abs_path $TEST_FILE` 49 | export TEST_FILE 50 | fi 51 | 52 | 53 | SPARK_CLASSPATH+=":${HIVE_DEV_HOME}/build/ql/test/classes" 54 | SPARK_CLASSPATH+=":${HIVE_DEV_HOME}/data/conf" 55 | export SPARK_CLASSPATH 56 | 57 | BUILD_PATH=$HIVE_DEV_HOME/build/ql 58 | 59 | # Set variables used by unit tests (ex. create_like.q). 60 | TEST_JAVA_OPTS="-Dbuild.dir=${HIVE_DEV_HOME}/build/ql " 61 | TEST_JAVA_OPTS+="-Dbuild.dir.hive=${HIVE_DEV_HOME}/build " 62 | TEST_JAVA_OPTS+="-Dbuild.ivy.lib.dir=${HIVE_DEV_HOME}/build/ivy/lib " 63 | TEST_JAVA_OPTS+="-Dderby.version=10.4.2.0 " 64 | TEST_JAVA_OPTS+="-Dlog4j.configuration=file://${HIVE_DEV_HOME}/data/conf/hive-log4j.properties " 65 | TEST_JAVA_OPTS+="-Dtest.log.dir=${BUILD_PATH}/test/logs " 66 | TEST_JAVA_OPTS+="-Dtest.output.overwrite=false " 67 | TEST_JAVA_OPTS+="-Dtest.src.data.dir=${HIVE_DEV_HOME}/data " 68 | TEST_JAVA_OPTS+="-Dtest.tmp.dir=${BUILD_PATH}/tmp " 69 | TEST_JAVA_OPTS+="-Dtest.warehouse.dir=${BUILD_PATH}/test/data/warehouse " 70 | #TEST_JAVA_OPTS+="-Duser.dir=${HIVE_DEV_HOME}/ql " 71 | 72 | export TEST_JAVA_OPTS 73 | 74 | # Set the current directory to hive/ql since lots of tests use relative path. 75 | cd ${HIVE_DEV_HOME}/ql 76 | 77 | if [ "$TEST_WITH_ANT" == "1" ] ; then 78 | export CLASSPATH 79 | export RUNNER="ant -noclasspath -nouserlib -f $FWDIR/bin/dev/build_test.xml test" 80 | exec $FWDIR/run "$@" 81 | else 82 | export SHARK_LAUNCH_WITH_JAVA=1 83 | exec $FWDIR/run junit.textui.TestRunner shark.TestSharkCliDriver "$@" 84 | fi 85 | -------------------------------------------------------------------------------- /src/main/resources/tablerdd/rddtable_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from string import Template 3 | import sys 4 | from generator_utils import * 5 | 6 | ## This script generates RDDtable.scala 7 | 8 | p = sys.stdout 9 | 10 | # e.g. createList(1,3, "T[", "]", ",") gives T[1],T[2],T[3] 11 | def createList(start, stop, prefix, suffix="", sep = ",", newlineAfter = 70, indent = 0): 12 | res = "" 13 | oneLine = res 14 | for y in range(start,stop+1): 15 | res += prefix + str(y) + suffix 16 | oneLine += prefix + str(y) + suffix 17 | if y != stop: 18 | res += sep 19 | oneLine += sep 20 | if len(oneLine) > newlineAfter: 21 | res += "\n" + " "*indent 22 | oneLine = "" 23 | return res 24 | 25 | ### The SparkContext declaration 26 | 27 | prefix = """ 28 | /* 29 | * Copyright (C) 2012 The Regents of The University California. 30 | * All rights reserved. 31 | * 32 | * Licensed under the Apache License, Version 2.0 (the "License"); 33 | * you may not use this file except in compliance with the License. 
34 | * You may obtain a copy of the License at 35 | * 36 | * http://www.apache.org/licenses/LICENSE-2.0 37 | * 38 | * Unless required by applicable law or agreed to in writing, software 39 | * distributed under the License is distributed on an "AS IS" BASIS, 40 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 41 | * See the License for the specific language governing permissions and 42 | * limitations under the License. 43 | */ 44 | 45 | package shark.api 46 | 47 | // *** This file is auto-generated from RDDTable_generator.py *** 48 | 49 | import scala.language.implicitConversions 50 | import scala.reflect.ClassTag 51 | import org.apache.spark.rdd.RDD 52 | 53 | object RDDTableImplicits { 54 | private type C[T] = ClassTag[T] 55 | 56 | """ 57 | 58 | p.write(prefix) 59 | 60 | for x in range(2,23): 61 | 62 | tableClass = Template( 63 | """ 64 | implicit def rddToTable$num[$tmlist] 65 | (rdd: RDD[($tlist)]): RDDTableFunctions = RDDTable(rdd) 66 | 67 | """).substitute(num = x, tmlist = createList(1, x, "T", ": C", ", ", indent=4), tlist = createList(1, x, "T", "", ", ", indent=4)) 68 | p.write(tableClass) 69 | 70 | prefix = """ 71 | } 72 | 73 | object RDDTable { 74 | 75 | private type C[T] = ClassTag[T] 76 | private def ct[T](implicit c: ClassTag[T]) = c 77 | """ 78 | 79 | p.write(prefix) 80 | 81 | for x in range(2,23): 82 | 83 | tableClass = Template( 84 | """ 85 | def apply[$tmlist] 86 | (rdd: RDD[($tlist)]) = { 87 | val classTag = implicitly[ClassTag[Seq[Any]]] 88 | val rddSeq: RDD[Seq[_]] = rdd.map(t => t.productIterator.toList.asInstanceOf[Seq[Any]])(classTag) 89 | new RDDTableFunctions(rddSeq, Seq($mtlist)) 90 | } 91 | 92 | """).substitute(tmlist = createList(1, x, "T", ": C", ", ", indent=4), tlist = createList(1, x, "T", "", ", ", indent=4), 93 | mtlist = createList(1, x, "ct[T", "]", ", ", indent=4)) 94 | p.write(tableClass) 95 | 96 | 97 | p.write("}\n") 98 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/SharkTblProperties.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.util.{Map => JavaMap} 21 | 22 | 23 | /** 24 | * Collection of static fields and helpers for table properties (i.e., from A 25 | * CREATE TABLE TBLPROPERTIES( ... ) used by Shark. 26 | */ 27 | object SharkTblProperties { 28 | 29 | case class TableProperty(varname: String, defaultVal: String) 30 | 31 | // Class name of the default cache policy used to manage partition evictions for cached, 32 | // Hive-partitioned tables. 
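  // Overridden per table via TBLPROPERTIES, e.g.
  //   TBLPROPERTIES ("shark.cache.policy" = "shark.memstore2.LRUCachePolicy")
  // (the policy class name above is illustrative).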
33 | val CACHE_POLICY = new TableProperty("shark.cache.policy", "shark.memstore2.CacheAllPolicy") 34 | 35 | // Maximum size - in terms of the number of objects - of the cache specified by the 36 | // "shark.cache.partition.cachePolicy" property above. 37 | val MAX_PARTITION_CACHE_SIZE = new TableProperty("shark.cache.policy.maxSize", "10") 38 | 39 | // Default value for the "shark.cache" table property 40 | val CACHE_FLAG = new TableProperty("shark.cache", "true") 41 | 42 | // Whether we are currently in the process of caching the table (meaning it cannot be accessed). 43 | val CACHE_IN_PROGRESS_FLAG = new TableProperty("shark.cache.inProgress", "false") 44 | 45 | def getOrSetDefault(tblProps: JavaMap[String, String], variable: TableProperty): String = { 46 | if (!tblProps.containsKey(variable.varname)) { 47 | tblProps.put(variable.varname, variable.defaultVal) 48 | } 49 | tblProps.get(variable.varname) 50 | } 51 | 52 | /** 53 | * Returns value for the `variable` table property. If a value isn't present in `tblProps`, then 54 | * the default for `variable` will be returned. 55 | */ 56 | def initializeWithDefaults( 57 | tblProps: JavaMap[String, String], 58 | isPartitioned: Boolean = false): JavaMap[String, String] = { 59 | tblProps.put(CACHE_FLAG.varname, CACHE_FLAG.defaultVal) 60 | tblProps.put(CACHE_IN_PROGRESS_FLAG.varname, CACHE_IN_PROGRESS_FLAG.defaultVal) 61 | if (isPartitioned) { 62 | tblProps.put(CACHE_POLICY.varname, CACHE_POLICY.defaultVal) 63 | } 64 | tblProps 65 | } 66 | 67 | def removeSharkProperties(tblProps: JavaMap[String, String]) { 68 | tblProps.remove(CACHE_FLAG.varname) 69 | tblProps.remove(CACHE_IN_PROGRESS_FLAG.varname) 70 | tblProps.remove(CACHE_POLICY.varname) 71 | tblProps.remove(MAX_PARTITION_CACHE_SIZE.varname) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/resources/tablerdd/TableRDDGenerated_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from string import Template 3 | import sys 4 | from generator_utils import * 5 | 6 | ## This script generates TableRDDGenerated.scala 7 | 8 | p = sys.stdout 9 | 10 | p.write( 11 | """ 12 | /* 13 | * Copyright (C) 2013 The Regents of The University California. 14 | * All rights reserved. 15 | * 16 | * Licensed under the Apache License, Version 2.0 (the "License"); 17 | * you may not use this file except in compliance with the License. 18 | * You may obtain a copy of the License at 19 | * 20 | * http://www.apache.org/licenses/LICENSE-2.0 21 | * 22 | * Unless required by applicable law or agreed to in writing, software 23 | * distributed under the License is distributed on an "AS IS" BASIS, 24 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | * See the License for the specific language governing permissions and 26 | * limitations under the License. 
27 | */ 28 | 29 | 30 | 31 | package shark.api 32 | 33 | // *** This file is auto-generated from TableRDDGenerated_generator.py *** 34 | import scala.language.implicitConversions 35 | import org.apache.spark.rdd.RDD 36 | import org.apache.spark.{TaskContext, Partition} 37 | 38 | import scala.reflect.ClassTag 39 | 40 | class TableSeqRDD(prev: TableRDD) 41 | extends RDD[Seq[Any]](prev) { 42 | 43 | def getSchema = prev.schema 44 | 45 | override def getPartitions = prev.getPartitions 46 | 47 | override def compute(split: Partition, context: TaskContext): Iterator[Seq[Any]] = { 48 | prev.compute(split, context).map( row => 49 | (0 until prev.schema.size).map(i => row.getPrimitive(i)) ) 50 | } 51 | } 52 | 53 | """) 54 | 55 | for x in range(1,23): 56 | 57 | inner = "" 58 | for y in range(1,x+1): 59 | if y % 3 == 1: inner += " " 60 | inner += Template(" row.getPrimitiveGeneric[T$num1]($num2)").substitute(num1=y, num2=y-1) 61 | if y != x: inner += "," 62 | if y % 3 == 0: inner += "\n" 63 | inner += " ) )\n" 64 | 65 | tableClass = Template( 66 | """ 67 | class TableRDD$num[$list](prev: TableRDD, 68 | tags: Seq[ClassTag[_]]) 69 | extends RDD[Tuple$num[$list]](prev) { 70 | def schema = prev.schema 71 | 72 | private val tableCols = schema.size 73 | require(tableCols == $num, "Table only has " + tableCols + " columns, expecting $num") 74 | 75 | tags.zipWithIndex.foreach{ case (m, i) => if (DataTypes.fromClassTag(m) != schema(i).dataType) 76 | throw new IllegalArgumentException( 77 | "Type mismatch on column " + (i + 1) + ", expected " + DataTypes.fromClassTag(m) + " got " + schema(i).dataType) } 78 | 79 | override def getPartitions = prev.getPartitions 80 | 81 | override def compute(split: Partition, context: TaskContext): 82 | Iterator[Tuple$num[$list]] = { 83 | prev.compute(split, context).map( row => 84 | new Tuple$num[$list]( 85 | $innerfatlist 86 | } 87 | } 88 | """).substitute(num = x, list = createList(1, x, "T", "", ", ", indent=4), innerfatlist = inner) 89 | 90 | 91 | p.write(tableClass) 92 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/MemoryTable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import org.apache.spark.rdd.RDD 21 | 22 | import scala.collection.mutable.{Buffer, HashMap} 23 | 24 | import shark.execution.RDDUtils 25 | 26 | 27 | /** 28 | * A metadata container for a table in Shark that's backed by an RDD. 29 | */ 30 | private[shark] class MemoryTable( 31 | databaseName: String, 32 | tableName: String, 33 | cacheMode: CacheType.CacheType) 34 | extends Table(databaseName, tableName, cacheMode) { 35 | 36 | private var _rddValueOpt: Option[RDDValue] = None 37 | 38 | /** 39 | * Sets the RDD and stats fields the `_rddValueOpt`. 
Used for INSERT/LOAD OVERWRITE. 40 | * @param newRDD The table's data. 41 | * @param newStats Stats for each TablePartition in `newRDD`. 42 | * @return The previous (RDD, stats) pair for this table. 43 | */ 44 | def put( 45 | newRDD: RDD[TablePartition], 46 | newStats: collection.Map[Int, TablePartitionStats] = new HashMap[Int, TablePartitionStats]() 47 | ): Option[(RDD[TablePartition], collection.Map[Int, TablePartitionStats])] = { 48 | val prevRDDAndStatsOpt = _rddValueOpt.map(_.toTuple) 49 | if (_rddValueOpt.isDefined) { 50 | _rddValueOpt.foreach { rddValue => 51 | rddValue.rdd = newRDD 52 | rddValue.stats = newStats 53 | } 54 | } else { 55 | _rddValueOpt = Some(new RDDValue(newRDD, newStats)) 56 | } 57 | prevRDDAndStatsOpt 58 | } 59 | 60 | /** 61 | * Used for append operations, such as INSERT and LOAD INTO. 62 | * 63 | * @param newRDD Data to append to the table. 64 | * @param newStats Stats for each TablePartition in `newRDD`. 65 | * @return The previous (RDD, stats) pair for this table. 66 | */ 67 | def update( 68 | newRDD: RDD[TablePartition], 69 | newStats: Buffer[(Int, TablePartitionStats)] 70 | ): Option[(RDD[TablePartition], collection.Map[Int, TablePartitionStats])] = { 71 | val prevRDDAndStatsOpt = _rddValueOpt.map(_.toTuple) 72 | if (_rddValueOpt.isDefined) { 73 | val (prevRDD, prevStats) = (prevRDDAndStatsOpt.get._1, prevRDDAndStatsOpt.get._2) 74 | val updatedRDDValue = _rddValueOpt.get 75 | updatedRDDValue.rdd = RDDUtils.unionAndFlatten(newRDD, prevRDD) 76 | updatedRDDValue.stats = Table.mergeStats(newStats, prevStats).toMap 77 | } else { 78 | put(newRDD, newStats.toMap) 79 | } 80 | prevRDDAndStatsOpt 81 | } 82 | 83 | def getRDD = _rddValueOpt.map(_.rdd) 84 | 85 | def getStats = _rddValueOpt.map(_.stats) 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/main/scala/shark/KryoRegistrator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark 19 | 20 | import java.io.{DataInputStream, DataOutputStream} 21 | import java.util.Arrays 22 | import com.esotericsoftware.kryo.{Kryo, Serializer => KSerializer} 23 | import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput} 24 | import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer} 25 | import org.apache.hadoop.io.Writable 26 | import org.apache.hadoop.hive.ql.exec.persistence.{MapJoinSingleKey, MapJoinObjectKey, 27 | MapJoinDoubleKeys, MapJoinObjectValue} 28 | import org.apache.spark.serializer.{KryoRegistrator => SparkKryoRegistrator} 29 | import shark.execution.serialization.SerializableWritable 30 | 31 | 32 | class KryoRegistrator extends SparkKryoRegistrator { 33 | def registerClasses(kryo: Kryo) { 34 | 35 | kryo.register(classOf[execution.ReduceKey]) 36 | 37 | // The map join data structures are Java serializable. 38 | kryo.register(classOf[MapJoinSingleKey], new KryoJavaSerializer) 39 | kryo.register(classOf[MapJoinObjectKey], new KryoJavaSerializer) 40 | kryo.register(classOf[MapJoinDoubleKeys], new KryoJavaSerializer) 41 | kryo.register(classOf[MapJoinObjectValue], new KryoJavaSerializer) 42 | 43 | kryo.register(classOf[SerializableWritable[_]], new KryoSWSerializer) 44 | 45 | // As far as I (rxin) know, among all Hadoop writables only TimestampWritable 46 | // cannot be serialized by Kryo out of the box. 47 | kryo.register(classOf[org.apache.hadoop.hive.serde2.io.TimestampWritable], 48 | new KryoWritableSerializer[org.apache.hadoop.hive.serde2.io.TimestampWritable]) 49 | } 50 | } 51 | 52 | class KryoSWSerializer[T <: Writable] extends KSerializer[SerializableWritable[T]] { 53 | def write(kryo : Kryo, out : KryoOutput, obj : SerializableWritable[T]) { 54 | kryo.writeClassAndObject(out, obj.t); out.flush; 55 | } 56 | def read(kryo : Kryo, in : KryoInput, cls : Class[SerializableWritable[T]]) : SerializableWritable[T] = { 57 | new SerializableWritable( 58 | kryo.readClassAndObject(in).asInstanceOf[T] 59 | ) 60 | } 61 | } 62 | 63 | /** A Kryo serializer for Hadoop writables. */ 64 | class KryoWritableSerializer[T <: Writable] extends KSerializer[T] { 65 | override def write(kryo: Kryo, output: KryoOutput, writable: T) { 66 | val ouputStream = new DataOutputStream(output) 67 | writable.write(ouputStream) 68 | } 69 | 70 | override def read(kryo: Kryo, input: KryoInput, cls: java.lang.Class[T]): T = { 71 | val writable = cls.newInstance() 72 | val inputStream = new DataInputStream(input) 73 | writable.readFields(inputStream) 74 | writable 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/tachyon_enabled/scala/shark/tachyon/TachyonOffHeapTableWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.tachyon 19 | 20 | import java.nio.ByteBuffer 21 | 22 | import scala.reflect.BeanProperty 23 | 24 | import shark.{LogHelper, SharkConfVars} 25 | import shark.execution.serialization.JavaSerializer 26 | import shark.memstore2.{OffHeapStorageClient, OffHeapTableWriter, TablePartitionStats} 27 | 28 | import tachyon.client.WriteType 29 | import tachyon.master.MasterInfo 30 | import tachyon.util.CommonUtils 31 | 32 | class TachyonOffHeapTableWriter(@transient path: String, @transient numColumns: Int) 33 | extends OffHeapTableWriter with LogHelper { 34 | 35 | // Re-instantiated upon deserialization, the first time it's referenced. 36 | @transient lazy val tfs = OffHeapStorageClient.client.asInstanceOf[TachyonStorageClient].tfs 37 | val TEMP = "_temperary" 38 | var rawTableId: Int = -1 39 | 40 | override def createTable() { 41 | val metadata = ByteBuffer.allocate(0) 42 | rawTableId = tfs.createRawTable(path, numColumns, metadata) 43 | } 44 | 45 | override def setStats(indexToStats: collection.Map[Int, TablePartitionStats]) { 46 | val buffer = ByteBuffer.wrap(JavaSerializer.serialize(indexToStats)) 47 | tfs.updateRawTableMetadata(rawTableId, buffer) 48 | } 49 | 50 | // rawTable is a lazy val so it gets created the first time it is referenced. 51 | // This is only used on worker nodes. 52 | @transient lazy val rawTable = tfs.getRawTable(rawTableId) 53 | 54 | override def writePartitionColumn(part: Int, column: Int, data: ByteBuffer, tempDir: String) { 55 | val tmpPath = CommonUtils.concat(rawTable.getPath(), TEMP) 56 | val fid = tfs.createFile(CommonUtils.concat(tmpPath, tempDir, column + "", part + "")) 57 | val file = tfs.getFile(fid) 58 | val writeType: WriteType = WriteType.valueOf( 59 | SharkConfVars.getVar(localHconf, SharkConfVars.TACHYON_WRITER_WRITETYPE)) 60 | val outStream = file.getOutStream(writeType) 61 | outStream.write(data.array(), 0, data.limit()) 62 | outStream.close() 63 | } 64 | 65 | override def commitPartition(part: Int, numColumns: Int, tempDir: String) { 66 | val tmpPath = CommonUtils.concat(rawTable.getPath(), TEMP) 67 | (0 until numColumns).reverse.foreach { column => 68 | val srcPath = CommonUtils.concat(tmpPath, tempDir, column + "", part + "") 69 | val destPath = CommonUtils.concat(rawTable.getPath(), MasterInfo.COL, column + "", part + "") 70 | tfs.rename(srcPath, destPath) 71 | } 72 | tfs.delete(CommonUtils.concat(tmpPath, tempDir), true) 73 | } 74 | 75 | override def cleanTmpPath() { 76 | val tmpPath = CommonUtils.concat(rawTable.getPath(), TEMP) 77 | tfs.delete(tmpPath, true) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/serialization/XmlSerializer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.execution.serialization 19 | 20 | import java.beans.{XMLDecoder, XMLEncoder} 21 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream} 22 | 23 | import com.ning.compress.lzf.{LZFEncoder, LZFDecoder} 24 | 25 | import org.apache.hadoop.conf.Configuration 26 | import org.apache.hadoop.hive.conf.HiveConf 27 | import org.apache.hadoop.hive.ql.exec.Utilities.EnumDelegate 28 | import org.apache.hadoop.hive.ql.plan.GroupByDesc 29 | import org.apache.hadoop.hive.ql.plan.PlanUtils.ExpressionTypes 30 | 31 | import shark.SharkConfVars 32 | 33 | 34 | /** 35 | * Java object serialization using XML encoder/decoder. Avoid using this to 36 | * serialize byte arrays because it is extremely inefficient. 37 | */ 38 | object XmlSerializer { 39 | // We prepend the buffer with a byte indicating whether payload is compressed 40 | val COMPRESSION_ENABLED: Byte = 1 41 | val COMPRESSION_DISABLED: Byte = 0 42 | 43 | def serialize[T](o: T, conf: Configuration): Array[Byte] = { 44 | val byteStream = new ByteArrayOutputStream() 45 | val e = new XMLEncoder(byteStream) 46 | // workaround for java 1.5 47 | e.setPersistenceDelegate(classOf[ExpressionTypes], new EnumDelegate()) 48 | e.setPersistenceDelegate(classOf[GroupByDesc.Mode], new EnumDelegate()) 49 | // workaround for HiveConf-not-a-javabean 50 | e.setPersistenceDelegate(classOf[HiveConf], new HiveConfPersistenceDelegate ) 51 | e.writeObject(o) 52 | e.close() 53 | 54 | val useCompression = conf match { 55 | case null => SharkConfVars.COMPRESS_QUERY_PLAN.defaultBoolVal 56 | case _ => SharkConfVars.getBoolVar(conf, SharkConfVars.COMPRESS_QUERY_PLAN) 57 | } 58 | 59 | if (useCompression) { 60 | COMPRESSION_ENABLED +: LZFEncoder.encode(byteStream.toByteArray()) 61 | } else { 62 | COMPRESSION_DISABLED +: byteStream.toByteArray 63 | } 64 | } 65 | 66 | def deserialize[T](bytes: Array[Byte]): T = { 67 | val cl = Thread.currentThread.getContextClassLoader 68 | val decodedStream = 69 | if (bytes(0) == COMPRESSION_ENABLED) { 70 | new ByteArrayInputStream(LZFDecoder.decode(bytes.slice(1, bytes.size))) 71 | } else { 72 | new ByteArrayInputStream(bytes.slice(1, bytes.size)) 73 | } 74 | 75 | // Occasionally an object inspector is created from the decoding. 76 | // Need to put a lock on the process. 77 | val ret = { 78 | val d: XMLDecoder = new XMLDecoder(decodedStream, null, null, cl) 79 | classOf[XMLDecoder].synchronized { 80 | val ret = d.readObject() 81 | d.close() 82 | ret 83 | } 84 | } 85 | ret.asInstanceOf[T] 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/UDTFOperator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.execution 19 | 20 | import java.util.{List => JavaList} 21 | 22 | import scala.collection.mutable.ArrayBuffer 23 | import scala.collection.JavaConversions._ 24 | import scala.reflect.BeanProperty 25 | 26 | import org.apache.hadoop.hive.ql.plan.UDTFDesc 27 | import org.apache.hadoop.hive.ql.udf.generic.Collector 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector 29 | import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector 30 | import org.apache.hadoop.hive.serde2.objectinspector.StructField 31 | 32 | 33 | class UDTFOperator extends UnaryOperator[UDTFDesc] { 34 | 35 | @BeanProperty var conf: UDTFDesc = _ 36 | 37 | @transient var objToSendToUDTF: Array[java.lang.Object] = _ 38 | @transient var soi: StandardStructObjectInspector = _ 39 | @transient var inputFields: JavaList[_ <: StructField] = _ 40 | @transient var collector: UDTFCollector = _ 41 | @transient var outputObjInspector: ObjectInspector = _ 42 | 43 | override def initializeOnMaster() { 44 | super.initializeOnMaster() 45 | 46 | conf = desc 47 | 48 | initializeOnSlave() 49 | } 50 | 51 | override def initializeOnSlave() { 52 | collector = new UDTFCollector 53 | conf.getGenericUDTF().setCollector(collector) 54 | 55 | // Make an object inspector [] of the arguments to the UDTF 56 | soi = objectInspectors.head.asInstanceOf[StandardStructObjectInspector] 57 | inputFields = soi.getAllStructFieldRefs() 58 | 59 | val udtfInputOIs = inputFields.map { case inputField => 60 | inputField.getFieldObjectInspector() 61 | }.toArray 62 | 63 | objToSendToUDTF = new Array[java.lang.Object](inputFields.size) 64 | outputObjInspector = conf.getGenericUDTF().initialize(udtfInputOIs) 65 | } 66 | 67 | override def outputObjectInspector() = outputObjInspector 68 | 69 | override def processPartition(split: Int, iter: Iterator[_]): Iterator[_] = { 70 | iter.flatMap { row => 71 | explode(row) 72 | } 73 | } 74 | 75 | def explode[T](row: T): ArrayBuffer[java.lang.Object] = { 76 | (0 until inputFields.size).foreach { case i => 77 | objToSendToUDTF(i) = soi.getStructFieldData(row, inputFields.get(i)) 78 | } 79 | conf.getGenericUDTF().process(objToSendToUDTF) 80 | collector.collectRows() 81 | } 82 | } 83 | 84 | class UDTFCollector extends Collector { 85 | 86 | var collected = new ArrayBuffer[java.lang.Object] 87 | 88 | override def collect(input: java.lang.Object) { 89 | // We need to clone the input here because implementations of 90 | // GenericUDTF reuse the same object. Luckily they are always an array, so 91 | // it is easy to clone. 92 | collected += input.asInstanceOf[Array[_]].clone 93 | } 94 | 95 | def collectRows() = { 96 | val toCollect = collected 97 | collected = new ArrayBuffer[java.lang.Object] 98 | toCollect 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /src/main/scala/shark/api/RDDTableFunctions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.api 19 | 20 | import scala.collection.mutable.ArrayBuffer 21 | import scala.reflect.ClassTag 22 | 23 | import org.apache.hadoop.hive.ql.metadata.Hive 24 | 25 | import org.apache.spark.rdd.RDD 26 | 27 | import shark.{SharkContext, SharkEnv} 28 | import shark.memstore2.{CacheType, TablePartitionStats, TablePartition, TablePartitionBuilder} 29 | import shark.util.HiveUtils 30 | 31 | 32 | class RDDTableFunctions(self: RDD[Seq[_]], classTags: Seq[ClassTag[_]]) { 33 | 34 | def saveAsTable(tableName: String, fields: Seq[String]): Boolean = { 35 | require(fields.size == this.classTags.size, 36 | "Number of column names != number of fields in the RDD.") 37 | 38 | // Get a local copy of the classTags so we don't need to serialize this object. 39 | val classTags = this.classTags 40 | 41 | val statsAcc = SharkEnv.sc.accumulableCollection(ArrayBuffer[(Int, TablePartitionStats)]()) 42 | 43 | // Create the RDD object. 44 | val rdd = self.mapPartitionsWithIndex { case(partitionIndex, iter) => 45 | val ois = classTags.map(HiveUtils.getJavaPrimitiveObjectInspector) 46 | val builder = new TablePartitionBuilder( 47 | HiveUtils.makeStandardStructObjectInspector(fields, ois), 48 | 1000000, 49 | shouldCompress = false) 50 | 51 | for (p <- iter) { 52 | builder.incrementRowCount() 53 | // TODO: this is not the most efficient code to do the insertion ... 54 | p.zipWithIndex.foreach { case (v, i) => 55 | builder.append(i, v.asInstanceOf[Object], ois(i)) 56 | } 57 | } 58 | 59 | statsAcc += Tuple2(partitionIndex, builder.asInstanceOf[TablePartitionBuilder].stats) 60 | Iterator(builder.build()) 61 | }.persist() 62 | 63 | var isSucessfulCreateTable = HiveUtils.createTableInHive( 64 | tableName, fields, classTags, Hive.get().getConf()) 65 | 66 | // Put the table in the metastore. Only proceed if the DDL statement is executed successfully. 67 | val databaseName = Hive.get(SharkContext.hiveconf).getCurrentDatabase() 68 | if (isSucessfulCreateTable) { 69 | // Create an entry in the MemoryMetadataManager. 70 | val newTable = SharkEnv.memoryMetadataManager.createMemoryTable( 71 | databaseName, tableName, CacheType.MEMORY) 72 | try { 73 | // Force evaluate to put the data in memory. 74 | rdd.context.runJob(rdd, (iter: Iterator[TablePartition]) => iter.foreach(_ => Unit)) 75 | } catch { 76 | case _: Exception => { 77 | // Intercept the exception thrown by SparkContext#runJob() and handle it silently. The 78 | // exception message should already be printed to the console by DDLTask#execute(). 79 | HiveUtils.dropTableInHive(tableName) 80 | // Drop the table entry from MemoryMetadataManager. 
81 | SharkEnv.memoryMetadataManager.removeTable(databaseName, tableName) 82 | isSucessfulCreateTable = false 83 | } 84 | } 85 | newTable.put(rdd, statsAcc.value.toMap) 86 | } 87 | return isSucessfulCreateTable 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/scala/shark/parse/SharkLoadSemanticAnalyzer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.parse 19 | 20 | import scala.collection.JavaConversions._ 21 | 22 | import org.apache.hadoop.hive.conf.HiveConf 23 | import org.apache.hadoop.hive.ql.exec.{CopyTask, MoveTask, TaskFactory} 24 | import org.apache.hadoop.hive.ql.metadata.{Partition, Table => HiveTable} 25 | import org.apache.hadoop.hive.ql.parse.{ASTNode, BaseSemanticAnalyzer, LoadSemanticAnalyzer} 26 | import org.apache.hadoop.hive.ql.plan._ 27 | 28 | import shark.{LogHelper, SharkEnv} 29 | import shark.execution.SparkLoadWork 30 | import shark.memstore2.{CacheType, SharkTblProperties} 31 | 32 | 33 | class SharkLoadSemanticAnalyzer(conf: HiveConf) extends LoadSemanticAnalyzer(conf) { 34 | 35 | override def analyzeInternal(ast: ASTNode): Unit = { 36 | // Delegate to the LoadSemanticAnalyzer parent for error checking the source path formatting. 37 | super.analyzeInternal(ast) 38 | 39 | // Children of the AST root created for a LOAD DATA [LOCAL] INPATH ... statement are, in order: 40 | // 1. node containing the path specified by INPATH. 41 | // 2. internal TOK_TABNAME node that contains the table's name. 42 | // 3. (optional) node representing the LOCAL modifier. 43 | val tableASTNode = ast.getChild(1).asInstanceOf[ASTNode] 44 | val tableName = getTableName(tableASTNode) 45 | val hiveTable = db.getTable(tableName) 46 | val cacheMode = CacheType.fromString( 47 | hiveTable.getProperty(SharkTblProperties.CACHE_FLAG.varname)) 48 | 49 | if (CacheType.shouldCache(cacheMode)) { 50 | // Find the arguments needed to instantiate a SparkLoadWork. 51 | val tableSpec = new BaseSemanticAnalyzer.tableSpec(db, conf, tableASTNode) 52 | val hiveTable = tableSpec.tableHandle 53 | val moveTask = getMoveTask() 54 | val partSpecOpt = Option(tableSpec.getPartSpec) 55 | val sparkLoadWork = SparkLoadWork( 56 | db, 57 | conf, 58 | hiveTable, 59 | partSpecOpt, 60 | isOverwrite = moveTask.getWork.getLoadTableWork.getReplace) 61 | 62 | // Create a SparkLoadTask that will read from the table's data directory. Make it a dependent 63 | // task of the LoadTask so that it's executed only if the LoadTask executes successfully. 64 | moveTask.addDependentTask(TaskFactory.get(sparkLoadWork, conf)) 65 | } 66 | } 67 | 68 | private def getMoveTask(): MoveTask = { 69 | assert(rootTasks.size == 1) 70 | 71 | // If the execution is local, then the root task is a CopyTask with a MoveTask child. 
72 | // Otherwise, the root is a MoveTask. 73 | var rootTask = rootTasks.head 74 | val moveTask = if (rootTask.isInstanceOf[CopyTask]) { 75 | val firstChildTask = rootTask.getChildTasks.head 76 | assert(firstChildTask.isInstanceOf[MoveTask]) 77 | firstChildTask 78 | } else { 79 | rootTask 80 | } 81 | 82 | // In Hive, LoadTableDesc is referred to as LoadTableWork ... 83 | moveTask.asInstanceOf[MoveTask] 84 | } 85 | 86 | private def getTableName(node: ASTNode): String = { 87 | BaseSemanticAnalyzer.getUnescapedName(node.getChild(0).asInstanceOf[ASTNode]) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/test/scala/shark/memstore2/column/NullableColumnIteratorSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2.column 19 | 20 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory 21 | import org.apache.hadoop.io.Text 22 | import org.apache.hadoop.io.IntWritable 23 | 24 | import org.scalatest.FunSuite 25 | 26 | 27 | class NullableColumnIteratorSuite extends FunSuite { 28 | 29 | test("String Growth") { 30 | val c = new StringColumnBuilder 31 | c.initialize(4, "") 32 | val oi = PrimitiveObjectInspectorFactory.writableStringObjectInspector 33 | 34 | val a = Array[Text]( 35 | new Text("a"), null, 36 | new Text("b"), null, 37 | new Text("abc"), null, 38 | null, null, new Text("efg") 39 | ) 40 | a.foreach { 41 | t => c.append(t, oi) 42 | } 43 | val b = c.build() 44 | val i = ColumnIterator.newIterator(b) 45 | Range(0, a.length).foreach { x => 46 | if (x > 0) assert(i.hasNext) 47 | i.next() 48 | val v = i.current 49 | if (a(x) == null) { 50 | assert(v == null) 51 | } else { 52 | assert(v.toString == a(x).toString) 53 | } 54 | } 55 | assert(!i.hasNext) 56 | } 57 | 58 | test("Iterate Strings") { 59 | val c = new StringColumnBuilder 60 | c.initialize(4, "") 61 | val oi = PrimitiveObjectInspectorFactory.writableStringObjectInspector 62 | 63 | c.append(new Text("a"), oi) 64 | c.append(new Text(""), oi) 65 | c.append(null, oi) 66 | c.append(new Text("b"), oi) 67 | c.append(new Text("Abcdz"), oi) 68 | c.append(null, oi) 69 | val b = c.build() 70 | val i = ColumnIterator.newIterator(b) 71 | i.next() 72 | assert(i.current.toString() == "a") 73 | i.next() 74 | assert(i.current.toString() == "") 75 | i.next() 76 | assert(i.current == null) 77 | i.next() 78 | assert(i.current.toString() == "b") 79 | i.next() 80 | assert(i.current.toString() == "Abcdz") 81 | i.next() 82 | assert(i.current == null) 83 | assert(false === i.hasNext) 84 | } 85 | 86 | test("Iterate Ints") { 87 | def testList(l: Seq[AnyRef]) { 88 | val c = new IntColumnBuilder 89 | c.initialize(l.size, "") 90 | val oi = PrimitiveObjectInspectorFactory.javaIntObjectInspector 91 | 92 | l.foreach 
{ item => 93 | if (item == null) { 94 | c.append(null, oi) 95 | } else { 96 | c.append(item.asInstanceOf[Object], oi) 97 | } 98 | } 99 | 100 | val b = c.build() 101 | val i = ColumnIterator.newIterator(b) 102 | 103 | l.foreach { x => 104 | i.next() 105 | if (x == null) { 106 | assert(i.current === x) 107 | } else { 108 | assert(i.current.asInstanceOf[IntWritable].get === x) 109 | } 110 | } 111 | assert(false === i.hasNext) 112 | } 113 | 114 | testList(List(null, null, 123.asInstanceOf[AnyRef])) 115 | testList(List(123.asInstanceOf[AnyRef], 4.asInstanceOf[AnyRef], null)) 116 | testList(List(null)) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/test/scala/shark/memstore2/column/ColumnTypeSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2.column 19 | 20 | import java.nio.ByteBuffer 21 | 22 | import org.apache.hadoop.io.IntWritable 23 | import org.apache.hadoop.io.LongWritable 24 | import org.apache.hadoop.hive.serde2.io._ 25 | 26 | import org.scalatest.FunSuite 27 | 28 | class ColumnTypeSuite extends FunSuite { 29 | 30 | test("Int") { 31 | assert(INT.defaultSize == 4) 32 | var buffer = ByteBuffer.allocate(32) 33 | var a: Seq[Int] = Array[Int](35, 67, 899, 4569001) 34 | a.foreach {i => buffer.putInt(i)} 35 | buffer.rewind() 36 | a.foreach {i => 37 | val v = INT.extract(buffer) 38 | assert(v == i) 39 | } 40 | buffer = ByteBuffer.allocate(32) 41 | a = Range(0, 4) 42 | a.foreach { i => 43 | INT.append(i, buffer) 44 | } 45 | buffer.rewind() 46 | a.foreach { i => assert(buffer.getInt() == i)} 47 | 48 | buffer = ByteBuffer.allocate(32) 49 | a =Range(0,4) 50 | a.foreach { i => buffer.putInt(i)} 51 | buffer.rewind() 52 | val writable = new IntWritable() 53 | a.foreach { i => 54 | INT.extractInto(buffer, writable) 55 | assert(writable.get == i) 56 | } 57 | 58 | } 59 | 60 | test("Short") { 61 | assert(SHORT.defaultSize == 2) 62 | assert(SHORT.actualSize(8) == 2) 63 | var buffer = ByteBuffer.allocate(32) 64 | var a = Array[Short](35, 67, 87, 45) 65 | a.foreach {i => buffer.putShort(i)} 66 | buffer.rewind() 67 | a.foreach {i => 68 | val v = SHORT.extract(buffer) 69 | assert(v == i) 70 | } 71 | 72 | buffer = ByteBuffer.allocate(32) 73 | a = Array[Short](0,1,2,3) 74 | a.foreach { i => 75 | SHORT.append(i, buffer) 76 | } 77 | buffer.rewind() 78 | a.foreach { i => assert(buffer.getShort() == i)} 79 | 80 | buffer = ByteBuffer.allocate(32) 81 | a =Array[Short](0,1,2,3) 82 | a.foreach { i => buffer.putShort(i)} 83 | buffer.rewind() 84 | val writable = new ShortWritable() 85 | a.foreach { i => 86 | SHORT.extractInto(buffer, writable) 87 | assert(writable.get == i) 88 | } 89 | } 90 | 91 | test("Long") { 92 | assert(LONG.defaultSize == 8) 93 | assert(LONG.actualSize(45L) == 8) 94 | var 
buffer = ByteBuffer.allocate(64) 95 | var a = Array[Long](35L, 67L, 8799000880L, 45000999090L) 96 | a.foreach {i => buffer.putLong(i)} 97 | buffer.rewind() 98 | a.foreach {i => 99 | val v = LONG.extract(buffer) 100 | assert(v == i) 101 | } 102 | 103 | buffer = ByteBuffer.allocate(32) 104 | a = Array[Long](0,1,2,3) 105 | a.foreach { i => 106 | LONG.append(i, buffer) 107 | } 108 | buffer.rewind() 109 | a.foreach { i => assert(buffer.getLong() == i)} 110 | 111 | buffer = ByteBuffer.allocate(32) 112 | a =Array[Long](0,1,2,3) 113 | a.foreach { i => buffer.putLong(i)} 114 | buffer.rewind() 115 | val writable = new LongWritable() 116 | a.foreach { i => 117 | LONG.extractInto(buffer, writable) 118 | assert(writable.get == i) 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/ColumnarStructObjectInspector.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.util.{ArrayList => JArrayList, List => JList} 21 | 22 | import org.apache.hadoop.hive.serde2.`lazy`.LazyFactory 23 | import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe.SerDeParameters 24 | import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorUtils, 25 | StructField, StructObjectInspector} 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category 27 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory 28 | import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo 29 | 30 | 31 | class ColumnarStructObjectInspector(fields: JList[StructField]) extends StructObjectInspector { 32 | 33 | override def getCategory: Category = Category.STRUCT 34 | 35 | override def getTypeName: String = ObjectInspectorUtils.getStandardStructTypeName(this) 36 | 37 | override def getStructFieldRef(fieldName: String): StructField = 38 | ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields) 39 | 40 | override def getAllStructFieldRefs: JList[_ <: StructField] = fields 41 | 42 | override def getStructFieldData(data: Object, fieldRef: StructField): Object = 43 | data.asInstanceOf[ColumnarStruct].getField( 44 | fieldRef.asInstanceOf[ColumnarStructObjectInspector.IDStructField].fieldID) 45 | 46 | override def getStructFieldsDataAsList(data: Object): JList[Object] = 47 | if (data == null) null else data.asInstanceOf[ColumnarStruct].getFieldsAsList() 48 | } 49 | 50 | 51 | object ColumnarStructObjectInspector { 52 | 53 | def apply(serDeParams: SerDeParameters): ColumnarStructObjectInspector = { 54 | 55 | val columnNames = serDeParams.getColumnNames() 56 | val columnTypes = serDeParams.getColumnTypes() 57 | val fields = new JArrayList[StructField]() 58 | for (i <- 0 until 
columnNames.size) { 59 | val typeInfo = columnTypes.get(i) 60 | val fieldOI = typeInfo.getCategory match { 61 | case Category.PRIMITIVE => 62 | PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( 63 | typeInfo.asInstanceOf[PrimitiveTypeInfo].getPrimitiveCategory) 64 | case _ => LazyFactory.createLazyObjectInspector( 65 | typeInfo, serDeParams.getSeparators(), 1, serDeParams.getNullSequence(), 66 | serDeParams.isEscaped(), serDeParams.getEscapeChar()) 67 | } 68 | fields.add(new IDStructField(i, columnNames.get(i), fieldOI)) 69 | } 70 | new ColumnarStructObjectInspector(fields) 71 | } 72 | 73 | class IDStructField( 74 | val fieldID: Int, 75 | val fieldName: String, 76 | val fieldObjectInspector: ObjectInspector, 77 | val fieldComment: String) 78 | extends StructField { 79 | 80 | def this(fieldID: Int, fieldName: String, fieldObjectInspector: ObjectInspector) = 81 | this(fieldID, fieldName, fieldObjectInspector, null) 82 | 83 | override def getFieldName: String = fieldName 84 | override def getFieldObjectInspector: ObjectInspector = fieldObjectInspector 85 | override def toString(): String = "" + fieldID + ":" + fieldName 86 | override def getFieldComment() : String = fieldComment 87 | } 88 | } 89 | 90 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/JoinUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.execution 19 | 20 | import java.util.{List => JavaList} 21 | 22 | import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator 23 | import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector => OI} 24 | import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorUtils => OIUtils} 25 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.{ObjectInspectorCopyOption => CopyOption} 26 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector 27 | 28 | import org.apache.hadoop.io.BooleanWritable 29 | import org.apache.hadoop.io.NullWritable 30 | import org.apache.hadoop.io.Writable 31 | 32 | import shark.execution.serialization.SerializableWritable 33 | 34 | 35 | object JoinUtil { 36 | 37 | def computeJoinKey(row: Any, keyFields: JavaList[ExprNodeEvaluator], keyFieldsOI: JavaList[OI]) 38 | : Seq[SerializableWritable[_]] = { 39 | Range(0, keyFields.size).map { i => 40 | val c = copy(row, keyFields.get(i), keyFieldsOI.get(i), CopyOption.WRITABLE) 41 | val s = if (c == null) NullWritable.get else c 42 | new SerializableWritable(s.asInstanceOf[Writable]) 43 | } 44 | } 45 | 46 | def joinKeyHasAnyNulls(joinKey: Seq[AnyRef], nullSafes: Array[Boolean]): Boolean = { 47 | joinKey.zipWithIndex.exists { x => 48 | (nullSafes == null || nullSafes(x._2).unary_!) 
&& (x._1 == null) 49 | } 50 | } 51 | 52 | def computeJoinValues(row: Any, 53 | valueFields: JavaList[ExprNodeEvaluator], 54 | valueFieldsOI: JavaList[OI], 55 | filters: JavaList[ExprNodeEvaluator], 56 | filtersOI: JavaList[OI], 57 | noOuterJoin: Boolean, 58 | serializable: Boolean = false) 59 | : Array[AnyRef] = { 60 | 61 | // isFiltered = true means failed in the join filter testing 62 | val isFiltered: Boolean = { 63 | if (filters == null) { 64 | false 65 | } else { 66 | var x = 0 67 | var exists = false 68 | while (x < filters.size() && !exists) { 69 | val cond = filters.get(x).evaluate(row) 70 | if (cond == null) { 71 | exists = true 72 | } else { 73 | exists = !filtersOI.get(x).asInstanceOf[BooleanObjectInspector].get(cond) 74 | } 75 | x += 1 76 | } 77 | 78 | exists 79 | } 80 | } 81 | val size = valueFields.size 82 | val a = new Array[AnyRef](size) 83 | var i = 0 84 | while (i < size) { 85 | a(i) = copy(row, valueFields.get(i), valueFieldsOI.get(i), CopyOption.WRITABLE) 86 | i += 1 87 | } 88 | 89 | val result = if (noOuterJoin) { 90 | a 91 | } else { 92 | val n = new Array[AnyRef](size + 1) 93 | Array.copy(a, 0, n, 0, size) 94 | n(size) = new BooleanWritable(isFiltered) 95 | n 96 | } 97 | 98 | if (serializable) { 99 | result.map(e => new SerializableWritable(e.asInstanceOf[Writable])) 100 | } else { 101 | result 102 | } 103 | } 104 | 105 | private def copy(row: Any, evaluator: ExprNodeEvaluator, oi: OI, copyOption: CopyOption) = { 106 | OIUtils.copyToStandardObject(evaluator.evaluate(row), oi, copyOption) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/scala/shark/memstore2/TablePartition.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package shark.memstore2 19 | 20 | import java.io.{Externalizable, ObjectInput, ObjectOutput} 21 | import java.nio.ByteBuffer 22 | import java.nio.ByteOrder 23 | import java.util.BitSet 24 | import shark.memstore2.column.ColumnIterator 25 | 26 | 27 | /** 28 | * TablePartition contains a whole partition of data in columnar format. It 29 | * simply contains a list of columns and their meta data. It should be built 30 | * using a TablePartitionBuilder. 31 | */ 32 | class TablePartition(private var _numRows: Long, private var _columns: Array[ByteBuffer]) 33 | extends Externalizable { 34 | 35 | // Empty constructor for Externalizable 36 | def this() { 37 | this(0, null) 38 | } 39 | 40 | def this(columns: Array[ByteBuffer]) { 41 | this(columns(0).getLong(), columns.tail) 42 | } 43 | 44 | def numRows: Long = _numRows 45 | 46 | def columns: Array[ByteBuffer] = _columns 47 | 48 | /** We store our per-partition metadata in a fake "column 0" for off-heap storage. 
*/ 49 | def toOffHeap: Array[ByteBuffer] = { 50 | val buffers = new Array[ByteBuffer](1 + _columns.size) 51 | buffers(0) = metadata 52 | System.arraycopy(_columns, 0, buffers, 1, _columns.size) 53 | buffers 54 | } 55 | 56 | def metadata: ByteBuffer = { 57 | val buffer = ByteBuffer.allocate(8) 58 | buffer.order(ByteOrder.nativeOrder()) 59 | buffer.putLong(_numRows) 60 | buffer.rewind() 61 | buffer 62 | } 63 | 64 | /** 65 | * Return an iterator for the partition. 66 | */ 67 | def iterator: TablePartitionIterator = { 68 | val columnIterators: Array[ColumnIterator] = _columns.map { case buffer: ByteBuffer => 69 | val iter = ColumnIterator.newIterator(buffer) 70 | iter 71 | } 72 | new TablePartitionIterator(_numRows, columnIterators) 73 | } 74 | 75 | def prunedIterator(columnsUsed: BitSet) = { 76 | val columnIterators: Array[ColumnIterator] = _columns.map { 77 | case buffer: ByteBuffer => 78 | ColumnIterator.newIterator(buffer) 79 | case _ => 80 | // The buffer might be null if it is pruned in off-heap storage. 81 | null 82 | } 83 | new TablePartitionIterator(_numRows, columnIterators, columnsUsed) 84 | } 85 | 86 | override def readExternal(in: ObjectInput) { 87 | _numRows = in.readLong() 88 | val numColumns = in.readInt() 89 | _columns = Array.fill[ByteBuffer](numColumns) { 90 | val columnLen = in.readInt() 91 | val buf = ByteBuffer.allocate(columnLen) 92 | in.readFully(buf.array(), 0, columnLen) 93 | buf 94 | } 95 | } 96 | 97 | override def writeExternal(out: ObjectOutput) { 98 | out.writeLong(numRows) 99 | out.writeInt(columns.length) 100 | for (column <- columns) { 101 | val buf = column.duplicate() 102 | buf.rewind() 103 | // If the ByteBuffer is backed by a byte array, just write the byte array out. 104 | // Otherwise, write each byte one by one. 105 | if (buf.hasArray()) { 106 | val byteArray = buf.array() 107 | out.writeInt(byteArray.length) 108 | out.write(byteArray, 0, byteArray.length) 109 | } else { 110 | out.writeInt(buf.remaining()) 111 | while (buf.hasRemaining()) { 112 | out.write(buf.get()) 113 | } 114 | } 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/scala/shark/execution/SharkExplainTask.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 The Regents of The University California. 3 | * All rights reserved. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package shark.execution 19 | 20 | import java.io.PrintStream 21 | import java.util.{HashSet => JHashSet, List => JList} 22 | 23 | import scala.collection.JavaConversions._ 24 | 25 | import org.apache.hadoop.fs.Path 26 | import org.apache.hadoop.hive.conf.HiveConf 27 | import org.apache.hadoop.hive.ql.exec.{ExplainTask, Task} 28 | import org.apache.hadoop.hive.ql.hooks.ReadEntity; 29 | import org.apache.hadoop.hive.ql.{Context, DriverContext, QueryPlan} 30 | import org.apache.hadoop.hive.ql.exec.{ExplainTask, Task} 31 | import org.apache.hadoop.hive.ql.plan.ExplainWork 32 | import org.apache.hadoop.util.StringUtils 33 | 34 | import shark.LogHelper 35 | 36 | 37 | class SharkExplainWork( 38 | resFile: String, 39 | rootTasks: JList[Task[_ <: java.io.Serializable]], 40 | astStringTree: String, 41 | inputs: JHashSet[ReadEntity], 42 | extended: Boolean) 43 | extends ExplainWork(resFile, rootTasks, astStringTree, inputs, extended, false, false) 44 | 45 | 46 | /** 47 | * SharkExplainTask executes EXPLAIN for RDD operators. 48 | */ 49 | class SharkExplainTask extends Task[SharkExplainWork] with java.io.Serializable with LogHelper { 50 | 51 | val hiveExplainTask = new ExplainTask 52 | 53 | override def execute(driverContext: DriverContext): Int = { 54 | logDebug("Executing " + this.getClass.getName()) 55 | hiveExplainTask.setWork(work) 56 | 57 | try { 58 | val resFile = new Path(work.getResFile()) 59 | val outS = resFile.getFileSystem(conf).create(resFile) 60 | val out = new PrintStream(outS) 61 | 62 | // Print out the parse AST 63 | ExplainTask.outputAST(work.getAstStringTree, out, false, 0) 64 | out.println() 65 | 66 | ExplainTask.outputDependencies(out, work.isFormatted(), work.getRootTasks, 0) 67 | out.println() 68 | 69 | // Go over all the tasks and dump out the plans 70 | ExplainTask.outputStagePlans(out, work, work.getRootTasks, 0) 71 | 72 | // Print the Shark query plan if applicable. 73 | if (work != null && work.getRootTasks != null && work.getRootTasks.size > 0) { 74 | work.getRootTasks.zipWithIndex.foreach { case(task, taskIndex) => 75 | task match { 76 | case sparkTask: SparkTask => { 77 | out.println("SHARK QUERY PLAN #%d:".format(taskIndex)) 78 | val terminalOp = sparkTask.getWork().terminalOperator 79 | ExplainTaskHelper.outputPlan(terminalOp, out, work.getExtended, 2) 80 | out.println() 81 | } 82 | case _ => null 83 | } 84 | } 85 | } 86 | 87 | out.close() 88 | 0 89 | } catch { 90 | case e: Exception => { 91 | console.printError("Failed with exception " + e.getMessage(), "\n" + 92 | StringUtils.stringifyException(e)) 93 | throw e 94 | 1 95 | } 96 | } 97 | } 98 | 99 | override def initialize(conf: HiveConf, queryPlan: QueryPlan, driverContext: DriverContext) { 100 | hiveExplainTask.initialize(conf, queryPlan, driverContext) 101 | super.initialize(conf, queryPlan, driverContext) 102 | } 103 | 104 | override def getType = hiveExplainTask.getType 105 | 106 | override def getName = hiveExplainTask.getName 107 | 108 | override def localizeMRTmpFilesImpl(ctx: Context) { 109 | // explain task has nothing to localize 110 | // we don't expect to enter this code path at all 111 | throw new RuntimeException ("Unexpected call") 112 | } 113 | 114 | } 115 | 116 | -------------------------------------------------------------------------------- /run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file is used to launch Shark on the master. 
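# A minimal usage sketch, assuming Shark has already been compiled and conf/shark-env.sh is
# configured. The class name below is an assumption for illustration; pass whichever Shark
# main class you intend to launch as the script's arguments:
#   ./run shark.SharkCliDriver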
4 | export SCALA_VERSION=2.10 5 | SHARK_VERSION=0.9.2 6 | 7 | # Figure out where the framework is installed 8 | FWDIR="$(cd `dirname $0`; pwd)" 9 | 10 | export SHARK_HOME="$FWDIR" 11 | 12 | # Load environment variables from conf/shark-env.sh, if it exists 13 | if [ -e $SHARK_HOME/conf/shark-env.sh ] ; then 14 | . $SHARK_HOME/conf/shark-env.sh 15 | fi 16 | 17 | if [ -n "$MASTER" ] ; then 18 | if [ -z $SPARK_HOME ] ; then 19 | echo "No SPARK_HOME specified. Please set SPARK_HOME for cluster mode." 20 | exit 1 21 | fi 22 | fi 23 | 24 | # check for shark with spark on yarn params 25 | if [ "x$SHARK_EXEC_MODE" == "xyarn" ] ; then 26 | if [ "x$SPARK_ASSEMBLY_JAR" == "x" ] ; then 27 | echo "No SPARK_ASSEMBLY_JAR specified. Please set SPARK_ASSEMBLY_JAR for spark on yarn mode." 28 | exit 1 29 | else 30 | export SPARK_JAR=$SPARK_ASSEMBLY_JAR 31 | fi 32 | 33 | if [ "x$SHARK_ASSEMBLY_JAR" == "x" ] ; then 34 | echo "No SHARK_ASSEMBLY_JAR specified. please set SHARK_ASSEMBLY_JAR for spark on yarn mode." 35 | exit 1 36 | else 37 | export SPARK_YARN_APP_JAR=$SHARK_ASSEMBLY_JAR 38 | fi 39 | 40 | # use yarn-client mode for interactive shell. 41 | export MASTER=yarn-client 42 | fi 43 | 44 | # Check for optionally specified configuration file path 45 | if [ "x$HIVE_CONF_DIR" == "x" ] ; then 46 | HIVE_CONF_DIR="$SHARK_HOME/conf" 47 | fi 48 | 49 | if [ -f "${HIVE_CONF_DIR}/hive-env.sh" ]; then 50 | . "${HIVE_CONF_DIR}/hive-env.sh" 51 | fi 52 | 53 | # Add Shark jars. 54 | for jar in `find $SHARK_HOME/lib -name '*jar'`; do 55 | SPARK_CLASSPATH+=:$jar 56 | done 57 | for jar in `find $SHARK_HOME/lib_managed/jars -name '*jar'`; do 58 | SPARK_CLASSPATH+=:$jar 59 | done 60 | for jar in `find $SHARK_HOME/lib_managed/bundles -name '*jar'`; do 61 | SPARK_CLASSPATH+=:$jar 62 | done 63 | 64 | SPARK_CLASSPATH+=:$HIVE_CONF_DIR 65 | 66 | # Build up Shark's jar or classes. 67 | SHARK_CLASSES="$SHARK_HOME/target/scala-$SCALA_VERSION/classes" 68 | SHARK_JAR="$SHARK_HOME/target/scala-$SCALA_VERSION/shark_$SCALA_VERSION-$SHARK_VERSION.jar" 69 | if [ -d "$SHARK_CLASSES/shark" ] ; then 70 | SPARK_CLASSPATH+=":$SHARK_CLASSES" 71 | else 72 | if [ -f "$SHARK_JAR" ] ; then 73 | SPARK_CLASSPATH+=":$SHARK_JAR" 74 | else 75 | echo "Cannot find either compiled classes or compiled jar package for Shark." 76 | echo "Have you compiled Shark yet?" 77 | exit 1 78 | fi 79 | fi 80 | 81 | SPARK_CLASSPATH+=":$SHARK_HOME/target/scala-$SCALA_VERSION/test-classes" 82 | 83 | 84 | SHARK_JAR="$SHARK_HOME/target/scala-$SCALA_VERSION/shark_$SCALA_VERSION-$SHARK_VERSION.jar" 85 | if [ -f "$SHARK_JAR" ] ; then 86 | SPARK_CLASSPATH+=":$SHARK_JAR" 87 | else 88 | SPARK_CLASSPATH+=":$SHARK_HOME/target/scala-$SCALA_VERSION/classes" 89 | fi 90 | 91 | SPARK_CLASSPATH+=":$SHARK_HOME/target/scala-$SCALA_VERSION/test-classes" 92 | 93 | 94 | if [ "x$HADOOP_HOME" == "x" ] ; then 95 | echo "No HADOOP_HOME specified. Shark will run in local-mode" 96 | else 97 | SPARK_CLASSPATH+=:$HADOOP_HOME/etc/hadoop 98 | SPARK_CLASSPATH+=:$HADOOP_HOME/conf 99 | fi 100 | 101 | 102 | # TODO(rxin): Check aux classpath and aux java opts. 
103 | #CLASSPATH=${CLASSPATH}:${AUX_CLASSPATH} 104 | 105 | export SPARK_CLASSPATH 106 | export CLASSPATH+=$SPARK_CLASSPATH # Needed for spark-shell 107 | 108 | export SPARK_JAVA_OPTS+=" $TEST_JAVA_OPTS" 109 | 110 | # supress the HADOOP_HOME warnings in 1.x.x 111 | export HADOOP_HOME_WARN_SUPPRESS=true 112 | 113 | if [ "x$SHARK_MASTER_MEM" == "x" ] ; then 114 | SHARK_MASTER_MEM="512m" 115 | fi 116 | 117 | # Set JAVA_OPTS to be able to load native libraries and to set heap size 118 | JAVA_OPTS+="$SPARK_JAVA_OPTS" 119 | JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH" 120 | JAVA_OPTS+=" -Xms$SHARK_MASTER_MEM -Xmx$SHARK_MASTER_MEM" 121 | export JAVA_OPTS 122 | 123 | # In case we are running Ant 124 | export ANT_OPTS=$JAVA_OPTS 125 | 126 | if [ "x$RUNNER" == "x" ] ; then 127 | if [ -n "$JAVA_HOME" ]; then 128 | RUNNER="${JAVA_HOME}/bin/java" 129 | else 130 | RUNNER=java 131 | fi 132 | # The JVM doesn't read JAVA_OPTS by default so we need to pass it in 133 | EXTRA_ARGS="$JAVA_OPTS" 134 | fi 135 | 136 | exec $RUNNER $EXTRA_ARGS "$@" 137 | -------------------------------------------------------------------------------- /src/test/scala/shark/SharkServerSuite.scala: -------------------------------------------------------------------------------- 1 | package shark 2 | 3 | import java.io.{BufferedReader, InputStreamReader} 4 | import java.sql.DriverManager 5 | import java.sql.Statement 6 | import java.sql.Connection 7 | 8 | import scala.collection.JavaConversions._ 9 | 10 | import org.scalatest.{BeforeAndAfterAll, FunSuite} 11 | import org.scalatest.matchers.ShouldMatchers 12 | 13 | import scala.concurrent._ 14 | import ExecutionContext.Implicits.global 15 | 16 | /** 17 | * Test for the Shark server. 18 | */ 19 | class SharkServerSuite extends FunSuite with BeforeAndAfterAll with ShouldMatchers with TestUtils { 20 | 21 | val WAREHOUSE_PATH = TestUtils.getWarehousePath("server") 22 | val METASTORE_PATH = TestUtils.getMetastorePath("server") 23 | val DRIVER_NAME = "org.apache.hadoop.hive.jdbc.HiveDriver" 24 | val TABLE = "test" 25 | // use a different port, than the hive standard 10000, 26 | // for tests to avoid issues with the port being taken on some machines 27 | val PORT = "9011" 28 | 29 | // If verbose is true, the testing program will print all outputs coming from the shark server. 30 | val VERBOSE = Option(System.getenv("SHARK_TEST_VERBOSE")).getOrElse("false").toBoolean 31 | 32 | Class.forName(DRIVER_NAME) 33 | 34 | override def beforeAll() { launchServer() } 35 | 36 | override def afterAll() { stopServer() } 37 | 38 | private def launchServer(args: Seq[String] = Seq.empty) { 39 | // Forking a new process to start the Shark server. The reason to do this is it is 40 | // hard to clean up Hive resources entirely, so we just start a new process and kill 41 | // that process for cleanup. 
42 | val defaultArgs = Seq("./bin/shark", "--service", "sharkserver", 43 | "--verbose", 44 | "-p", 45 | PORT, 46 | "--hiveconf", 47 | "hive.root.logger=INFO,console", 48 | "--hiveconf", 49 | "\"javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=" + METASTORE_PATH + ";create=true\"", 50 | "--hiveconf", 51 | "\"hive.metastore.warehouse.dir=" + WAREHOUSE_PATH + "\"") 52 | val pb = new ProcessBuilder(defaultArgs ++ args) 53 | process = pb.start() 54 | inputReader = new BufferedReader(new InputStreamReader(process.getInputStream)) 55 | errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream)) 56 | waitForOutput(inputReader, "Starting Shark server") 57 | 58 | // Spawn a thread to read the output from the forked process. 59 | // Note that this is necessary since in some configurations, log4j could be blocked 60 | // if its output to stderr are not read, and eventually blocking the entire test suite. 61 | future { 62 | while (true) { 63 | val stdout = readFrom(inputReader) 64 | val stderr = readFrom(errorReader) 65 | if (VERBOSE && stdout.length > 0) { 66 | println(stdout) 67 | } 68 | if (VERBOSE && stderr.length > 0) { 69 | println(stderr) 70 | } 71 | Thread.sleep(50) 72 | } 73 | } 74 | } 75 | 76 | private def stopServer() { 77 | process.destroy() 78 | process.waitFor() 79 | } 80 | 81 | test("test query execution against a shark server") { 82 | Thread.sleep(5*1000) // I know... Gross. However, without this the tests fail non-deterministically. 83 | 84 | val dataFilePath = TestUtils.dataFilePath + "/kv1.txt" 85 | val stmt = createStatement() 86 | stmt.executeQuery("DROP TABLE IF EXISTS test") 87 | stmt.executeQuery("DROP TABLE IF EXISTS test_cached") 88 | stmt.executeQuery("CREATE TABLE test(key int, val string)") 89 | stmt.executeQuery("LOAD DATA LOCAL INPATH '" + dataFilePath+ "' OVERWRITE INTO TABLE test") 90 | stmt.executeQuery("CREATE TABLE test_cached as select * from test limit 499") 91 | 92 | var rs = stmt.executeQuery("select count(*) from test") 93 | rs.next() 94 | rs.getInt(1) should equal (500) 95 | 96 | rs = stmt.executeQuery("select count(*) from test_cached") 97 | rs.next() 98 | rs.getInt(1) should equal (499) 99 | 100 | stmt.close() 101 | } 102 | 103 | def getConnection(): Connection = { 104 | DriverManager.getConnection("jdbc:hive://localhost:" + PORT + "/default", "", "") 105 | } 106 | 107 | def createStatement(): Statement = getConnection().createStatement() 108 | } --------------------------------------------------------------------------------