├── .gitignore
├── LICENSE.md
├── README.md
├── jaws-hive-sql-rest
├── LICENSE.md
├── pom.xml
└── src
│ ├── main
│ ├── assembly
│ │ └── archive.xml
│ ├── resources
│ │ ├── application.conf
│ │ └── log4j.properties
│ └── scala
│ │ ├── apiactors
│ │ └── HiveRunnerActor.scala
│ │ ├── customs
│ │ ├── CORSDirectives.scala
│ │ ├── CommandsProcessor.scala
│ │ └── ResultsProcessor.scala
│ │ └── server
│ │ └── HiveController.scala
│ └── test
│ ├── resources
│ └── application.conf
│ └── scala
│ ├── CommandsProcessorTest.scala
│ └── ResultsProcessorTest.scala
├── jaws-spark-sql-data
├── LICENSE.md
├── pom.xml
└── src
│ ├── main
│ └── scala
│ │ └── com
│ │ └── xpatterns
│ │ └── jaws
│ │ └── data
│ │ ├── DTO
│ │ ├── AvroBinaryResult.scala
│ │ ├── AvroResult.scala
│ │ ├── Column.scala
│ │ ├── CustomResult.scala
│ │ ├── Databases.scala
│ │ ├── Log.scala
│ │ ├── Logs.scala
│ │ ├── ParquetTable.scala
│ │ ├── Queries.scala
│ │ ├── Query.scala
│ │ ├── QueryMetaInfo.scala
│ │ ├── Table.scala
│ │ └── Tables.scala
│ │ ├── contracts
│ │ ├── DAL.scala
│ │ ├── TJawsLogging.scala
│ │ ├── TJawsParquetTables.scala
│ │ └── TJawsResults.scala
│ │ ├── impl
│ │ ├── CassandraDal.scala
│ │ ├── HdfsDal.scala
│ │ ├── JawsCassandraLogging.scala
│ │ ├── JawsCassandraParquetTables.scala
│ │ ├── JawsCassandraResults.scala
│ │ ├── JawsHdfsLogging.scala
│ │ ├── JawsHdfsParquetTables.scala
│ │ └── JawsHdfsResults.scala
│ │ └── utils
│ │ ├── AvroConverter.scala
│ │ ├── CustomConverter.scala
│ │ ├── GsonHelper.scala
│ │ ├── QueryState.scala
│ │ ├── Randomizer.scala
│ │ ├── ResultsConverter.scala
│ │ └── Utils.scala
│ └── test
│ ├── resources
│ └── application.conf
│ └── scala
│ └── com
│ └── xpatterns
│ └── jaws
│ └── data
│ ├── impl
│ ├── JawsCassandraParquetTablesTest.scala
│ ├── JawsHdfsParquetTablesTest.scala
│ ├── JawsLoggingOnHdfsTest.scala
│ ├── JawsLoggingTest.scala
│ ├── JawsResultsOnHdfsTest.scala
│ └── JawsResultsTest.scala
│ └── utils
│ ├── AvroConverterComplexTest.scala
│ ├── AvroConverterCustomTest.scala
│ ├── AvroConverterTest.scala
│ ├── AvroSerializerTest.scala
│ └── CustomConverterTest.scala
├── jaws-spark-sql-integration-tests
├── LICENSE.md
├── pom.xml
└── src
│ └── test
│ ├── resources
│ ├── application.conf
│ ├── jawsTest.parquet
│ │ ├── _metadata
│ │ └── part-r-1.parquet
│ └── people.txt
│ └── scala
│ ├── api
│ ├── GetDatabasesApiTest.scala
│ ├── JawsIsUpTest.scala
│ ├── ParquetManagementApiTest.scala
│ ├── RunApiTest.scala
│ ├── RunHiveApiTest.scala
│ └── TestSuite.scala
│ └── foundation
│ ├── TestBase.scala
│ └── UtilOperations.scala
├── jaws-spark-sql-rest
├── LICENSE.md
├── pom.xml
└── src
│ ├── main
│ ├── assembly
│ │ └── archive.xml
│ ├── resources
│ │ ├── application.conf
│ │ ├── cassandra-schema.txt
│ │ ├── core-site.xml
│ │ ├── hive-site.xml
│ │ ├── jaws-env.sh
│ │ ├── log4j.properties
│ │ └── sharkSettings.txt
│ ├── scala
│ │ ├── apiactors
│ │ │ ├── ActorUtils.scala
│ │ │ ├── BalancerActor.scala
│ │ │ ├── DeleteQueryApiActor.scala
│ │ │ ├── GetDatabasesApiActor.scala
│ │ │ ├── GetDatasourceSchemaActor.scala
│ │ │ ├── GetLogsApiActor.scala
│ │ │ ├── GetParquetTablesApiActor.scala
│ │ │ ├── GetQueriesApiActor.scala
│ │ │ ├── GetResultsApiActor.scala
│ │ │ ├── GetTablesApiActor.scala
│ │ │ ├── QueryPropertiesApiActor.scala
│ │ │ ├── RegisterParquetTableApiActor.scala
│ │ │ └── RunScriptApiActor.scala
│ │ ├── customs
│ │ │ ├── CORSDirectives.scala
│ │ │ ├── CustomDirectives.scala
│ │ │ └── CustomIndexer.scala
│ │ ├── implementation
│ │ │ ├── HiveContextWrapper.scala
│ │ │ └── SchemaSettingsFactory.scala
│ │ ├── messages
│ │ │ └── Messages.scala
│ │ ├── org
│ │ │ └── apache
│ │ │ │ └── spark
│ │ │ │ └── scheduler
│ │ │ │ ├── HiveUtils.scala
│ │ │ │ ├── LoggingListener.scala
│ │ │ │ └── RunScriptTask.scala
│ │ └── server
│ │ │ ├── Configuration.scala
│ │ │ ├── JawsController.scala
│ │ │ ├── LogsActor.scala
│ │ │ ├── MainActors.scala
│ │ │ ├── ReactiveServer.scala
│ │ │ ├── Supervisor.scala
│ │ │ └── api
│ │ │ ├── BaseApi.scala
│ │ │ ├── IndexApi.scala
│ │ │ ├── MetadataApi.scala
│ │ │ ├── ParquetApi.scala
│ │ │ ├── QueryManagementApi.scala
│ │ │ └── UIApi.scala
│ ├── scripts
│ │ └── start-jaws.sh
│ └── webapp
│ │ └── hello.txt
│ └── test
│ ├── resources
│ ├── application.conf
│ ├── core-site.xml
│ ├── data
│ │ └── kv1.txt
│ ├── hive-site.xml
│ ├── jaws-env.sh
│ ├── log4j.properties
│ └── sharkSettings.txt
│ └── scala
│ ├── api
│ ├── DeleteQueryTest.scala
│ ├── GetQueryInfoTest.scala
│ └── QueryPropertiesTest.scala
│ ├── implementation
│ └── HiveUtilsTest.scala
│ └── utils
│ ├── TestSuite.scala
│ └── TestUtils.scala
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | .cache
2 | syntax: glob
3 | *.log
4 | RemoteSystemsTempFiles
5 | *.metadata
6 | target/*
7 | .DS_Store
8 | *.settings
9 | *.classpath
10 | *.idea
11 | *.iml
12 | *.orig
13 | *.txt~
14 | src/main/webapp/resources/node_modules/
15 | xpatterns-data-init/application-context.xml
16 | atlassian-ide-plugin.xml
17 | bin/*
18 | */META-INF/
19 | */src/main/webapp/resources/node_modules/
20 | */node_modules/
21 | atlassian-ide-plugin.xml
22 | RemoteSystemsTempFiles
23 | Servers
24 | bin
25 | .metadata
26 | target
27 | .DS_Store
28 | .settings
29 | .classpath
30 | .project
31 | project
32 | classpath
33 | HadoopCryptoCompressor
34 | .idea
35 | .iml
36 | .orig
37 | .java.orig
38 | syntax: glob
39 | java-driver
40 | ioenl
41 | derby.log
42 | mancenter-2.5
43 | dependency-reduced-pom.xml
44 | test_warehouses
45 | localhost
46 | jawsSchemaFolder
47 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2014 Atigeo, LLC.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | http://www.apache.org/licenses/LICENSE-2.0
6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2014 Atigeo, LLC.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | http://www.apache.org/licenses/LICENSE-2.0
6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/src/main/assembly/archive.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
3 |           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |           xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
5 |   <id>archive</id>
6 |   <formats>
7 |     <format>tar.gz</format>
8 |   </formats>
9 |   <fileSets>
10 |     <fileSet>
11 |       <directory>${project.build.directory}/temp_build</directory>
12 |       <outputDirectory>/</outputDirectory>
13 |     </fileSet>
14 |     <fileSet>
15 |       <directory>${basedir}/conf</directory>
16 |       <outputDirectory>/conf</outputDirectory>
17 |     </fileSet>
18 |   </fileSets>
19 | </assembly>
20 |
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | spray.can.server {
2 | # uncomment the next line for making this an HTTPS example
3 | # ssl-encryption = on
4 | idle-timeout = 301 s
5 | request-timeout = 300 s
6 | }
7 |
8 |
9 |
10 | ######### application configuration ###################
11 | appConf{
12 | # the interface on which to start the spray server : localhost/ip/hostname
13 | server.interface=localhost
14 | # the cors filter allowed hosts
15 | cors-filter-allowed-hosts="*"
16 | # implicit akka timeout
17 | timeout=1000000
18 | #app port
19 | web.services.port=7080
20 | #where to log: app.logging.type = cassandra/hdfs
21 | app.logging.type=cassandra
22 | # the number of threads used to execute hive commands
23 | nr.of.threads=10
24 |
25 | }
26 |
27 |
28 | ########## hadoop configuration - skip this if you are using cassandra logging ########
29 | hadoopConf {
30 | namenode="hdfs://devbox.local:8020"
31 | replicationFactor=1
32 | # set to true if you want to start fresh (all the existing folders will be recreated)
33 | forcedMode=false
34 | # folder where to write the logs
35 | loggingFolder=jawsLogs
36 | # folder where to write the jobs states
37 | stateFolder=jawsStates
38 | # folder where to write the jobs details
39 | detailsFolder=jawsDetails
40 | # folder where to write the jobs results
41 | resultsFolder=jawsResultsFolder
42 | # folder where to write the jobs meta information
43 | metaInfoFolder=jawsMetainfoFolder
44 | # folder where to write the query name information
45 | queryNameFolder=jawsQueryNameFolder
46 | # folder where to write the published queries
47 | queryPublishedFolder=jawsQueryPublishedFolder
48 | # folder where to write the unpublished queries
49 | queryUnpublishedFolder=jawsQueryUnpublishedFolder
50 | # folder where to write the parquet tables information
51 | parquetTablesFolder=parquetTablesFolder
52 | }
53 |
54 | ########## cassandra configuration - skip this if you are using hdfs logging ##########
55 | cassandraConf {
56 | cassandra.host="devbox.local:9160"
57 | cassandra.keyspace=xpatterns_jaws
58 | cassandra.cluster.name=Jaws
59 | }
--------------------------------------------------------------------------------
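
The appConf / hadoopConf / cassandraConf blocks above are plain Typesafe Config (HOCON). Below is a minimal sketch, for illustration only, of how such keys can be read with the standard com.typesafe.config API; the application itself goes through its own Configuration object (see the server.Configuration import in HiveRunnerActor.scala below), and the value names here simply mirror the keys in this file.

import com.typesafe.config.ConfigFactory

// loads application.conf from the classpath
val conf        = ConfigFactory.load()
val appConf     = conf.getConfig("appConf")

val serverPort  = appConf.getInt("web.services.port")     // 7080 in the file above
val loggingType = appConf.getString("app.logging.type")   // "cassandra" or "hdfs"
val akkaTimeout = appConf.getLong("timeout")

// only one of the two backend blocks is relevant, depending on loggingType
val cassandraHost = if (loggingType == "cassandra") Some(conf.getString("cassandraConf.cassandra.host")) else None
val namenode      = if (loggingType == "hdfs") Some(conf.getString("hadoopConf.namenode")) else None
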
/jaws-hive-sql-rest/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | #
3 | # The following properties set the logging levels and log appender. The
4 | # log4j.rootCategory variable defines the default log level and one or more
5 | # appenders. For the console, use 'S'. For the daily rolling file, use 'R'.
6 | # For an HTML formatted log, use 'H'.
7 | #
8 | # To override the default (rootCategory) log level, define a property of the
9 | # form (see below for available values):
10 | #
11 | # log4j.logger.<logger name> = <log level>
12 | #
13 | # Available logger names:
14 | # TODO
15 | #
16 | # Possible Log Levels:
17 | # FATAL, ERROR, WARN, INFO, DEBUG
18 | #
19 | #------------------------------------------------------------------------------
20 |
21 | #log4j.category.me.prettyprint.cassandra = INFO, dataConsole, dataFile
22 | #log4j.additivity.me.prettyprint.cassandra = false
23 | #log4j.category.DATA = INFO, dataConsole, dataFile
24 | #log4j.additivity.DATA = false
25 |
26 | log4j.rootCategory = INFO, defaultConsole, defaultFile
27 |
28 | #log4j.category.com.xpatterns.xrelevance.content.data = INFO, dataConsole, dataFile
29 | #log4j.additivity.com.xpatterns.xrelevance.content.data = false
30 | #log4j.category.com.xpatterns.xrelevance.configuration.data = INFO, dataConsole, dataFile
31 | #log4j.additivity.com.xpatterns.xrelevance.configuration.data = false
32 | #log4j.category.com.xpatterns.xrelevance.data = INFO, dataConsole, dataFile
33 | #log4j.additivity.com.xpatterns.xrelevance.data = false
34 |
35 |
36 | #------------------------------------------------------------------------------
37 | #
38 | # The following properties configure the console (stdout) appender.
39 | # See http://logging.apache.org/log4j/docs/api/index.html for details.
40 | #
41 | #------------------------------------------------------------------------------
42 | log4j.appender.defaultConsole = org.apache.log4j.ConsoleAppender
43 | log4j.appender.defaultConsole.layout = org.apache.log4j.PatternLayout
44 | log4j.appender.defaultConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
45 |
46 | #------------------------------------------------------------------------------
47 | #
48 | # The following properties configure the Daily Rolling File appender.
49 | # See http://logging.apache.org/log4j/docs/api/index.html for details.
50 | #
51 | #------------------------------------------------------------------------------
52 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender
53 | log4j.appender.defaultFile.File = jaws-hive-sql-rest.log
54 | log4j.appender.defaultFile.Append = true
55 | log4j.appender.defaultFile.DatePattern = '.'yyyy-MM-dd
56 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout
57 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
58 |
59 | #console for data project
60 | #log4j.appender.dataConsole = org.apache.log4j.ConsoleAppender
61 | #log4j.appender.dataConsole.layout = org.apache.log4j.PatternLayout
62 | #log4j.appender.dataConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
63 |
64 | #file for data project
65 | #log4j.appender.dataFile = org.apache.log4j.DailyRollingFileAppender
66 | #log4j.appender.dataFile.File = ${catalina.home}/logs/xpatterns-api-data-4.0.log
67 | #log4j.appender.dataFile.Append = true
68 | #log4j.appender.dataFile.DatePattern = '.'yyyy-MM-dd
69 | #log4j.appender.dataFile.layout = org.apache.log4j.PatternLayout
70 | #log4j.appender.dataFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/src/main/scala/apiactors/HiveRunnerActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import server.Configuration
4 | import sys.process._
5 | import scala.collection.mutable.ListBuffer
6 | import akka.actor.Actor
7 | import scala.util.Try
8 | import scala.util.Success
9 | import scala.util.Failure
10 | import com.xpatterns.jaws.data.contracts.DAL
11 | import customs.CommandsProcessor._
12 | import customs.ResultsProcessor._
13 | import java.io.ByteArrayOutputStream
14 | import java.io.OutputStreamWriter
15 | import sys.process._
16 | import scala.concurrent.ExecutionContext
17 | import java.util.concurrent.Executors
18 | import java.util.UUID
19 | import com.xpatterns.jaws.data.utils.QueryState
20 | import scala.concurrent._
21 | import java.io.ByteArrayInputStream
22 | import java.io.InputStreamReader
23 | import java.io.BufferedReader
24 | import scala.io.Source
25 | import scala.io.BufferedSource
26 | import com.xpatterns.jaws.data.utils.Utils._
27 |
28 | /**
29 | * Created by emaorhian
30 | */
31 |
32 | case class RunQueryMessage(script: String, limit: Int)
33 | case class ErrorMessage(message: String)
34 |
35 | class HiveRunnerActor(dals: DAL) extends Actor {
36 |
37 | override def receive = {
38 |
39 | case message: RunQueryMessage => {
40 | Configuration.log4j.info(s"[HiveRunnerActor]: Running script=${message.script}")
41 | val uuid = System.currentTimeMillis() + UUID.randomUUID().toString()
42 | implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(Configuration.nrOfThreads.getOrElse("10").toInt))
43 | var script = ""
44 |
45 | val startTime = System.currentTimeMillis()
46 | dals.loggingDal.setTimestamp(uuid, startTime)
47 |
48 | val tryPreRunScript = Try {
49 | writeLaunchStatus(uuid, message.script)
50 | script = prepareCommands(message.script, message.limit)
51 | }
52 |
53 | tryPreRunScript match {
54 | case Success(v) => sender ! uuid
55 | case Failure(e) => sender ! ErrorMessage(s"Run hive query failed with the following message: ${getCompleteStackTrace(e)}")
56 | }
57 |
58 | val runResponse = future {
59 | Configuration.log4j.info(s"[HiveRunnerActor]: Executing commands $script")
60 | runHiveScript(script, uuid)
61 | }
62 |
63 | runResponse onComplete {
64 | case Success(s) => {
65 | val message = s"[HiveRunnerActor]: Query $uuid has successfully finished"
66 | dals.resultsDal.setResults(uuid, s)
67 | setStatus(uuid, message, QueryState.DONE)
68 |
69 | val executionTime = System.currentTimeMillis() - startTime
70 | dals.loggingDal.setExecutionTime(uuid, executionTime)
71 | }
72 | case Failure(e) => {
73 | val message = s"[HiveRunnerActor]: Query $uuid has failed with the following exception ${getCompleteStackTrace(e)}"
74 | setStatus(uuid, message, QueryState.FAILED)
75 | }
76 | }
77 | }
78 | }
79 |
80 | private def runHiveScript(script: String, uuid: String) = {
81 | val stdOutOS = new ByteArrayOutputStream
82 | val osWriter = new OutputStreamWriter(stdOutOS)
83 |
84 | val command = Seq("hive", "-e", script)
85 |
86 | try {
87 | command ! ProcessLogger(
88 | stdOutLine => osWriter.write(s"$stdOutLine\n"),
89 | stdErrLine => {
90 | Configuration.log4j.info(stdErrLine)
91 | dals.loggingDal.addLog(uuid, "hive", System.currentTimeMillis(), stdErrLine)
92 | })
93 | osWriter flush ()
94 |
95 | getLastResults(new ByteArrayInputStream(stdOutOS.toByteArray()))
96 |
97 | } finally {
98 | if (osWriter != null) osWriter close ()
99 | }
100 | }
101 |
102 | private def writeLaunchStatus(uuid: String, script: String) {
103 | dals.loggingDal.addLog(uuid, "hive", System.currentTimeMillis(), s"Launching task for $uuid")
104 | dals.loggingDal.setState(uuid, QueryState.IN_PROGRESS)
105 | dals.loggingDal.setScriptDetails(uuid, script)
106 | }
107 |
108 | private def setStatus(uuid: String, message: String, status: QueryState.Value) {
109 | Configuration.log4j.info(message)
110 | dals.loggingDal.addLog(uuid, "hive", System.currentTimeMillis(), message)
111 | dals.loggingDal.setState(uuid, status)
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
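
For orientation, a minimal, hypothetical sketch of how this actor is driven: a client sends a RunQueryMessage and immediately gets back either the generated query id (a String) or an ErrorMessage, while execution continues asynchronously on the actor's thread pool. The submitExample helper and the actor system name are illustrative; dals would be one of the DAL implementations from jaws-spark-sql-data.

import akka.actor.{ActorSystem, Props}
import akka.pattern.ask
import akka.util.Timeout
import scala.concurrent.duration._
import apiactors.{HiveRunnerActor, RunQueryMessage, ErrorMessage}
import com.xpatterns.jaws.data.contracts.DAL

def submitExample(dals: DAL): Unit = {
  val system = ActorSystem("jaws-example")
  val runner = system.actorOf(Props(new HiveRunnerActor(dals)), "hiveRunner")

  implicit val timeout = Timeout(2.minutes)
  import system.dispatcher

  // the actor answers with the query id right away; logs and results are stored under that id later
  (runner ? RunQueryMessage("show databases", limit = 100)) foreach {
    case uuid: String        => println(s"query accepted, id = $uuid")
    case ErrorMessage(error) => println(s"query rejected: $error")
  }
}
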
/jaws-hive-sql-rest/src/main/scala/customs/CORSDirectives.scala:
--------------------------------------------------------------------------------
1 | package customs
2 |
3 | import spray.http._
4 | import spray.routing._
5 | import spray.http.HttpHeaders._
6 | import spray.http.HttpMethod
7 | /**
8 | * Created by emaorhian
9 | */
10 | trait CORSDirectives { this: HttpService =>
11 | private def respondWithCORSHeaders(origin: String, rh: Seq[HttpHeader]) = {
12 | var headers: List[HttpHeader] = List(
13 | HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(List(origin))),
14 | HttpHeaders.`Access-Control-Allow-Credentials`(true),
15 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid")
16 | ) ++ rh.toList
17 |
18 | respondWithHeaders(headers)
19 | }
20 | private def respondWithCORSHeadersAllOrigins(rh: Seq[HttpHeader]) = {
21 | var headers: List[HttpHeader] = List(
22 | HttpHeaders.`Access-Control-Allow-Origin`(AllOrigins),
23 | HttpHeaders.`Access-Control-Allow-Credentials`(true),
24 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid")
25 | ) ++ rh.toList
26 |
27 | respondWithHeaders(headers)
28 | }
29 |
30 | def corsFilter(origins: List[String], rh: HttpHeader*)(route: Route) =
31 | if (origins.contains("*"))
32 | respondWithCORSHeadersAllOrigins(rh)(route)
33 | else
34 | optionalHeaderValueByName("Origin") {
35 | case None =>
36 | route
37 | case Some(clientOrigin) => {
38 | if (origins.contains(clientOrigin))
39 | respondWithCORSHeaders(clientOrigin, rh)(route)
40 | else {
41 | // Maybe, a Rejection will fit better
42 | complete(StatusCodes.Forbidden, "Invalid origin")
43 | }
44 | }
45 | }
46 | }
--------------------------------------------------------------------------------
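
As a usage sketch (not a file from the repository), corsFilter simply wraps an ordinary spray route: passing List("*") responds with Access-Control-Allow-Origin for all origins, while an explicit origin list answers unknown Origin headers with 403 Forbidden. The PingServiceActor class and its route are hypothetical.

import akka.actor.Actor
import spray.routing.HttpService
import customs.CORSDirectives

class PingServiceActor extends Actor with HttpService with CORSDirectives {
  def actorRefFactory = context
  def receive = runRoute(route)

  val route =
    corsFilter(List("*")) {   // or e.g. List("http://ui.example.com") to restrict origins
      path("ping") {
        get {
          complete("pong")
        }
      }
    }
}
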
/jaws-hive-sql-rest/src/main/scala/customs/CommandsProcessor.scala:
--------------------------------------------------------------------------------
1 | package customs
2 |
3 | import scala.collection.mutable.ListBuffer
4 | import org.apache.commons.lang.StringUtils
5 | import org.apache.commons.lang.RandomStringUtils
6 | import scala.collection.mutable.ListBuffer
7 |
8 | class CommandsProcessor
9 | object CommandsProcessor {
10 |
11 | val MORE_THAT_ONE_SELECT_EXCEPTION_MESSAGE = "The query must contain only one select, at the end"
12 | val QUERY_DELIMITATOR = "_jaws_query_delimitator_"
13 |
14 | def prepareCommands(script: String, numberOfResults: Int) = {
15 | val commandList = filterCommands(script)
16 |
17 | val commandsNb = commandList.size
18 | val firstCommands = commandList.take(commandsNb - 1) map (command => if (command.trim().toLowerCase().startsWith("select")) limitQuery(numberOfResults, command) else command)
19 | val lastCommand = if (commandList(commandsNb - 1).trim().toLowerCase().startsWith("select")) limitQuery(numberOfResults, commandList(commandsNb - 1)) else commandList(commandsNb - 1)
20 |
21 | firstCommands += ("set hive.cli.print.header=true", s"select '$QUERY_DELIMITATOR'", lastCommand)
22 |
23 | firstCommands addString (new StringBuilder, ";") toString
24 | }
25 |
26 | def filterCommands(script: String) = {
27 | val commandsList = ListBuffer.empty[String]
28 | script.split(";").foreach(oneCmd => {
29 | var command = oneCmd.trim()
30 | val trimmedCmd = oneCmd.trim()
31 | if (command.endsWith("\\")) {
32 | command = StringUtils.chop(command) + ";"
33 | }
34 |
35 | if (StringUtils.isBlank(command) == false) {
36 | commandsList += command
37 | }
38 |
39 | })
40 | commandsList
41 | }
42 |
43 | def limitQuery(numberOfResults: Long, cmd: String): String = {
44 | val temporaryTableName = RandomStringUtils.randomAlphabetic(10)
45 | // take only x results
46 | return s"select $temporaryTableName.* from ( $cmd ) $temporaryTableName limit $numberOfResults"
47 | }
48 | }
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/src/main/scala/customs/ResultsProcessor.scala:
--------------------------------------------------------------------------------
1 | package customs
2 |
3 | import java.io.ByteArrayInputStream
4 | import scala.io.Source
5 | import customs.CommandsProcessor._
6 | import com.xpatterns.jaws.data.DTO.Column
7 | import org.apache.spark.sql.catalyst.expressions.Row
8 | import com.xpatterns.jaws.data.utils.ResultsConverter
9 | import org.apache.spark.sql.types._
10 |
11 | class ResultsProcessor
12 | object ResultsProcessor {
13 | val headerMatcher = "([^.]*?\\.)?(.+)".r
14 |
15 | def getLastResults(inputStream: ByteArrayInputStream): ResultsConverter = {
16 | val reader = Source.fromInputStream(inputStream)
17 | try {
18 | val lastCmdResults = reader getLines () dropWhile (!_.equals(QUERY_DELIMITATOR)) toList
19 | val headers = toStructType(getHeader(lastCmdResults(1)))
20 | val results = getResults(lastCmdResults drop 2)
21 | new ResultsConverter(headers, results)
22 |
23 | } finally if (reader != null) reader close ()
24 | }
25 |
26 | def getHeader(headerLine: String): Array[String] = {
27 | headerLine split "\t" map (column => headerMatcher replaceAllIn (column, m => m group 2))
28 | }
29 |
30 | def toStructType (headers : Array[String]) : StructType = {
31 | val fields = headers map (column => new StructField(column, StringType, true))
32 | new StructType(fields)
33 | }
34 |
35 | def getResults(resultLines: List[String]): Array[Row] = {
36 | val resultsArray = resultLines map (line => line split "\t") toArray
37 | val result = resultsArray map (arr => Row.fromSeq(arr))
38 | result
39 | }
40 | }
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/src/test/resources/application.conf:
--------------------------------------------------------------------------------
1 | spray.can.server {
2 | # uncomment the next line for making this an HTTPS example
3 | # ssl-encryption = on
4 | idle-timeout = 301 s
5 | request-timeout = 300 s
6 | }
7 |
8 |
9 |
10 | ######### application configuration ###################
11 | appConf{
12 | # the interface on which to start the spray server : localhost/ip/hostname
13 | server.interface=localhost
14 | # the cors filter allowed hosts
15 | cors-filter-allowed-hosts="*"
16 | # implicit akka timeout
17 | timeout=1000000
18 | #app port
19 | web.services.port=7080
20 | #where to log: app.logging.type = cassandra/hdfs
21 | app.logging.type=cassandra
22 | # the number of threads used to execute hive commands
23 | nr.of.threads=10
24 | }
25 |
26 | ########## hadoop configuration - skip this if you are using cassandra logging ########
27 | hadoopConf {
28 | namenode="hdfs://devbox.local:8020"
29 | replicationFactor=1
30 | # set to true if you want to start fresh (all the existing folders will be recreated)
31 | forcedMode=false
32 | # folder where to write the logs
33 | loggingFolder=jawsLogs
34 | # folder where to write the jobs states
35 | stateFolder=jawsStates
36 | # folder where to write the jobs details
37 | detailsFolder=jawsDetails
38 | # folder where to write the jobs results
39 | resultsFolder=jawsResultsFolder
40 | # folder where to write the jobs meta information
41 | metaInfoFolder=jawsMetainfoFolder
42 | # folder where to write the query name information
43 | queryNameFolder=jawsQueryNameFolder
44 | # folder where to write the published queries
45 | queryPublishedFolder=jawsQueryPublishedFolder
46 | # folder where to write the unpublished queries
47 | queryUnpublishedFolder=jawsQueryUnpublishedFolder
48 | # folder where to write the parquet tables information
49 | parquetTablesFolder=parquetTablesFolder
50 |
51 |
52 |
53 | }
54 |
55 | ########## cassandra configuration - skip this if you are using hdfs logging ##########
56 | cassandraConf {
57 | cassandra.host="devbox.local:9160"
58 | cassandra.keyspace=xpatterns_jaws
59 | cassandra.cluster.name=Jaws
60 | }
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/src/test/scala/CommandsProcessorTest.scala:
--------------------------------------------------------------------------------
1 | import org.junit.runner.RunWith
2 | import org.scalatest.FunSuite
3 | import org.scalatest.junit.JUnitRunner
4 | import customs.CommandsProcessor._
5 | import scala.collection.mutable.ListBuffer
6 | import scala.util.Try
7 | import org.scalatest.Matchers._
8 | @RunWith(classOf[JUnitRunner])
9 | class CommandsProcessorTest extends FunSuite {
10 |
11 | test("filterCommands : ok") {
12 | val filteredResults = filterCommands("use databaseName ;show tables; ;select * from table")
13 |
14 | assert(filteredResults.size === 3, "Different number of commands")
15 | assert(filteredResults === ListBuffer("use databaseName", "show tables", "select * from table"))
16 |
17 | }
18 |
19 | test("test the used regex") {
20 | val filteredResults = "select\\s+([\\w]+)\\.\\* from \\( select \\* from table \\) ([\\w]+) limit 2"
21 | "select adda.* from ( select * from table ) adda limit 2" should fullyMatch regex filteredResults
22 |
23 | }
24 |
25 | test("prepareCommands : ok-last command is a select") {
26 |
27 | val tryPrepareCommands = Try(prepareCommands("use databaseName ;show tables; ;select * from table", 2))
28 | val requiredCommandString = s"use databaseName;show tables;set hive.cli.print.header=true;select '$QUERY_DELIMITATOR';select\\s+([\\w]+)\\.\\* from \\( select \\* from table \\) ([\\w]+) limit 2"
29 |
30 | assert(tryPrepareCommands.isSuccess, "Prepare commands failed")
31 | val returnedCommandString = tryPrepareCommands.get
32 | returnedCommandString should fullyMatch regex requiredCommandString
33 | }
34 |
35 | test("prepareCommands : ok-last command is not a select") {
36 |
37 | val tryPrepareCommands = Try(prepareCommands("use databaseName ;show tables; ;show tables", 2))
38 | val requiredCommandString = s"use databaseName;show tables;set hive.cli.print.header=true;select '$QUERY_DELIMITATOR';show tables"
39 |
40 | assert(tryPrepareCommands.isSuccess, "Prepare commands failed")
41 | val returnedCommandString = tryPrepareCommands.get
42 | returnedCommandString should be (requiredCommandString)
43 | }
44 |
45 | test("prepareCommands : ok-one command") {
46 |
47 | val tryPrepareCommands = Try(prepareCommands("show databases", 2))
48 | val requiredCommandString = s"set hive.cli.print.header=true;select '$QUERY_DELIMITATOR';show databases"
49 |
50 | assert(tryPrepareCommands.isSuccess, "Prepare commands failed")
51 | val returnedCommandString = tryPrepareCommands.get
52 | returnedCommandString should fullyMatch regex requiredCommandString
53 | }
54 |
55 | }
--------------------------------------------------------------------------------
/jaws-hive-sql-rest/src/test/scala/ResultsProcessorTest.scala:
--------------------------------------------------------------------------------
1 | import org.junit.runner.RunWith
2 | import org.scalatest.junit.JUnitRunner
3 | import org.scalatest.FunSuite
4 | import scala.util.Try
5 | import customs.ResultsProcessor._
6 | import org.scalatest.Matchers._
7 | import java.io.ByteArrayOutputStream
8 | import java.io.OutputStreamWriter
9 | import customs.CommandsProcessor._
10 | import java.io.ByteArrayInputStream
11 | import com.xpatterns.jaws.data.DTO.Column
12 | import org.apache.spark.sql.catalyst.expressions.Row
13 | import com.xpatterns.jaws.data.utils.ResultsConverter
14 | import scala.collection.mutable.WrappedArray
15 | import org.apache.spark.sql.types._
16 |
17 | @RunWith(classOf[JUnitRunner])
18 | class ResultsProcessorTest extends FunSuite {
19 |
20 | test("getHeader : columns with .") {
21 |
22 | val headers = getHeader("mzzmjgycpp.name\tmzzmjgycpp.age\tmzzmjgycpp.sex")
23 | val requiredHeaders = Array("name", "age", "sex")
24 |
25 | headers should be(requiredHeaders)
26 | }
27 |
28 | test("getHeader : columns without .") {
29 |
30 | val headers = getHeader("name\tage\tsex")
31 | val requiredHeaders = Array("name", "age", "sex")
32 |
33 | headers should be(requiredHeaders)
34 | }
35 |
36 | test("getResults") {
37 |
38 | val results = getResults(List("name\tage\tsex", "name1\tage1\tsex1", "name2\tage2\tsex2"))
39 | val requiredResults = Array(Row.fromSeq(Array("name", "age", "sex")), Row.fromSeq(Array("name1", "age1", "sex1")),
40 | Row.fromSeq(Array("name2", "age2", "sex2")))
41 |
42 | results should be(requiredResults)
43 | }
44 |
45 | test("get Last Results") {
46 |
47 | val stdOutOS = new ByteArrayOutputStream
48 | val osWriter = new OutputStreamWriter(stdOutOS)
49 | osWriter.write("db1\n")
50 | osWriter.write("db2\n")
51 | osWriter.write("db3\n")
52 | osWriter.write(s"$QUERY_DELIMITATOR\n")
53 | osWriter.write("mzzmjgycpp.name\tmzzmjgycpp.age\tmzzmjgycpp.sex\n")
54 | osWriter.write("name\tage\tsex\n")
55 | osWriter.write("name1\tage1\tsex1\n")
56 | osWriter.write("name2\tage2\tsex2")
57 |
58 | osWriter.flush()
59 |
60 | val results = getLastResults(new ByteArrayInputStream(stdOutOS.toByteArray()))
61 | val requiredSchema = new StructType(Array(StructField("name", StringType, true), StructField("age", StringType, true), StructField("sex", StringType, true)))
62 | val requiredResults = new ResultsConverter(requiredSchema,
63 | Array(Row.fromSeq(Array("name", "age", "sex")), Row.fromSeq(Array("name1", "age1", "sex1")), Row.fromSeq(Array("name2", "age2", "sex2"))))
64 |
65 | osWriter.close()
66 | assert(results.schema === requiredResults.schema, "Not the same schema")
67 | assert(results.result === requiredResults.result, "Not the same results")
68 | }
69 |
70 | test("get Last Results - no results") {
71 |
72 | val stdOutOS = new ByteArrayOutputStream
73 | val osWriter = new OutputStreamWriter(stdOutOS)
74 | osWriter.write("db1\n")
75 | osWriter.write("db2\n")
76 | osWriter.write("db3\n")
77 | osWriter.write(s"$QUERY_DELIMITATOR\n")
78 | osWriter.write("mzzmjgycpp.name\tmzzmjgycpp.age\tmzzmjgycpp.sex\n")
79 |
80 | osWriter.flush()
81 |
82 | val results = getLastResults(new ByteArrayInputStream(stdOutOS.toByteArray()))
83 | val requiredResults = new ResultsConverter(StructType(Array(StructField("name", StringType, true), StructField("age", StringType, true), StructField("sex", StringType, true))), Array.empty)
84 |
85 | osWriter.close()
86 | assert(results.schema === requiredResults.schema, "Not the same schema")
87 | assert(results.result === requiredResults.result, "Not the same results")
88 | }
89 |
90 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2014 Atigeo, LLC.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | http://www.apache.org/licenses/LICENSE-2.0
6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/AvroBinaryResult.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 | import spray.json.DefaultJsonProtocol._
3 | import java.io.ByteArrayOutputStream
4 | import java.io.ObjectOutputStream
5 | import java.io.ObjectInputStream
6 | import java.io.ByteArrayInputStream
7 | import org.apache.avro.generic.GenericRecord
8 | import org.apache.avro.Schema
9 | import spray.json.RootJsonFormat
10 | import org.apache.avro.generic.GenericDatumWriter
11 | import org.apache.avro.io.EncoderFactory
12 | import org.apache.avro.generic.GenericDatumReader
13 | import org.apache.avro.io.DecoderFactory
14 |
15 | case class AvroBinaryResult(schema: Schema, result: Array[Byte]) {
16 | def this() = {
17 | this(null, Array.empty)
18 | }
19 |
20 | def this(avroResult : AvroResult) = {
21 | this(avroResult.schema, avroResult.serializeResult())
22 | }
23 |
24 |   override def hashCode(): Int = {
25 |     val prime = 31
26 |     var hash = 1 // local accumulator renamed so it does not shadow the `result` field
27 |     Option(result) match {
28 |       case None => hash = prime * hash + 0
29 |       case _ => hash = prime * hash + result.deep.hashCode()
30 |     }
31 |     Option(schema) match {
32 |       case None => hash = prime * hash + 0
33 |       case _ => hash = prime * hash + schema.hashCode()
34 |     }
35 |
36 |     hash
37 |   }
38 |
39 | override def equals(other: Any): Boolean = {
40 |
41 | other match {
42 |
43 | case that: AvroBinaryResult =>
44 | (that canEqual this) &&
45 | result.deep == that.result.deep &&
46 | schema == that.schema
47 |
48 | case _ => false
49 | }
50 | }
51 |
52 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/AvroResult.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 | import spray.json.DefaultJsonProtocol._
3 | import java.io.ByteArrayOutputStream
4 | import java.io.ObjectOutputStream
5 | import java.io.ObjectInputStream
6 | import java.io.ByteArrayInputStream
7 | import org.apache.avro.generic.GenericRecord
8 | import org.apache.avro.Schema
9 | import spray.json.RootJsonFormat
10 | import org.apache.avro.generic.GenericDatumWriter
11 | import org.apache.avro.io.EncoderFactory
12 | import org.apache.avro.generic.GenericDatumReader
13 | import org.apache.avro.io.DecoderFactory
14 | import scala.collection.mutable.ArrayBuffer
15 | import org.apache.avro.generic.IndexedRecord
16 | import org.apache.avro.file.SeekableByteArrayInput
17 | import org.apache.avro.file.FileReader
18 | import org.apache.avro.file.DataFileReader
19 | import org.apache.avro.file.SeekableInput
20 | import org.apache.avro.file.DataFileWriter
21 |
22 | case class AvroResult(schema: Schema, result: Array[GenericRecord]) {
23 |
24 | def this() = {
25 | this(null, Array.empty)
26 | }
27 |
28 |   override def hashCode(): Int = {
29 |     val prime = 31
30 |     var hash = 1 // local accumulator renamed so it does not shadow the `result` field
31 |     Option(result) match {
32 |       case None => hash = prime * hash + 0
33 |       case _ => hash = prime * hash + result.deep.hashCode()
34 |     }
35 |     Option(schema) match {
36 |       case None => hash = prime * hash + 0
37 |       case _ => hash = prime * hash + schema.hashCode()
38 |     }
39 |
40 |     hash
41 |   }
42 |
43 | override def equals(other: Any): Boolean = {
44 |
45 | other match {
46 |
47 | case that: AvroResult =>
48 | (that canEqual this) &&
49 | result.deep == that.result.deep &&
50 | schema == that.schema
51 |
52 | case _ => false
53 | }
54 | }
55 |
56 | def serializeResult(): Array[Byte] = {
57 | val datumWriter = new GenericDatumWriter[GenericRecord](schema)
58 | val baos = new ByteArrayOutputStream()
59 | val fileWriter = new DataFileWriter[GenericRecord](datumWriter)
60 | fileWriter.create(schema, baos)
61 | val binaryResults = result map (row => {
62 | fileWriter.append(row)
63 | })
64 |
65 | fileWriter.close()
66 | baos.toByteArray()
67 | }
68 |
69 | override def toString() = {
70 | var s = s"schema = ${schema.toString()} \n results = "
71 | result.foreach { r => s+= r.toString() }
72 | s
73 | }
74 | }
75 | object AvroResult {
76 |
77 | def deserializeResult(byteArray: Array[Byte], schema: Schema): Array[GenericRecord] = {
78 | val reader = new GenericDatumReader[GenericRecord](schema)
79 | val in = new SeekableByteArrayInput(byteArray)
80 |
81 | var dfr: FileReader[GenericRecord] = null
82 | val records = ArrayBuffer[GenericRecord]()
83 | try {
84 | dfr = DataFileReader.openReader(in, reader);
85 | while (dfr.hasNext()) {
86 | records += dfr.next()
87 | }
88 |
89 | } finally {
90 | if (dfr != null) {
91 | dfr.close();
92 | }
93 | }
94 |
95 | records.toArray
96 | }
97 | }
--------------------------------------------------------------------------------
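
To make the serialization contract concrete, here is a small, self-contained sketch (not part of the module) that round-trips one record through serializeResult / deserializeResult; the schema and the field value are invented for the example. serializeResult writes the Avro object-container format via DataFileWriter, which deserializeResult reads back with DataFileReader.

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericRecord}
import com.xpatterns.jaws.data.DTO.AvroResult

val schema = new Schema.Parser().parse(
  """{"type":"record","name":"person","fields":[{"name":"name","type":"string"}]}""")

val record: GenericRecord = new GenericData.Record(schema)
record.put("name", "emaorhian")

val avroResult = AvroResult(schema, Array(record))
val bytes      = avroResult.serializeResult()                  // Avro container-file bytes
val restored   = AvroResult.deserializeResult(bytes, schema)   // Array[GenericRecord]

assert(restored.length == 1)
assert(restored(0).get("name").toString == "emaorhian")        // Avro returns Utf8, hence toString
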
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Column.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 | import spray.json.DefaultJsonProtocol._
3 | import spray.json.JsonFormat
4 |
5 | /**
6 | * Created by emaorhian
7 | */
8 | case class Column(name: String, dataType: String, comment: String, members: Array[Column]) {
9 |
10 | def this() = {
11 | this("", "", "", Array.empty)
12 | }
13 |
14 | override def hashCode(): Int = {
15 | val prime = 31
16 | var result = 1
17 | Option(name) match {
18 | case None => result = prime * result + 0
19 | case _ => result = prime * result + name.hashCode()
20 | }
21 | Option(dataType) match {
22 | case None => result = prime * result + 0
23 | case _ => result = prime * result + dataType.hashCode()
24 | }
25 | Option(comment) match {
26 | case None => result = prime * result + 0
27 | case _ => result = prime * result + comment.hashCode()
28 | }
29 | Option(members) match {
30 | case None => result = prime * result + 0
31 | case _ => result = prime * result + members.deep.hashCode()
32 | }
33 |
34 | result
35 | }
36 |
37 | override def equals(other: Any): Boolean = {
38 |
39 | other match {
40 |
41 | case that: Column =>
42 | (that canEqual this) &&
43 | name == that.name &&
44 | dataType == that.dataType &&
45 | comment == that.comment &&
46 | members.deep == that.members.deep
47 |
48 | case _ => false
49 | }
50 | }
51 | }
52 |
53 | object Column {
54 | implicit val columnJsonFormat: JsonFormat[Column] = lazyFormat(jsonFormat(Column.apply, "name", "dataType","comment", "members"))
55 | }
--------------------------------------------------------------------------------
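
Because Column is recursive (members is itself an Array[Column]), the companion object uses spray-json's lazyFormat to break the cycle. A short illustrative snippet of what serialization with that format looks like; the column names are invented.

import spray.json._
import com.xpatterns.jaws.data.DTO.Column

val address = Column("address", "struct", "",
  Array(
    Column("street", "string", "", Array.empty),
    Column("number", "int", "", Array.empty)))

// picks up Column.columnJsonFormat from the companion object
val json = address.toJson
println(json.compactPrint)
// prints something like:
// {"name":"address","dataType":"struct","comment":"","members":[{"name":"street",...},{"name":"number",...}]}
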
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/CustomResult.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import spray.json.DefaultJsonProtocol._
4 | import spray.json.RootJsonFormat
5 |
6 | case class CustomResult(schema: Array[Column], result: Array[Array[Any]]) {
7 |
8 | def this() = {
9 | this(Array.empty, Array.empty)
10 | }
11 |
12 |   override def hashCode(): Int = {
13 |     val prime = 31
14 |     var hash = 1 // local accumulator renamed so it does not shadow the `result` field
15 |     Option(result) match {
16 |       case None => hash = prime * hash + 0
17 |       case _ => hash = prime * hash + result.deep.hashCode()
18 |     }
19 |     Option(schema) match {
20 |       case None => hash = prime * hash + 0
21 |       case _ => hash = prime * hash + schema.deep.hashCode()
22 |     }
23 |
24 |     hash
25 |   }
26 |
27 | override def equals(other: Any): Boolean = {
28 |
29 | other match {
30 |
31 | case that: CustomResult =>
32 | (that canEqual this) &&
33 | result.deep == that.result.deep &&
34 | schema.deep == that.schema.deep
35 |
36 | case _ => false
37 | }
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Databases.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import spray.json.DefaultJsonProtocol._
4 |
5 | /**
6 | * Created by emaorhian
7 | */
8 | case class Databases(databases: Array[String])
9 |
10 | object Databases {
11 | implicit val databasesJson = jsonFormat1(apply)
12 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Log.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import spray.json.DefaultJsonProtocol._
4 |
5 |
6 | /**
7 | * Created by emaorhian
8 | */
9 | case class Log(log: String, queryID: String, timestamp: Long)
10 |
11 | object Log {
12 | implicit val logJson = jsonFormat3(apply)
13 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Logs.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 | import spray.json.DefaultJsonProtocol._
3 |
4 | /**
5 | * Created by emaorhian
6 | */
7 | case class Logs (logs : Array[Log], status: String)
8 |
9 | object Logs {
10 | implicit val logsJson = jsonFormat2(apply)
11 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/ParquetTable.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 | import spray.json.DefaultJsonProtocol._
3 |
4 | case class ParquetTable(name: String, filePath: String, namenode : String){
5 | def this() = {
6 | this("","","")
7 | }
8 | }
9 | object ParquetTable {
10 | implicit val logJson = jsonFormat3(apply)
11 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Queries.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import spray.json.DefaultJsonProtocol.arrayFormat
4 | import spray.json.DefaultJsonProtocol.jsonFormat1
5 |
6 | /**
7 | * Created by emaorhian
8 | */
9 | case class Queries (queries : Array[Query])
10 |
11 | object Queries {
12 | implicit val queriesJson = jsonFormat1(apply)
13 |
14 |
15 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Query.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import spray.json.DefaultJsonProtocol._
4 |
5 | /**
6 | * Created by emaorhian
7 | */
8 | case class Query(state: String, queryID: String, query: String, metaInfo : QueryMetaInfo)
9 |
10 | object Query {
11 | implicit val logJson = jsonFormat4(apply)
12 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/QueryMetaInfo.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import org.apache.log4j.Logger
4 | import spray.json.DefaultJsonProtocol._
5 | import spray.json._
6 | import scala.collection.mutable
7 |
8 |
9 | /**
10 | * Created by emaorhian
11 | */
12 | case class QueryMetaInfo(var name:Option[String], var description:Option[String], var published:Option[Boolean], var timestamp:Long, var executionTime:Long,
13 | var nrOfResults:Long, var maxNrOfResults:Long, var resultsDestination:Int,
14 | var isLimited:Boolean){
15 | // resultsDestination : 0-cassandra, 1-hdfs, 2-tachyon
16 | def this() = {
17 | this(None, None, None, 0, 0, 0, 0, 0, false)
18 | }
19 |
20 | def this(nrOfResults : Long, maxNrOfResults : Long, resultsDestination : Int, isLimited : Boolean) = {
21 | this(None, None, None, 0, 0, nrOfResults, maxNrOfResults, resultsDestination, isLimited)
22 | }
23 |
24 | }
25 |
26 | object QueryMetaInfo {
27 | val logger = Logger.getLogger("QueryMetaInfo")
28 |
29 | // A custom json format is defined because some fields might be missing.
30 | implicit val logJson = new RootJsonFormat[QueryMetaInfo] {
31 | def write(metaInfo: QueryMetaInfo):JsValue = {
32 | val fields:mutable.Map[String, JsValue] = mutable.Map.empty[String, JsValue]
33 |
34 |       // Don't serialize null values of name and description, because a null value means the field was deleted.
35 | val queryHasName = if (metaInfo.name != None && metaInfo.name.get != null) {
36 | fields("name") = JsString(metaInfo.name.get)
37 | true
38 | } else {
39 | false
40 | }
41 |
42 | // Write the description or published only when the query has a name
43 |       // to make sure that these properties are not visible for unnamed queries
44 | if (metaInfo.description != None && metaInfo.description.get != null && queryHasName) {
45 | fields("description") = JsString(metaInfo.description.get)
46 | }
47 |
48 | if (metaInfo.published != None && metaInfo.name != None && metaInfo.name.get != null && queryHasName) {
49 | fields("published") = JsBoolean(metaInfo.published.get)
50 | }
51 |
52 | fields("timestamp") = JsNumber(metaInfo.timestamp)
53 | fields("executionTime") = JsNumber(metaInfo.executionTime)
54 | fields("nrOfResults") = JsNumber(metaInfo.nrOfResults)
55 | fields("maxNrOfResults") = JsNumber(metaInfo.maxNrOfResults)
56 | fields("resultsDestination") = JsNumber(metaInfo.resultsDestination)
57 | fields("isLimited") = JsBoolean(metaInfo.isLimited)
58 |
59 | JsObject(fields.toMap)
60 | }
61 |
62 | def read(value: JsValue):QueryMetaInfo = value match {
63 | case JsObject(fields) =>
64 | val name = if (fields.contains("name")) {
65 | Some(fields.getOrElse("name", JsNull).convertTo[Option[String]].orNull)
66 | } else {
67 | None
68 | }
69 |
70 | val description = if (fields.contains("description")) {
71 | Some(fields.getOrElse("description", JsNull).convertTo[Option[String]].orNull)
72 | } else {
73 | None
74 | }
75 |
76 | val published = if (fields.contains("published")) {
77 | fields.getOrElse("published", JsNull).convertTo[Option[Boolean]]
78 | } else {
79 | None
80 | }
81 |
82 | val timestamp = fields.getOrElse("timestamp", JsNumber(0)).convertTo[Long]
83 | val executionTime = fields.getOrElse("executionTime", JsNumber(0)).convertTo[Long]
84 | val nrOfResults = fields.getOrElse("nrOfResults", JsNumber(0)).convertTo[Long]
85 | val maxNrOfResults = fields.getOrElse("maxNrOfResults", JsNumber(0)).convertTo[Long]
86 | val resultsDestination = fields.getOrElse("resultsDestination", JsNumber(0)).convertTo[Int]
87 | val isLimited = fields.getOrElse("isLimited", JsFalse).convertTo[Boolean]
88 |
89 | new QueryMetaInfo(name, description, published, timestamp, executionTime, nrOfResults, maxNrOfResults,
90 | resultsDestination, isLimited)
91 |
92 | case _ => deserializationError("Error while trying to parse a QueryMetaInfo")
93 | }
94 | }
95 | }
--------------------------------------------------------------------------------
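
A brief illustration (with invented values) of why the custom format above exists: a payload that omits name / description / published still deserializes, with the optional fields falling back to None and the numeric fields to their defaults, and an unnamed query serializes back without those optional fields.

import spray.json._
import com.xpatterns.jaws.data.DTO.QueryMetaInfo

val payload =
  """{"timestamp":1428571293000,"executionTime":1500,"nrOfResults":10,
    |"maxNrOfResults":100,"resultsDestination":1,"isLimited":true}""".stripMargin

val metaInfo = payload.parseJson.convertTo[QueryMetaInfo]
assert(metaInfo.name.isEmpty && metaInfo.description.isEmpty && metaInfo.published.isEmpty)
assert(metaInfo.executionTime == 1500L)

// name/description/published are not written back for an unnamed query
println(metaInfo.toJson.compactPrint)
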
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Table.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import spray.json.DefaultJsonProtocol._
4 |
5 |
6 | /**
7 | * Created by emaorhian
8 | */
9 | case class Table(name: String, columns: Array[Column], extraInfo : Array[Array[String]])
10 |
11 | object Table {
12 | implicit val logJson = jsonFormat3(apply)
13 |
14 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Tables.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.DTO
2 |
3 | import scala.collection.JavaConverters._
4 | import spray.json.DefaultJsonProtocol._
5 |
6 | /**
7 | * Created by emaorhian
8 | */
9 | case class Tables(database: String, tables: Array[Table])
10 |
11 | object Tables {
12 | implicit val tablesJson = jsonFormat2(apply)
13 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/DAL.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.contracts
2 |
3 | /**
4 | * Created by emaorhian
5 | */
6 | trait DAL {
7 |
8 | def loggingDal : TJawsLogging
9 | def resultsDal : TJawsResults
10 | def parquetTableDal : TJawsParquetTables
11 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/TJawsLogging.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.contracts
2 |
3 | import com.xpatterns.jaws.data.DTO.QueryMetaInfo
4 | import com.xpatterns.jaws.data.DTO.Logs
5 | import com.xpatterns.jaws.data.DTO.Queries
6 | import com.xpatterns.jaws.data.utils.QueryState
7 | import com.xpatterns.jaws.data.utils.Utils
8 | import com.xpatterns.jaws.data.DTO.Query
9 |
10 | /**
11 | * Created by emaorhian
12 | */
13 | trait TJawsLogging {
14 | def setState(queryId: String, queryState: QueryState.QueryState)
15 | def setScriptDetails(queryId: String, scriptDetails: String)
16 | def addLog(queryId: String, jobId: String, time: Long, log: String)
17 |
18 | def setExecutionTime(queryId: String, executionTime: Long): Unit = {
19 | Utils.TryWithRetry {
20 | val metaInfo = getMetaInfo(queryId)
21 | metaInfo.executionTime = executionTime
22 | setMetaInfo(queryId, metaInfo)
23 | }
24 | }
25 |
26 | def setTimestamp(queryId: String, time: Long): Unit = {
27 | Utils.TryWithRetry {
28 | val metaInfo = getMetaInfo(queryId)
29 | metaInfo.timestamp = time
30 | setMetaInfo(queryId, metaInfo)
31 | }
32 | }
33 |
34 | def setRunMetaInfo(queryId: String, metainfo: QueryMetaInfo) = {
35 | Utils.TryWithRetry {
36 | val newMetaInfo = getMetaInfo(queryId)
37 | newMetaInfo.nrOfResults = metainfo.nrOfResults
38 | newMetaInfo.maxNrOfResults = metainfo.maxNrOfResults
39 | newMetaInfo.resultsDestination = metainfo.resultsDestination
40 | newMetaInfo.isLimited = metainfo.isLimited
41 | setMetaInfo(queryId, newMetaInfo)
42 | }
43 | }
44 |
45 | def setQueryProperties(queryId: String, name: Option[String], description: Option[String], published:Option[Boolean],
46 | overwrite: Boolean) = {
47 | Utils.TryWithRetry {
48 | val metaInfo = getMetaInfo(queryId)
49 |
50 | if (name != None) {
51 | updateQueryName(queryId, metaInfo, name.get, overwrite)
52 | }
53 |
54 | if (description != None) {
55 | metaInfo.description = description
56 | }
57 |
58 | // When the name of a query is not present, the description and published flags should be removed,
59 | // because they appear only when a query has a name
60 | if (metaInfo.name == None || metaInfo.name.get == null) {
61 | metaInfo.description = None
62 | metaInfo.published = None
63 | } else if (published != None) {
64 | setQueryPublishedStatus(metaInfo.name.get, metaInfo, published.get)
65 | metaInfo.published = published
66 | }
67 |
68 | setMetaInfo(queryId, metaInfo)
69 | }
70 | }
71 |
72 | private def updateQueryName(queryId: String, metaInfo: QueryMetaInfo, name: String, overwrite:Boolean):Unit = {
73 | val newQueryName = if (name != null) name.trim() else null
74 |
75 | if (newQueryName != null && newQueryName.isEmpty) {
76 | return
77 | }
78 |
79 | if (!overwrite) {
80 | if (newQueryName != null && getQueriesByName(newQueryName).queries.nonEmpty) {
81 |         // When the query name already exists and the overwrite flag is not set,
82 |         // then the client should be warned about it
83 |         throw new Exception(s"There is already a query with the name $name. To overwrite " +
84 |           s"the query name, please send the parameter overwrite set to true")
85 | }
86 | } else if (newQueryName != null) {
87 | // When overwriting the old values, the old queries should have the name and description reset
88 | val notFoundState = QueryState.NOT_FOUND.toString
89 | for (query <- getQueriesByName(newQueryName).queries) {
90 | if (query.state != notFoundState) {
91 | query.metaInfo.name = None
92 | query.metaInfo.description = None
93 | setMetaInfo(query.queryID, query.metaInfo)
94 | }
95 | }
96 | }
97 |
98 | if (metaInfo.name != None && metaInfo.name.get != null) {
99 | // Delete the old query name
100 | deleteQueryName(metaInfo.name.get)
101 | // Remove the old published status of the query from storage
102 | deleteQueryPublishedStatus(metaInfo.name.get, metaInfo.published)
103 | }
104 | metaInfo.name = Some(newQueryName)
105 |
106 | if (newQueryName != null) {
107 | // Save the query name to be able to search it
108 | saveQueryName(newQueryName, queryId)
109 |
110 | // Set the default published value
111 | val published = metaInfo.published.getOrElse(false)
112 | setQueryPublishedStatus(newQueryName, metaInfo, published)
113 | metaInfo.published = Some(published)
114 | }
115 | }
116 |
117 | def setQueryPublishedStatus(name: String, metaInfo: QueryMetaInfo, published: Boolean)
118 | def deleteQueryPublishedStatus(name: String, published: Option[Boolean])
119 |
120 | def setMetaInfo(queryId: String, metainfo: QueryMetaInfo)
121 |
122 | def getState(queryId: String): QueryState.QueryState
123 | def getScriptDetails(queryId: String): String
124 | def getLogs(queryId: String, time: Long, limit: Int): Logs
125 | def getMetaInfo(queryId: String): QueryMetaInfo
126 |
127 | def getQueries(queryId: String, limit: Int): Queries
128 | def getQueries(queryIds: Seq[String]): Queries = {
129 | Utils.TryWithRetry {
130 | val queryArray = queryIds map (queryID => new Query(getState(queryID).toString, queryID, getScriptDetails(queryID), getMetaInfo(queryID))) toArray
131 | val queries = new Queries(queryArray)
132 | queries
133 | }
134 | }
135 |
136 | def getPublishedQueries():Array[String]
137 | def getQueriesByName(name:String):Queries
138 | def deleteQueryName(name: String)
139 | def saveQueryName(name: String, queryId: String)
140 |
141 | def deleteQuery(queryId: String)
142 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/TJawsParquetTables.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.contracts
2 |
3 | import com.xpatterns.jaws.data.DTO.ParquetTable
4 |
5 | trait TJawsParquetTables {
6 | def addParquetTable(pTable : ParquetTable)
7 | def deleteParquetTable(name : String)
8 | def listParquetTables() : Array[ParquetTable]
9 | def tableExists(name : String) : Boolean
10 | def readParquetTable(name : String) : ParquetTable
11 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/TJawsResults.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.contracts
2 |
3 | import com.xpatterns.jaws.data.utils.Utils
4 | import com.xpatterns.jaws.data.utils.ResultsConverter
5 | import spray.json.DefaultJsonProtocol._
6 | import com.xpatterns.jaws.data.DTO.AvroResult
7 | import com.xpatterns.jaws.data.DTO.CustomResult
8 |
9 | /**
10 | * Created by emaorhian
11 | */
12 | trait TJawsResults {
13 | def setAvroResults (uuid: String, avroResults : AvroResult)
14 | def getAvroResults(uuid: String) : AvroResult
15 | def setCustomResults(uuid: String, results: CustomResult)
16 | def getCustomResults(uuid: String): CustomResult
17 |
18 | def setResults(uuid: String, results: ResultsConverter) {
19 | Utils.TryWithRetry {
20 |
21 | setAvroResults(uuid, results.toAvroResults())
22 | setCustomResults(uuid, results.toCustomResults())
23 | }
24 | }
25 | def deleteResults(uuid: String)
26 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/CassandraDal.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import me.prettyprint.cassandra.model.AllOneConsistencyLevelPolicy
4 | import me.prettyprint.cassandra.service.CassandraHostConfigurator
5 | import me.prettyprint.cassandra.service.ThriftCluster
6 | import me.prettyprint.hector.api.factory.HFactory
7 | import com.xpatterns.jaws.data.contracts.DAL
8 | import com.xpatterns.jaws.data.contracts.TJawsLogging
9 | import com.xpatterns.jaws.data.contracts.TJawsResults
10 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables
11 |
12 | /**
13 | * Created by emaorhian
14 | */
15 | class CassandraDal (cassandraHost : String, clusterName : String, keyspaceName : String) extends DAL {
16 | val cassandraHostConfigurator = new CassandraHostConfigurator(cassandraHost)
17 | val cluster = new ThriftCluster(clusterName, cassandraHostConfigurator)
18 | val keyspace = HFactory.createKeyspace(keyspaceName, cluster, new AllOneConsistencyLevelPolicy)
19 |
20 | val loggingDal: TJawsLogging = new JawsCassandraLogging(keyspace)
21 | val resultsDal: TJawsResults = new JawsCassandraResults(keyspace)
22 | val parquetTableDal: TJawsParquetTables = new JawsCassandraParquetTables(keyspace)
23 | }
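A minimal wiring sketch (the host, cluster, keyspace and table values are illustrative and mirror the test application.conf further down):

    import com.xpatterns.jaws.data.contracts.DAL
    import com.xpatterns.jaws.data.DTO.ParquetTable

    val dal: DAL = new CassandraDal("devbox.local:9160", "Jaws", "xpatterns_jaws")
    // the three DALs share the same keyspace
    dal.parquetTableDal.addParquetTable(new ParquetTable("people", "/user/spark/people.parquet", "hdfs://devbox.local:8020"))
    val registered = dal.parquetTableDal.tableExists("people")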
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/HdfsDal.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import com.xpatterns.jaws.data.contracts.DAL
4 | import com.xpatterns.jaws.data.contracts.TJawsLogging
5 | import com.xpatterns.jaws.data.contracts.TJawsResults
6 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables
7 |
8 | /**
9 | * Created by emaorhian
10 | */
11 | class HdfsDal(configuration: org.apache.hadoop.conf.Configuration) extends DAL {
12 | val loggingDal: TJawsLogging = new JawsHdfsLogging(configuration)
13 | val resultsDal: TJawsResults = new JawsHdfsResults(configuration)
14 | val parquetTableDal: TJawsParquetTables = new JawsHdfsParquetTables(configuration)
15 | }
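A minimal wiring sketch, assuming a reachable namenode; the folder keys come from Utils and the values mirror the test application.conf used by the HDFS tests below:

    import org.apache.hadoop.conf.Configuration
    import com.xpatterns.jaws.data.contracts.DAL
    import com.xpatterns.jaws.data.utils.Utils

    val configuration = new Configuration()
    configuration.set("fs.defaultFS", "hdfs://devbox.local:8020")
    configuration.set("dfs.replication", "1")
    configuration.setBoolean(Utils.FORCED_MODE, false)
    configuration.set(Utils.LOGGING_FOLDER, "jawsLogs")
    configuration.set(Utils.STATUS_FOLDER, "jawsStates")
    configuration.set(Utils.DETAILS_FOLDER, "jawsDetails")
    configuration.set(Utils.METAINFO_FOLDER, "jawsMetainfoFolder")
    configuration.set(Utils.QUERY_NAME_FOLDER, "jawsQueryNameFolder")
    configuration.set(Utils.QUERY_PUBLISHED_FOLDER, "jawsQueryPublishedFolder")
    configuration.set(Utils.QUERY_UNPUBLISHED_FOLDER, "jawsQueryUnpublishedFolder")
    configuration.set(Utils.RESULTS_FOLDER, "jawsResultsFolder")
    configuration.set(Utils.PARQUET_TABLES_FOLDER, "parquetTablesFolder")

    val dal: DAL = new HdfsDal(configuration)
    val tables = dal.parquetTableDal.listParquetTables()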
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/JawsCassandraParquetTables.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables
4 | import com.xpatterns.jaws.data.DTO.ParquetTable
5 | import com.xpatterns.jaws.data.utils.Utils
6 | import org.apache.log4j.Logger
7 | import me.prettyprint.hector.api.beans.Composite
8 | import me.prettyprint.hector.api.factory.HFactory
9 | import me.prettyprint.hector.api.Keyspace
10 | import me.prettyprint.cassandra.serializers.IntegerSerializer
11 | import me.prettyprint.hector.api.Serializer
12 | import me.prettyprint.cassandra.serializers.StringSerializer
13 | import me.prettyprint.cassandra.serializers.CompositeSerializer
14 | import me.prettyprint.cassandra.serializers.LongSerializer
15 | import me.prettyprint.hector.api.query.ColumnQuery
16 | import me.prettyprint.cassandra.model.thrift.ThriftColumnQuery
17 | import spray.json._
18 | import spray.json.DefaultJsonProtocol._
19 |
20 | class JawsCassandraParquetTables(keyspace: Keyspace) extends TJawsParquetTables {
21 |
22 | val CF_PARQUET_TABLES = "parquet_tables"
23 | val ROW_ID = "tables"
24 |
25 | val is = IntegerSerializer.get.asInstanceOf[Serializer[Int]]
26 | val ss = StringSerializer.get.asInstanceOf[Serializer[String]]
27 | val cs = CompositeSerializer.get.asInstanceOf[Serializer[Composite]]
28 | val ls = LongSerializer.get.asInstanceOf[Serializer[Long]]
29 |
30 | val logger = Logger.getLogger("JawsCassandraParquetTables")
31 |
32 | override def addParquetTable(pTable: ParquetTable) {
33 | Utils.TryWithRetry {
34 | logger.debug(s"Adding the parquet table ${pTable.name} for the filepath ${pTable.filePath}")
35 | val mutator = HFactory.createMutator(keyspace, ss)
36 |
37 | val valueTuple = (pTable.namenode, pTable.filePath).toJson.prettyPrint
38 | mutator.addInsertion(ROW_ID, CF_PARQUET_TABLES, HFactory.createColumn(pTable.name, valueTuple, ss, ss))
39 | mutator.execute()
40 | }
41 | }
42 |
43 | override def deleteParquetTable(name: String) {
44 | Utils.TryWithRetry {
45 | logger.debug(s"Deleting parquet table $name")
46 |
47 | val mutator = HFactory.createMutator(keyspace, ss)
48 |
49 | mutator.addDeletion(ROW_ID, CF_PARQUET_TABLES, name, ss)
50 | mutator.execute
51 | }
52 | }
53 | override def listParquetTables(): Array[ParquetTable] = {
54 | Utils.TryWithRetry {
55 | var result = Array[ParquetTable]()
56 | logger.debug("listing all parquet tables")
57 | val sliceQuery = HFactory.createSliceQuery(keyspace, ss, ss, ss).setColumnFamily(CF_PARQUET_TABLES).setKey(ROW_ID).setRange(null, null, false, Int.MaxValue)
58 | val queryResult = sliceQuery.execute
59 | Option(queryResult) match {
60 | case None => result
61 | case _ => {
62 | val columnSlice = queryResult.get
63 | Option(columnSlice) match {
64 | case None => result
65 | case _ => {
66 | val columns = columnSlice.getColumns
67 | Option(columns) match {
68 | case None => result
69 | case _ => {
70 | columns.size match {
71 | case 0 => result
72 | case size: Int => {
73 | for (index <- 0 until size) {
74 | val column = columns.get(index)
75 | val (namenode, filepath) = column.getValue.parseJson.fromJson[Tuple2[String, String]]
76 | result = result :+ new ParquetTable(column.getName, filepath, namenode)
77 | }
78 | result
79 | }
80 | }
81 | }
82 | }
83 | }
84 | }
85 | }
86 | }
87 | }
88 | }
89 |
90 | override def tableExists(name: String): Boolean = {
91 | Utils.TryWithRetry {
92 | logger.debug(s"Reading the parquet table ${name}")
93 | val columnQuery = HFactory.createColumnQuery(keyspace, ss, ss, ss)
94 | columnQuery.setColumnFamily(CF_PARQUET_TABLES).setKey(ROW_ID).setName(name)
95 |
96 | val queryResult = columnQuery.execute
97 | Option(queryResult) match {
98 | case None => false
99 | case _ => {
100 | val column = queryResult.get
101 | Option(column) match {
102 | case None => false
103 | case _ => true
104 | }
105 | }
106 | }
107 | }
108 | }
109 |
110 | override def readParquetTable(name: String): ParquetTable = {
111 | Utils.TryWithRetry {
112 | logger.debug(s"Reading the parquet table ${name}")
113 | val columnQuery = HFactory.createColumnQuery(keyspace, ss, ss, ss)
114 | columnQuery.setColumnFamily(CF_PARQUET_TABLES).setKey(ROW_ID).setName(name)
115 |
116 | val queryResult = columnQuery.execute
117 | Option(queryResult) match {
118 | case None => new ParquetTable
119 | case _ => {
120 | val column = queryResult.get
121 | Option(column) match {
122 | case None => new ParquetTable
123 | case _ =>
124 | {
125 | val (namenode, filepath) = column.getValue.parseJson.fromJson[Tuple2[String, String]]
126 | new ParquetTable(column.getName, filepath, namenode)
127 | }
128 |
129 | }
130 | }
131 | }
132 | }
133 | }
134 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/JawsHdfsParquetTables.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables
4 | import spray.json._
5 | import spray.json.DefaultJsonProtocol._
6 | import com.xpatterns.jaws.data.DTO.ParquetTable
7 | import org.apache.hadoop.conf.Configuration
8 | import com.xpatterns.jaws.data.utils.Utils
9 | import org.apache.log4j.Logger
10 | import java.util.Comparator
11 |
12 | class JawsHdfsParquetTables(configuration: Configuration) extends TJawsParquetTables {
13 |
14 | val logger = Logger.getLogger("JawsHdfsParquetTables")
15 |
16 | val forcedMode = configuration.getBoolean(Utils.FORCED_MODE, false)
17 | Utils.createFolderIfDoesntExist(configuration, configuration.get(Utils.PARQUET_TABLES_FOLDER), forcedMode)
18 |
19 | override def addParquetTable(pTable: ParquetTable) {
20 | logger.debug(s"Writing parquet table ${pTable.name} with path ${pTable.filePath} ")
21 | val valueTuple = (pTable.namenode, pTable.filePath).toJson.prettyPrint
22 | Utils.rewriteFile(valueTuple, configuration, getParquetTableFilePath(pTable.name))
23 | }
24 |
25 | override def deleteParquetTable(name: String) {
26 | logger.debug(s"Deleting parquet table called $name")
27 | val filePath = getParquetTableFilePath(name)
28 | Utils.deleteFile(configuration, filePath)
29 | }
30 |
31 | override def listParquetTables(): Array[ParquetTable] = {
32 |
33 | logger.debug("Listing parquet tables: ")
34 | var tables = Array[ParquetTable]()
35 |
36 | val files = Utils.listFiles(configuration, Utils.PARQUET_TABLES_FOLDER, new Comparator[String]() {
37 |
38 | override def compare(o1: String, o2: String): Int = {
39 | o1.compareTo(o2)
40 | }
41 |
42 | })
43 |
44 | val iterator = files.iterator()
45 |
46 | while (iterator.hasNext()) {
47 | val tableName = iterator.next()
48 |
49 | val (namenode, filepath) = Utils.readFile(configuration, Utils.PARQUET_TABLES_FOLDER + "/" + tableName).parseJson.fromJson[Tuple2[String, String]]
50 | tables = tables :+ new ParquetTable(tableName, filepath, namenode)
51 | }
52 |
53 | tables
54 | }
55 |
56 | override def tableExists(name: String): Boolean = {
57 | logger.debug(s"Checking table existence for $name")
58 | val filename = getParquetTableFilePath(name)
59 |
60 | Utils.checkFileExistence(filename, configuration)
61 | }
62 |
63 | override def readParquetTable(name: String): ParquetTable = {
64 | logger.debug(s"Reading table $name")
65 | val filename = getParquetTableFilePath(name)
66 |
67 | if (Utils.checkFileExistence(filename, configuration)) {
68 | val (namenode, filepath) = Utils.readFile(configuration, filename).parseJson.fromJson[Tuple2[String, String]]
69 | new ParquetTable(name, filepath, namenode)
70 | } else {
71 | new ParquetTable
72 | }
73 |
74 | }
75 |
76 | def getParquetTableFilePath(name: String): String = {
77 | configuration.get(Utils.PARQUET_TABLES_FOLDER) + "/" + name
78 | }
79 | }
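The on-disk layout is one file per table under the configured parquet tables folder, holding the (namenode, filePath) pair serialized with spray-json. A small sketch of what gets written (values illustrative):

    import spray.json._
    import spray.json.DefaultJsonProtocol._

    // What addParquetTable stores for a table named "people":
    val stored = ("hdfs://devbox.local:8020", "/user/spark/people.parquet").toJson.prettyPrint
    // => a two-element JSON array: the namenode first, then the file path,
    //    written to the file returned by getParquetTableFilePath("people")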
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/JawsHdfsResults.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import com.xpatterns.jaws.data.contracts.TJawsResults
4 | import org.apache.log4j.Logger
5 | import org.apache.hadoop.conf.Configuration
6 | import net.liftweb.json._
7 | import spray.json._
8 | import org.apache.hadoop.fs.Path
9 | import org.apache.hadoop.io.IOUtils
10 | import org.apache.hadoop.fs.FileSystem
11 | import java.io.InputStream
12 | import org.apache.commons.io.output.ByteArrayOutputStream
13 | import com.xpatterns.jaws.data.utils.Utils
14 | import com.xpatterns.jaws.data.utils.ResultsConverter
15 | import com.xpatterns.jaws.data.DTO.AvroResult
16 | import com.xpatterns.jaws.data.DTO.CustomResult
17 | import org.apache.avro.Schema
18 | import com.google.gson.GsonBuilder
19 |
20 | class JawsHdfsResults(configuration: Configuration) extends TJawsResults {
21 |
22 | val logger = Logger.getLogger("JawsHdfsResults")
23 | val forcedMode = configuration.getBoolean(Utils.FORCED_MODE, false)
24 | Utils.createFolderIfDoesntExist(configuration, configuration.get(Utils.RESULTS_FOLDER), forcedMode)
25 | Utils.createFolderIfDoesntExist(configuration, s"${configuration.get(Utils.RESULTS_FOLDER)}/avro", forcedMode)
26 | Utils.createFolderIfDoesntExist(configuration, s"${configuration.get(Utils.RESULTS_FOLDER)}/custom", forcedMode)
27 |
28 | implicit val formats = DefaultFormats
29 | def setAvroResults(uuid: String, avroResults: AvroResult) {
30 | logger.debug("Writing avro results to query " + uuid)
31 |
32 | val (schemaFile, resultsFile) = getAvroResultsFilePaths(uuid)
33 | Utils.rewriteFile(avroResults.schema.toString(), configuration, schemaFile)
34 | val bytesR = avroResults.serializeResult()
35 | Utils.rewriteFile(bytesR, configuration, resultsFile)
36 | }
37 |
38 | def getAvroResults(uuid: String): AvroResult = {
39 | logger.debug("Reading results for query: " + uuid)
40 | val (schemaFile, resultsFile) = getAvroResultsFilePaths(uuid)
41 | if (Utils.checkFileExistence(schemaFile, configuration) && Utils.checkFileExistence(resultsFile, configuration)) {
42 | val schemaParser = new Schema.Parser()
43 | val schema = schemaParser.parse(Utils.readFile(configuration, schemaFile))
44 | val results = Utils.readBytes(configuration, resultsFile)
45 | new AvroResult(schema, AvroResult.deserializeResult(results, schema))
46 | } else new AvroResult()
47 | }
48 |
49 | def setCustomResults(uuid: String, results: CustomResult) {
50 | logger.debug("Writing custom results to query " + uuid)
51 | val customFile = getCustomResultsFilePaths(uuid)
52 | val gson = new GsonBuilder().create()
53 | Utils.rewriteFile(gson.toJson(results), configuration, customFile)
54 | }
55 | def getCustomResults(uuid: String): CustomResult = {
56 | logger.debug("Reading custom results for query: " + uuid)
57 | val customFile = getCustomResultsFilePaths(uuid)
58 | if (Utils.checkFileExistence(customFile, configuration)) {
59 | val gson = new GsonBuilder().create()
60 | gson.fromJson(Utils.readFile(configuration, customFile), classOf[CustomResult])
61 | } else new CustomResult()
62 | }
63 |
64 | def deleteResults(uuid: String) {
65 | logger.debug(s"Deleting results for query $uuid")
66 | val (schemaFile, resultsFile) = getAvroResultsFilePaths(uuid)
67 | val customFile = getCustomResultsFilePaths(uuid)
68 | Utils.deleteFile(configuration, schemaFile)
69 | Utils.deleteFile(configuration, resultsFile)
70 | Utils.deleteFile(configuration, customFile)
71 |
72 | }
73 |
74 | def getResultsFilePath(queryId: String): String = {
75 | s"${configuration.get(Utils.RESULTS_FOLDER)}/$queryId"
76 | }
77 |
78 | def getAvroResultsFilePaths(queryId: String): Tuple2[String, String] = {
79 | val route = s"${configuration.get(Utils.RESULTS_FOLDER)}/avro/${queryId}_"
80 | (s"${route}schema", s"${route}results")
81 | }
82 |
83 | def getCustomResultsFilePaths(queryId: String) = {
84 | s"${configuration.get(Utils.RESULTS_FOLDER)}/custom/${queryId}"
85 | }
86 | }
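For a results folder of jawsResultsFolder, the path helpers above split each query's results into an Avro schema file, an Avro data file and a custom JSON file. A short sketch (query id illustrative; note the constructor eagerly creates the avro/custom folders, so it needs a reachable namenode):

    val hdfsResults = new JawsHdfsResults(configuration)   // configuration as in the HdfsDal sketch above
    val (schemaFile, dataFile) = hdfsResults.getAvroResultsFilePaths("abc")
    // schemaFile == "jawsResultsFolder/avro/abc_schema", dataFile == "jawsResultsFolder/avro/abc_results"
    val customFile = hdfsResults.getCustomResultsFilePaths("abc")
    // customFile == "jawsResultsFolder/custom/abc"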
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/CustomConverter.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.utils
2 |
3 | import org.apache.spark.sql.catalyst.expressions.Row
4 | import spray.json.DefaultJsonProtocol._
5 | import com.xpatterns.jaws.data.DTO.CustomResult
6 | import com.xpatterns.jaws.data.DTO.Column
7 | import com.google.gson.GsonBuilder
8 | import java.sql.Timestamp
9 | import collection.JavaConversions._
10 | import org.apache.spark.sql.types._
11 | import org.apache.spark.sql.catalyst.expressions.GenericRow
12 |
13 | object CustomConverter {
14 |
15 | def getCustomSchema(schema: StructType): Array[Column] = {
16 | schema.fields map (field => getCustomSchema(field.dataType, field.name)) toArray
17 | }
18 |
19 | private def getCustomSchema(fieldType: DataType, fieldName: String): Column = {
20 | fieldType match {
21 | case ArrayType(elementType, _) => new Column(fieldName, "ArrayType", "", Array(getCustomSchema(elementType, "items")))
22 | case MapType(StringType, valueType, _) => new Column(fieldName, "MapType", "", Array(getCustomSchema(valueType, "values")))
23 | case structType: StructType => new Column(fieldName, "StructType", "", structType.fields map (field => getCustomSchema(field.dataType, field.name)) toArray)
24 | case _ => new Column(fieldName, fieldType.toString(), "", Array.empty)
25 | }
26 | }
27 |
28 | def getCustomResult(result: Array[Row], schema: StructType) = {
29 | val converter = createConverter(schema)
30 | result map (row => converter(row).asInstanceOf[Array[Any]])
31 | }
32 |
33 | private def createConverter(
34 | dataType: DataType): (Any) => Any = {
35 | dataType match {
36 | case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | StringType |
37 | BinaryType | BooleanType =>
38 | (item: Any) => item
39 |
40 | case DecimalType() =>
41 | (item: Any) => if (item == null) null else item.toString
42 |
43 | case TimestampType =>
44 | (item: Any) => {
45 | if (item == null) null else item.asInstanceOf[Timestamp].getTime
46 | }
47 |
48 | case ArrayType(elementType, _) =>
49 | val elementConverter = createConverter(elementType)
50 | (item: Any) => {
51 | if (item == null) {
52 | null
53 | } else {
54 |
55 | // the element may be either a Seq or a GenericRow, depending on how Spark produced it
56 | val sourceArray =
57 | if (item.isInstanceOf[Seq[Any]]) item.asInstanceOf[Seq[Any]] else item.asInstanceOf[GenericRow].toSeq
58 | val destination = sourceArray map { element => elementConverter(element) }
59 | destination.toArray
60 | }
61 | }
62 |
63 | case MapType(StringType, valueType, _) =>
64 | val valueConverter = createConverter(valueType)
65 |
66 | (item: Any) => {
67 | if (item == null) {
68 | null
69 | } else {
70 | val smap = item.asInstanceOf[Map[String, Any]] map {
71 | case (key, value) =>
72 | (key -> valueConverter(value))
73 | }
74 | mapAsJavaMap(smap)
75 | }
76 | }
77 |
78 | case structType: StructType =>
79 | val fieldConverters = structType.fields.map(field =>
80 | createConverter(field.dataType))
81 |
82 | (item: Any) => {
83 | if (item == null) {
84 | null
85 | } else {
86 |
87 | val row = item.asInstanceOf[Row].toSeq
88 | val valuesWithConverters = row zip fieldConverters
89 | valuesWithConverters.map {
90 | case (field, converter) => converter(field)
91 | }.toArray
92 | }
93 | }
94 | }
95 | }
96 |
97 | }
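The converter flattens a Spark schema into Column DTOs and turns rows into plain values (for example, a Timestamp becomes its epoch-millisecond value). A small sketch with an illustrative two-field schema:

    import org.apache.spark.sql.types._
    import org.apache.spark.sql.catalyst.expressions.Row

    val schema = new StructType(Array(
      new StructField("id", IntegerType, false),
      new StructField("seenAt", TimestampType, true)))

    val columns = CustomConverter.getCustomSchema(schema)
    // Array(Column("id", "IntegerType", ...), Column("seenAt", "TimestampType", ...))

    val rows   = Array(Row.fromSeq(Seq(1, new java.sql.Timestamp(0L))))
    val values = CustomConverter.getCustomResult(rows, schema)
    // Array(Array(1, 0L)): the Timestamp is converted to epoch milliseconds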
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/GsonHelper.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.utils
2 |
3 | import java.lang.reflect.Type
4 | import com.google.gson.Gson
5 | import com.google.gson.GsonBuilder
6 | import com.google.gson.JsonDeserializationContext
7 | import com.google.gson.JsonDeserializer
8 | import com.google.gson.JsonElement
9 | import com.google.gson.JsonParseException
10 | import com.google.gson.JsonPrimitive
11 | import com.google.gson.JsonSerializationContext
12 | import com.google.gson.JsonSerializer
13 | import javax.xml.bind.DatatypeConverter
14 | import org.apache.avro.util.Utf8
15 |
16 | object GsonHelper {
17 |
18 | val customGson = new GsonBuilder().registerTypeHierarchyAdapter(classOf[Array[Byte]],
19 | new ByteArrayToBase64TypeAdapter())
20 | .registerTypeHierarchyAdapter(classOf[Utf8],
21 | new Utf8toStrAdapter()).create();
22 |
23 | class ByteArrayToBase64TypeAdapter extends JsonSerializer[Array[Byte]] with JsonDeserializer[Array[Byte]] {
24 | def deserialize(json: JsonElement, typeOfT: Type, context: JsonDeserializationContext) = {
25 | DatatypeConverter.parseBase64Binary(json.getAsString())
26 | }
27 |
28 | def serialize(src: Array[Byte], typeOfSrc: Type, context: JsonSerializationContext): JsonElement = {
29 | new JsonPrimitive(DatatypeConverter.printBase64Binary(src));
30 | }
31 | }
32 |
33 | class Utf8toStrAdapter extends JsonSerializer[Utf8] with JsonDeserializer[Utf8] {
34 | def deserialize(json: JsonElement, typeOfT: Type, context: JsonDeserializationContext) = {
35 | new Utf8(json.getAsString)
36 | }
37 |
38 | def serialize(src: Utf8, typeOfSrc: Type, context: JsonSerializationContext): JsonElement = {
39 | new JsonPrimitive(src.toString());
40 | }
41 | }
42 | }
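customGson serializes byte arrays as Base64 strings and Avro Utf8 values as plain strings; a quick round-trip sketch (values illustrative):

    val json  = GsonHelper.customGson.toJson(Array[Byte](1, 2, 3))          // "AQID"
    val bytes = GsonHelper.customGson.fromJson(json, classOf[Array[Byte]])  // Array(1, 2, 3)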
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/QueryState.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.utils
2 |
3 | object QueryState extends Enumeration {
4 | type QueryState = Value
5 | val DONE, IN_PROGRESS, FAILED, NOT_FOUND = Value
6 | }
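QueryState is a plain Scala Enumeration, so the standard helpers apply; for instance:

    val state    = QueryState.withName("IN_PROGRESS")   // QueryState.IN_PROGRESS
    val finished = state == QueryState.DONE             // false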
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/Randomizer.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.utils
2 |
3 | import java.util.ArrayList
4 | import org.apache.commons.lang.RandomStringUtils
5 | import org.apache.commons.lang.math.RandomUtils
6 | import com.xpatterns.jaws.data.DTO.Column
7 | import com.xpatterns.jaws.data.DTO.Log
8 | import com.xpatterns.jaws.data.DTO.QueryMetaInfo
9 | import com.xpatterns.jaws.data.DTO.ParquetTable
10 | import com.xpatterns.jaws.data.DTO.AvroResult
11 | import org.apache.spark.sql.catalyst.expressions.Row
12 | import org.apache.spark.sql.types._
13 |
14 |
15 |
16 | object Randomizer {
17 |
18 | def getRandomString(nr : Int) : String = {
19 | return RandomStringUtils.randomAlphabetic(nr)
20 | }
21 |
22 | def getRandomLong : Long = {
23 | return RandomUtils.nextLong()
24 | }
25 |
26 |
27 | def getParquetTable : ParquetTable ={
28 | new ParquetTable(Randomizer.getRandomString(5), Randomizer.getRandomString(5), Randomizer.getRandomString(5))
29 | }
30 |
31 | def getParquetTables (size : Int): Array[ParquetTable] = {
32 | val result : Array[ParquetTable] = new Array(size)
33 | for (i <- 0 until size){
34 | result(i) = getParquetTable
35 | }
36 | result
37 | }
38 |
39 | def getResultsConverter : ResultsConverter = {
40 |
41 | val intField = new StructField("int", IntegerType, false)
42 | val strField = new StructField("str", StringType, true)
43 | val structType = new StructType(Array(intField, strField))
44 |
45 | val structTypeRow = Array(Row.fromSeq(Seq(1, "a")), Row.fromSeq(Seq(2, "b")))
46 | new ResultsConverter(structType, structTypeRow)
47 |
48 | }
49 |
50 | def getLogDTO: Log = {
51 | return new Log(Randomizer.getRandomString(5000), Randomizer.getRandomString(10), Randomizer.getRandomLong)
52 | }
53 |
54 | def createQueryMetainfo : QueryMetaInfo = {
55 | return new QueryMetaInfo(RandomUtils.nextLong(), RandomUtils.nextLong(), RandomUtils.nextInt(3), RandomUtils.nextBoolean())
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/ResultsConverter.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.utils
2 |
3 | import org.apache.spark.sql.catalyst.expressions.Row
4 | import org.apache.avro.generic.GenericDatumWriter
5 | import org.apache.avro.generic.GenericRecord
6 | import java.io.ByteArrayOutputStream
7 | import org.apache.avro.io.EncoderFactory
8 | import spray.json.DefaultJsonProtocol._
9 | import com.xpatterns.jaws.data.DTO.AvroResult
10 | import com.xpatterns.jaws.data.DTO.CustomResult
11 | import com.xpatterns.jaws.data.DTO.Column
12 | import spray.json._
13 | import com.google.gson.GsonBuilder
14 | import java.sql.Timestamp
15 | import collection.JavaConversions._
16 | import com.xpatterns.jaws.data.DTO.AvroBinaryResult
17 | import org.apache.spark.sql.types.StructType
18 |
19 | class ResultsConverter(val schema: StructType, val result: Array[Row]) {
20 |
21 | def toAvroResults(): AvroResult = {
22 | val avroSchema = AvroConverter.getAvroSchema(schema)
23 | val avroResults = AvroConverter.getAvroResult(result, schema)
24 | new AvroResult(avroSchema, avroResults)
25 | }
26 |
27 | def toCustomResults(): CustomResult = {
28 | val gson = new GsonBuilder().create()
29 | val customSchema = CustomConverter.getCustomSchema(schema)
30 |
31 | new CustomResult(customSchema, CustomConverter.getCustomResult(result, schema))
32 | }
33 |
34 | def toAvroBinaryResults(): AvroBinaryResult = {
35 | new AvroBinaryResult(toAvroResults())
36 | }
37 |
38 | }
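A construction sketch mirroring Randomizer.getResultsConverter above: build a Spark schema and rows, then materialize both result representations.

    import org.apache.spark.sql.types._
    import org.apache.spark.sql.catalyst.expressions.Row

    val schema = new StructType(Array(
      new StructField("int", IntegerType, false),
      new StructField("str", StringType, true)))
    val rows = Array(Row.fromSeq(Seq(1, "a")), Row.fromSeq(Seq(2, "b")))

    val converter    = new ResultsConverter(schema, rows)
    val avroResult   = converter.toAvroResults()    // Avro schema plus Avro records
    val customResult = converter.toCustomResults()  // Column DTOs plus plain values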
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/test/resources/application.conf:
--------------------------------------------------------------------------------
1 |
2 | ########## hadoop configuration - skip this if you are using cassandra logging ########
3 | hadoopConf {
4 | namenode="hdfs://devbox.local:8020"
5 | replicationFactor=1
6 | # set on true if you want to start fresh (all the existing folders will be recreated)
7 | forcedMode=true
8 | # folder where to write the logs
9 | loggingFolder=jawsLogs
10 | # folder where to write the jobs states
11 | stateFolder=jawsStates
12 | # folder where to write the jobs details
13 | detailsFolder=jawsDetails
14 | # folder where to write the jobs results
15 | resultsFolder=jawsResultsFolder
16 | # folder where to write the jobs meta information
17 | metaInfoFolder=jawsMetainfoFolder
18 | # folder where to write the name of query information
19 | queryNameFolder=jawsQueryNameFolder
20 | # folder where to write the published queries
21 | queryPublishedFolder=jawsQueryPublishedFolder
22 | # folder where to write the unpublished queries
23 | queryUnpublishedFolder=jawsQueryUnpublishedFolder
24 | # folder where to write the parquet tables information
25 | parquetTablesFolder=parquetTablesFolder
26 | }
27 |
28 | ########## cassandra configuration - skip this if you are using hdfs logging ##########
29 | cassandraConf {
30 | cassandra.host="devbox.local:9160"
31 | cassandra.keyspace=xpatterns_jaws
32 | cassandra.cluster.name=Jaws
33 | }
34 |
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsCassandraParquetTablesTest.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables
4 | import org.scalatest.FunSuite
5 | import org.scalatest.BeforeAndAfter
6 | import com.typesafe.config.ConfigFactory
7 | import me.prettyprint.cassandra.service.CassandraHostConfigurator
8 | import me.prettyprint.cassandra.service.ThriftCluster
9 | import me.prettyprint.hector.api.factory.HFactory
10 | import me.prettyprint.cassandra.model.AllOneConsistencyLevelPolicy
11 | import com.xpatterns.jaws.data.DTO.ParquetTable
12 | import org.junit.runner.RunWith
13 | import org.scalatest.junit.JUnitRunner
14 | import com.xpatterns.jaws.data.utils.Randomizer
15 |
16 | @RunWith(classOf[JUnitRunner])
17 | class JawsCassandraParquetTablesTest extends FunSuite with BeforeAndAfter {
18 |
19 | var pTablesDal: TJawsParquetTables = _
20 |
21 | before {
22 | if (pTablesDal == null) {
23 |
24 | val conf = ConfigFactory.load
25 |
26 | val cassandraConf = conf.getConfig("cassandraConf").withFallback(conf)
27 |
28 | // cassandra configuration
29 | val cassandraHost = cassandraConf.getString("cassandra.host")
30 | val cassandraKeyspace = cassandraConf.getString("cassandra.keyspace")
31 | val cassandraClusterName = cassandraConf.getString("cassandra.cluster.name")
32 |
33 | val cassandraHostConfigurator = new CassandraHostConfigurator(cassandraHost)
34 | val cluster = new ThriftCluster(cassandraClusterName, cassandraHostConfigurator)
35 | val keyspace = HFactory.createKeyspace(cassandraKeyspace, cluster, new AllOneConsistencyLevelPolicy)
36 |
37 | // ATTENTION: truncating the parquet_tables column family
38 | cluster.truncate(keyspace.getKeyspaceName(), "parquet_tables")
39 |
40 | pTablesDal = new JawsCassandraParquetTables(keyspace)
41 | }
42 |
43 | pTablesDal
44 | }
45 |
46 | test("testAddReadTable") {
47 | val table = Randomizer.getParquetTable
48 |
49 | pTablesDal.addParquetTable(table)
50 | val resultTable = pTablesDal.readParquetTable(table.name)
51 | assert(table === resultTable)
52 | pTablesDal.deleteParquetTable(table.name)
53 |
54 | }
55 |
56 | test("testDeleteTable") {
57 | val table = Randomizer.getParquetTable
58 |
59 | pTablesDal.addParquetTable(table)
60 | val tableBeforeDeletion = pTablesDal.readParquetTable(table.name)
61 | pTablesDal.deleteParquetTable(table.name)
62 | val tableAfterDeletion = pTablesDal.readParquetTable(table.name)
63 |
64 | assert(table === tableBeforeDeletion)
65 | assert(new ParquetTable === tableAfterDeletion)
66 |
67 | }
68 |
69 | test("testDeleteUnexistingTable") {
70 | val tName = Randomizer.getRandomString(5)
71 | pTablesDal.deleteParquetTable(tName)
72 | val tableAfterDeletion = pTablesDal.readParquetTable(tName)
73 |
74 | assert(new ParquetTable === tableAfterDeletion)
75 |
76 | }
77 |
78 | test("testTableDoesntExist") {
79 | val tName = Randomizer.getRandomString(5)
80 | assert(false === pTablesDal.tableExists(tName))
81 | }
82 |
83 | test("testTableExists") {
84 | val table = Randomizer.getParquetTable
85 | pTablesDal.addParquetTable(table)
86 | assert(true === pTablesDal.tableExists(table.name))
87 | pTablesDal.deleteParquetTable(table.name)
88 | }
89 |
90 | test("testGetTables Empty") {
91 | val result = pTablesDal.listParquetTables
92 | assert(false === (result == null))
93 | assert(0 === result.size)
94 | }
95 |
96 | test("testGetTables") {
97 | val tables = Randomizer.getParquetTables(5)
98 | tables.foreach(table => pTablesDal.addParquetTable(table))
99 | val result = pTablesDal.listParquetTables
100 | tables.foreach(table => pTablesDal.deleteParquetTable(table.name))
101 |
102 | assert(false === (result == null))
103 | assert(5 === result.size)
104 | tables.foreach(table => assert(true === result.contains(table)))
105 | }
106 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsHdfsParquetTablesTest.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.junit.JUnitRunner
5 | import org.scalatest.FunSuite
6 | import org.scalatest.BeforeAndAfter
7 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables
8 | import com.typesafe.config.ConfigFactory
9 | import com.xpatterns.jaws.data.utils.Utils
10 | import com.xpatterns.jaws.data.utils.Randomizer
11 | import com.xpatterns.jaws.data.DTO.ParquetTable
12 |
13 | @RunWith(classOf[JUnitRunner])
14 | class JawsHdfsParquetTablesTest extends FunSuite with BeforeAndAfter {
15 |
16 |
17 | var pTablesDal: TJawsParquetTables = _
18 |
19 | before {
20 | if (pTablesDal == null) {
21 | val conf = ConfigFactory.load
22 | val hadoopConf = conf.getConfig("hadoopConf").withFallback(conf)
23 |
24 | //hadoop conf
25 | val replicationFactor = Option(hadoopConf.getString("replicationFactor"))
26 | val forcedMode = Option(hadoopConf.getString("forcedMode"))
27 | val loggingFolder = Option(hadoopConf.getString("loggingFolder"))
28 | val stateFolder = Option(hadoopConf.getString("stateFolder"))
29 | val detailsFolder = Option(hadoopConf.getString("detailsFolder"))
30 | val resultsFolder = Option(hadoopConf.getString("resultsFolder"))
31 | val metaInfoFolder = Option(hadoopConf.getString("metaInfoFolder"))
32 | val queryNameFolder = Option(hadoopConf.getString("queryNameFolder"))
33 | val parquetTablesFolder = Option(hadoopConf.getString("parquetTablesFolder"))
34 | val namenode = Option(hadoopConf.getString("namenode"))
35 |
36 | val configuration = new org.apache.hadoop.conf.Configuration()
37 | configuration.setBoolean(Utils.FORCED_MODE, forcedMode.getOrElse("false").toBoolean)
38 |
39 | // set hadoop name node and job tracker
40 | namenode match {
41 | case None => {
42 | throw new RuntimeException("You need to set the namenode! ")
43 | }
44 | case _ => configuration.set("fs.defaultFS", namenode.get)
45 |
46 | }
47 |
48 | configuration.set("dfs.replication", replicationFactor.getOrElse("1"))
49 | configuration.set(Utils.PARQUET_TABLES_FOLDER, parquetTablesFolder.getOrElse("parquetTablesFolder"))
50 |
51 | pTablesDal = new JawsHdfsParquetTables(configuration)
52 | }
53 |
54 | pTablesDal
55 | }
56 |
57 | test("testAddReadTable") {
58 | val table = Randomizer.getParquetTable
59 |
60 | pTablesDal.addParquetTable(table)
61 | val resultTable = pTablesDal.readParquetTable(table.name)
62 | assert(table === resultTable)
63 | pTablesDal.deleteParquetTable(table.name)
64 |
65 | }
66 |
67 | test("testDeleteTable") {
68 | val table = Randomizer.getParquetTable
69 |
70 | pTablesDal.addParquetTable(table)
71 | val tableBeforeDeletion = pTablesDal.readParquetTable(table.name)
72 | pTablesDal.deleteParquetTable(table.name)
73 | val tableAfterDeletion = pTablesDal.readParquetTable(table.name)
74 |
75 | assert(table === tableBeforeDeletion)
76 | assert(new ParquetTable === tableAfterDeletion)
77 |
78 | }
79 |
80 | test("testDeleteUnexistingTable") {
81 | val tName = Randomizer.getRandomString(5)
82 | pTablesDal.deleteParquetTable(tName)
83 | val tableAfterDeletion = pTablesDal.readParquetTable(tName)
84 |
85 | assert(new ParquetTable === tableAfterDeletion)
86 |
87 | }
88 |
89 | test("testTableDoesntExist") {
90 | val tName = Randomizer.getRandomString(5)
91 | assert(false === pTablesDal.tableExists(tName))
92 | }
93 |
94 | test("testTableExists") {
95 | val table = Randomizer.getParquetTable
96 | pTablesDal.addParquetTable(table)
97 | assert(true === pTablesDal.tableExists(table.name))
98 | pTablesDal.deleteParquetTable(table.name)
99 | }
100 |
101 | test("testGetTables Empty") {
102 | val result = pTablesDal.listParquetTables
103 | assert(false === (result == null))
104 | assert(0 === result.size)
105 | }
106 |
107 | test("testGetTables") {
108 | val tables = Randomizer.getParquetTables(5)
109 | tables.foreach(table => pTablesDal.addParquetTable(table))
110 | val result = pTablesDal.listParquetTables
111 | tables.foreach(table => pTablesDal.deleteParquetTable(table.name))
112 |
113 | assert(false === (result == null))
114 | assert(5 === result.size)
115 | tables.foreach(table => assert(true === result.contains(table)))
116 | }
117 |
118 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsResultsOnHdfsTest.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import org.scalatest.{ BeforeAndAfter, FunSuite }
4 | import com.typesafe.config.ConfigFactory
5 | import com.xpatterns.jaws.data.utils.{ Randomizer, Utils }
6 | import com.xpatterns.jaws.data.contracts.TJawsResults
7 | import org.junit.runner.RunWith
8 | import org.scalatest.junit.JUnitRunner
9 | import com.xpatterns.jaws.data.DTO.AvroResult
10 | import com.xpatterns.jaws.data.DTO.CustomResult
11 |
12 | /**
13 | * Created by emaorhian on 7/28/14.
14 | */
15 | @RunWith(classOf[JUnitRunner])
16 | class JawsResultsOnHdfsTest extends FunSuite with BeforeAndAfter {
17 |
18 | var resultsDal: TJawsResults = _
19 |
20 | before {
21 | if (resultsDal == null) {
22 |
23 | val conf = ConfigFactory.load
24 |
25 | val hadoopConf = conf.getConfig("hadoopConf").withFallback(conf)
26 |
27 | //hadoop conf
28 | val replicationFactor = Option(hadoopConf.getString("replicationFactor"))
29 | val forcedMode = Option(hadoopConf.getString("forcedMode"))
30 | val loggingFolder = Option(hadoopConf.getString("loggingFolder"))
31 | val stateFolder = Option(hadoopConf.getString("stateFolder"))
32 | val detailsFolder = Option(hadoopConf.getString("detailsFolder"))
33 | val resultsFolder = Option(hadoopConf.getString("resultsFolder"))
34 | val metaInfoFolder = Option(hadoopConf.getString("metaInfoFolder"))
35 | val queryNameFolder = Option(hadoopConf.getString("queryNameFolder"))
36 | val queryPublishedFolder = Option(hadoopConf.getString("queryPublishedFolder"))
37 | val queryUnpublishedFolder = Option(hadoopConf.getString("queryUnpublishedFolder"))
38 | val namenode = Option(hadoopConf.getString("namenode"))
39 |
40 | val configuration = new org.apache.hadoop.conf.Configuration()
41 | configuration.setBoolean(Utils.FORCED_MODE, forcedMode.getOrElse("false").toBoolean)
42 |
43 | // set hadoop name node and job tracker
44 | namenode match {
45 | case None => {
46 | throw new RuntimeException("You need to set the namenode! ")
47 | }
48 | case _ => configuration.set("fs.defaultFS", namenode.get)
49 |
50 | }
51 |
52 | configuration.set("dfs.replication", replicationFactor.getOrElse("1"))
53 |
54 | configuration.set(Utils.LOGGING_FOLDER, loggingFolder.getOrElse("jawsLogs"))
55 | configuration.set(Utils.STATUS_FOLDER, stateFolder.getOrElse("jawsStates"))
56 | configuration.set(Utils.DETAILS_FOLDER, detailsFolder.getOrElse("jawsDetails"))
57 | configuration.set(Utils.METAINFO_FOLDER, metaInfoFolder.getOrElse("jawsMetainfoFolder"))
58 | configuration.set(Utils.QUERY_NAME_FOLDER, queryNameFolder.getOrElse("jawsQueryNameFolder"))
59 | configuration.set(Utils.QUERY_PUBLISHED_FOLDER, queryPublishedFolder.getOrElse("jawsQueryPublishedFolder"))
60 | configuration.set(Utils.QUERY_UNPUBLISHED_FOLDER, queryUnpublishedFolder.getOrElse("jawsQueryUnpublishedFolder"))
61 | configuration.set(Utils.RESULTS_FOLDER, resultsFolder.getOrElse("jawsResultsFolder"))
62 | resultsDal = new JawsHdfsResults(configuration)
63 | }
64 |
65 | resultsDal
66 | }
67 |
68 | test("testWriteReadResults") {
69 | val uuid = Randomizer.getRandomString(10)
70 | val resultsConverter = Randomizer.getResultsConverter
71 | resultsDal.setResults(uuid, resultsConverter)
72 |
73 | val avroResults = resultsDal.getAvroResults(uuid)
74 | val customResults = resultsDal.getCustomResults(uuid)
75 |
76 | assert(resultsConverter.toAvroResults() === avroResults)
77 | assert(resultsConverter.toCustomResults() === customResults)
78 |
79 | }
80 |
81 | test("testDeleteResults") {
82 | val uuid = Randomizer.getRandomString(10)
83 | val resultsConverter = Randomizer.getResultsConverter
84 | resultsDal.setResults(uuid, resultsConverter)
85 |
86 | val avroResults = resultsDal.getAvroResults(uuid)
87 | val customResults = resultsDal.getCustomResults(uuid)
88 |
89 | resultsDal.deleteResults(uuid)
90 |
91 | val avroResultsDeleted = resultsDal.getAvroResults(uuid)
92 | val customResultsDeleted = resultsDal.getCustomResults(uuid)
93 |
94 | assert(resultsConverter.toAvroResults() === avroResults)
95 | assert(resultsConverter.toCustomResults() === customResults)
96 | assert(new AvroResult() === avroResultsDeleted)
97 | assert(new CustomResult() === customResultsDeleted)
98 |
99 | }
100 |
101 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsResultsTest.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.impl
2 |
3 | import org.scalatest.FunSuite
4 | import com.xpatterns.jaws.data.DTO.Column
5 | import org.apache.commons.lang.RandomStringUtils
6 | import com.xpatterns.jaws.data.utils.Randomizer
7 | import com.xpatterns.jaws.data.contracts.TJawsResults
8 | import org.scalatest.BeforeAndAfter
9 | import me.prettyprint.cassandra.service.CassandraHostConfigurator
10 | import org.junit.Assert
11 | import com.typesafe.config.ConfigFactory
12 | import me.prettyprint.cassandra.service.ThriftCluster
13 | import me.prettyprint.hector.api.factory.HFactory
14 | import me.prettyprint.cassandra.model.AllOneConsistencyLevelPolicy
15 | import org.junit.runner.RunWith
16 | import org.scalatest.junit.JUnitRunner
17 | import com.xpatterns.jaws.data.DTO.AvroResult
18 | import com.xpatterns.jaws.data.DTO.CustomResult
19 | import scala.collection.mutable.ArrayBuffer
20 | import org.apache.spark.sql.catalyst.expressions.Row
21 |
22 | @RunWith(classOf[JUnitRunner])
23 | class JawsResultsTest extends FunSuite with BeforeAndAfter {
24 |
25 | var resultsDal: TJawsResults = _
26 |
27 | before {
28 | if (resultsDal == null) {
29 |
30 | val conf = ConfigFactory.load
31 |
32 | val cassandraConf = conf.getConfig("cassandraConf").withFallback(conf)
33 |
34 | // cassandra configuration
35 | val cassandraHost = cassandraConf.getString("cassandra.host")
36 | val cassandraKeyspace = cassandraConf.getString("cassandra.keyspace")
37 | val cassandraClusterName = cassandraConf.getString("cassandra.cluster.name")
38 |
39 | val cassandraHostConfigurator = new CassandraHostConfigurator(cassandraHost)
40 | val cluster = new ThriftCluster(cassandraClusterName, cassandraHostConfigurator)
41 | val keyspace = HFactory.createKeyspace(cassandraKeyspace, cluster, new AllOneConsistencyLevelPolicy)
42 |
43 | resultsDal = new JawsCassandraResults(keyspace)
44 | }
45 |
46 | resultsDal
47 | }
48 |
49 | test("testWriteReadResults") {
50 | val uuid = Randomizer.getRandomString(10)
51 | val resultsConverter = Randomizer.getResultsConverter
52 | resultsDal.setResults(uuid, resultsConverter)
53 |
54 | val avroResults = resultsDal.getAvroResults(uuid)
55 | val customResults = resultsDal.getCustomResults(uuid)
56 |
57 | assert(resultsConverter.toAvroResults() === avroResults)
58 | assert(resultsConverter.toCustomResults() === customResults)
59 |
60 | }
61 |
62 | test("testDeleteResults") {
63 | val uuid = Randomizer.getRandomString(10)
64 | val resultsConverter = Randomizer.getResultsConverter
65 | resultsDal.setResults(uuid, resultsConverter)
66 |
67 | val avroResults = resultsDal.getAvroResults(uuid)
68 | val customResults = resultsDal.getCustomResults(uuid)
69 |
70 | resultsDal.deleteResults(uuid)
71 |
72 | val avroResultsDeleted = resultsDal.getAvroResults(uuid)
73 | val customResultsDeleted = resultsDal.getCustomResults(uuid)
74 |
75 | assert(resultsConverter.toAvroResults() === avroResults)
76 | assert(resultsConverter.toCustomResults() === customResults)
77 | assert(new AvroResult() === avroResultsDeleted)
78 | assert(new CustomResult() === customResultsDeleted)
79 |
80 | }
81 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/utils/AvroConverterComplexTest.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.utils
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.junit.JUnitRunner
5 | import org.scalatest.FunSuite
6 | import org.apache.spark.SparkConf
7 | import org.apache.spark.SparkContext
8 | import org.apache.spark.sql.SQLContext
9 | import org.apache.spark.sql.types.StructType
10 | import org.apache.spark.sql.types.DataType
11 | import com.xpatterns.jaws.data.DTO.AvroResult
12 |
13 | case class Obj(myString: String, myInteger: Int)
14 | case class ObjString(s1: String, s2: String)
15 | case class ComplexObj(s: String, obj: Obj)
16 |
17 | case class CompleteObj(
18 | myByte: Byte,
19 | myShort: Short,
20 | myLong: Long,
21 | myFloat: Float,
22 | myDouble: Double,
23 | myBoolean: Boolean,
24 | myObj: Obj,
25 | myMap: Map[String, Int],
26 | mySequence: Seq[Int],
27 | myObjSequence: Seq[Obj],
28 | myByteArray: Array[Byte])
29 |
30 | case class CompleteObj2(
31 | myByte: Byte,
32 | myShort: Short,
33 | myLong: Long,
34 | myFloat: Float,
35 | myDouble: Double,
36 | myObj: Obj,
37 | myMap: Map[String, Int],
38 | mySequence: Seq[Int],
39 | myObjSequence: Seq[Obj],
40 | myByteArray: Array[Byte])
41 | case class ComplObject(
42 | myString: String,
43 | myInt: Int,
44 | myByte: Byte,
45 | myShort: Short,
46 | myLong: Long,
47 | myFloat: Float,
48 | myDouble: Double,
49 | myBoolean: Boolean,
50 | myObj: Obj,
51 | myMap1: Map[String, Obj],
52 | myMap2: Map[String, ObjString],
53 | myMap3: Map[String, ComplexObj],
54 | mySequence: Seq[Seq[Array[Seq[Array[Array[Seq[CompleteObj]]]]]]],
55 | myArray: Array[ComplexObj])
56 |
57 | case class Obj1(array: Array[Array[Array[Obj]]])
58 | case class Obj4(array: Seq[Seq[Seq[Obj]]])
59 | case class Obj2(map: Map[String, Map[String, Map[String, Obj]]])
60 | case class Obj3(map: Map[String, Map[String, Obj]])
61 |
62 | @RunWith(classOf[JUnitRunner])
63 | class AvroConverterComplexTest extends FunSuite {
64 |
65 | def newObj(i: Int) = new Obj("s1" + i, i)
66 | def newObjString(i: Int) = new ObjString("s1_" + i, "s2_" + i)
67 | def newComplexObj(i: Int) = new ComplexObj("s_" + i, newObj(i))
68 | def newCompleteObj(i: Int) = new CompleteObj(
69 | Byte.MaxValue,
70 | Short.MaxValue,
71 | i,
72 | 0.3f,
73 | 0.6d,
74 | false,
75 | newObj(i),
76 | Map(("key1", i), ("key2", i + 1)),
77 | List(i, i + 1, i + 2),
78 | List(newObj(i + 1), newObj(i + 2)),
79 | Array((65 + i).toByte, (66 + i).toByte))
80 |
81 | def newComplObj(i: Int) = new ComplObject(
82 | "newComplObj " + i,
83 | Int.MaxValue,
84 | Byte.MinValue,
85 | Short.MinValue,
86 | Long.MaxValue,
87 | Float.PositiveInfinity,
88 | Double.MinPositiveValue,
89 | i % 2 == 0,
90 | newObj(i + 100),
91 | Map(("str11", newObj(i + 10)), ("str12", newObj(i + 11)), ("str13", newObj(i + 12)), ("str14", newObj(i + 13)), ("str15", newObj(i + 14)), ("str16", newObj(i + 15)), ("str17", newObj(i + 16)), ("str18", newObj(i + 17))),
92 | Map(("str21", newObjString(i + 20)), ("str22", newObjString(i + 21)), ("str23", newObjString(i + 22)), ("str24", newObjString(i + 23)), ("str25", newObjString(i + 24)), ("str26", newObjString(i + 25))),
93 | Map(("str31", newComplexObj(i + 30)), ("str32", newComplexObj(i + 31)), ("str33", newComplexObj(i + 32)), ("str34", newComplexObj(i + 33)), ("str35", newComplexObj(i + 34))),
94 | Seq(Seq(Array(Seq(Array(Array(Seq(newCompleteObj(i), newCompleteObj(i + 1), newCompleteObj(i + 2), newCompleteObj(i + 3), newCompleteObj(i + 4), newCompleteObj(i + 5)),
95 | Seq(newCompleteObj(i + 7), newCompleteObj(i + 8), newCompleteObj(i + 9))),
96 | Array(Seq(newCompleteObj(i + 10), newCompleteObj(11)),
97 | Seq(newCompleteObj(i + 12), newCompleteObj(i + 13)))),
98 | Array(Array(Seq(newCompleteObj(i), newCompleteObj(i + 1), newCompleteObj(i + 2), newCompleteObj(i + 3), newCompleteObj(i + 4), newCompleteObj(i + 5)),
99 | Seq(newCompleteObj(i + 7), newCompleteObj(i + 8), newCompleteObj(i + 9))),
100 | Array(Seq(newCompleteObj(i + 10), newCompleteObj(11)),
101 | Seq(newCompleteObj(i + 12), newCompleteObj(i + 13)))))))),
102 | Array(newComplexObj(i), newComplexObj(i / 2), newComplexObj(i / 3)))
103 |
104 | test("complex") {
105 | val listInt = (1 to 10).toList
106 |
107 | val conf = new SparkConf().setAppName("Simple Application").setMaster("local")
108 | val sc = new SparkContext(conf)
109 |
110 | val sqlContext = new SQLContext(sc)
111 | import sqlContext.implicits._
112 |
113 |
114 | val df = sc.parallelize(listInt.map(newComplObj(_))).toDF()
115 |
116 | val result = AvroConverter.getAvroResult(df.collect, df.schema)
117 | val schema = AvroConverter.getAvroSchema(df.schema)
118 | val avroResult = new AvroResult(schema, result)
119 | val serialized = avroResult.serializeResult
120 | val deserialized = AvroResult.deserializeResult(serialized, schema)
121 | sc.stop()
122 | }
123 |
124 | test("complex custom") {
125 | val listInt = (1 to 10).toList
126 |
127 | val conf = new SparkConf().setAppName("Simple Application").setMaster("local")
128 | val sc = new SparkContext(conf)
129 |
130 | val sqlContext = new SQLContext(sc)
131 | import sqlContext.implicits._
132 |
133 |
134 | val df = sc.parallelize(listInt.map(newComplObj(_))).toDF()
135 |
136 | val result = CustomConverter.getCustomResult(df.collect, df.schema)
137 | val schema = CustomConverter.getCustomSchema(df.schema)
138 | sc.stop()
139 | }
140 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/utils/AvroConverterCustomTest.scala:
--------------------------------------------------------------------------------
1 | package com.xpatterns.jaws.data.utils
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.SparkContext
5 | import org.apache.spark.sql.SQLContext
6 | import org.junit.runner.RunWith
7 | import org.scalatest.junit.JUnitRunner
8 | import org.scalatest.FunSuite
9 | import com.xpatterns.jaws.data.utils.ResultsConverter
10 | import com.xpatterns.jaws.data.DTO.AvroResult
11 | //import org.apache.spark.sql.parquet.SparkParquetUtility._
12 |
13 | case class Positions(
14 | start: Int,
15 | end: Int)
16 |
17 | case class Terms(
18 | name: String,
19 | score: Double,
20 | positions: Seq[Positions])
21 |
22 | case class AnnotatedTerms(
23 | name: String,
24 | category: String,
25 | score: Double,
26 | positions: Seq[Positions])
27 |
28 | case class Categories(
29 | name: String,
30 | score: Double)
31 |
32 | case class DocMetainfo(
33 | categories: Seq[Categories],
34 | annotated_terms: Seq[AnnotatedTerms],
35 | terms: Seq[Terms])
36 |
37 | case class NewPubmed(
38 | authors: Seq[String],
39 | body: String,
40 | category: String,
41 | documentId: String,
42 | doc_metainfo: DocMetainfo,
43 | publicationDate: String,
44 | publicationYear: Int,
45 | title: String)
46 |
47 | @RunWith(classOf[JUnitRunner])
48 | class AvroConverterCustomTest extends FunSuite {
49 |
50 | test("result with map of strings") {
51 | val conf = new SparkConf().setAppName("Simple Application").setMaster("local[2]")
52 | val sc = new SparkContext(conf)
53 |
54 | val sqlContext = new SQLContext(sc)
55 | import sqlContext.implicits._
56 |
57 | val pbList = List(1)
58 | val df = sc.parallelize(pbList).map(_ => new NewPubmed(
59 | List("ana", "ion"),
60 | "body",
61 | "category",
62 | "documentId",
63 | new DocMetainfo(
64 | List(new Categories("name", 1.1)),
65 | List(new AnnotatedTerms("", "category", 1.3, Seq(new Positions(1, 1)))),
66 | List(new Terms("name", 1.5, List(new Positions(1, 2))))),
67 | "publicationDate",
68 | 2015,
69 | "title")).toDF
70 |
71 | val values = df.collect
72 | val result = AvroConverter.getAvroResult(values, df.schema)
73 | val schema = AvroConverter.getAvroSchema(df.schema)
74 | val ar = new AvroResult(schema, result)
75 | val serialized = ar.serializeResult()
76 | val deserialized = AvroResult.deserializeResult(serialized, schema)
77 |
78 | sc.stop()
79 | print("done")
80 | }
81 |
82 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2014 Atigeo, LLC.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | http://www.apache.org/licenses/LICENSE-2.0
6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | com.xpatterns
6 | jaws-spark-sql-rest-integration-tests
7 | 1.1.0
8 | jar
9 |
10 | jaws-spark-sql-rest-integration-tests
11 | http://maven.apache.org
12 |
13 |
14 |
15 | mvnrepository
16 | http://repo1.maven.org/maven2
17 |
18 | false
19 |
20 |
21 | true
22 |
23 |
24 |
25 | cloudera-repo-releases
26 | https://repository.cloudera.com/artifactory/repo/
27 |
28 |
29 | Akka repository
30 | http://repo.akka.io/releases
31 |
32 |
33 |
34 |
35 | UTF-8
36 | 2.10
37 | 1.2.3
38 | 2.10.3
39 | 2.0.5
40 | 1.2.3
41 | 1.2.1
42 |
43 |
44 |
45 |
46 | org.scalatest
47 | scalatest_2.10
48 | 2.2.4
49 |
50 |
51 | com.typesafe
52 | config
53 | 1.2.1
54 |
55 |
56 | junit
57 | junit
58 | 4.4
59 |
60 |
61 | io.spray
62 | spray-client
63 | 1.3.1
64 |
65 |
66 | com.typesafe.akka
67 | akka-actor_2.10
68 | 2.3.0
69 |
70 |
71 |
72 | com.xpatterns
73 | jaws-spark-sql-data
74 | 1.1.0-spark1.1.0
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 | src/test/resources
83 |
84 |
85 | ${project.artifactId}
86 |
87 |
88 | org.apache.maven.plugins
89 | maven-compiler-plugin
90 |
91 |
92 | 1.6
93 | 1.6
94 |
95 |
96 |
97 | org.scala-tools
98 | maven-scala-plugin
99 |
100 |
101 | scala-compile-first
102 | process-resources
103 |
104 | add-source
105 | compile
106 |
107 |
108 |
109 | scala-test-compile
110 | process-test-resources
111 |
112 | testCompile
113 |
114 |
115 |
116 |
117 |
118 | org.apache.maven.plugins
119 | maven-shade-plugin
120 | 2.2
121 |
122 |
123 |
124 | package
125 |
126 | shade
127 |
128 |
129 |
130 |
131 |
132 | *:*
133 |
134 | META-INF/*.SF
135 | META-INF/*.DSA
136 | META-INF/*.RSA
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 | org.apache.maven.plugins
147 | maven-surefire-plugin
148 | 2.7
149 |
150 | true
151 |
152 |
153 |
154 |
155 | org.scalatest
156 | scalatest-maven-plugin
157 | 1.0
158 |
159 | ${project.build.directory}/surefire-reports
160 | .
161 | WDF TestSuite.txt
162 |
163 |
164 |
165 | test
166 |
167 | test
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/resources/application.conf:
--------------------------------------------------------------------------------
1 | ######### application configuration ###################
2 | appConf{
3 | jawsUrl="http://devbox.local:9080/jaws/"
4 | jawsHiveUrl="http://devbox.local:7080/jaws/hive/"
5 | namenodeIp="devbox.local"
6 | hdfsInputFolder="jawsTestFolder"
7 | database="testJawsDatabase"
8 | table="testPersons"
9 | runTachyon=true
10 | parquetFolder=jawsTest.parquet
11 | parquetTable=jawsTestParquet
12 |
13 | }
14 |
15 |
16 |
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/_metadata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VeritoneAlpha/jaws-spark-sql-rest/e5b2d422f135d9307c54857d558b9022610a293c/jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/_metadata
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/part-r-1.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VeritoneAlpha/jaws-spark-sql-rest/e5b2d422f135d9307c54857d558b9022610a293c/jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/part-r-1.parquet
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/resources/people.txt:
--------------------------------------------------------------------------------
1 | Ana,5,f
2 | George,10,m
3 | Alina,20,f
4 | Paul,12,m
5 | Pavel,16,m
6 | Ioana,30,f
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/scala/api/GetDatabasesApiTest.scala:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import com.google.gson.Gson
4 | import com.xpatterns.jaws.data.DTO.{Tables, Databases}
5 | import org.junit.runner.RunWith
6 | import org.scalatest.junit.JUnitRunner
7 | import spray.client.pipelining._
8 | import scala.concurrent.Future
9 | import scala.concurrent.Await
10 | import scala.concurrent.duration.Duration._
11 | import scala.util.Success
12 | import scala.util.Failure
13 | import spray.http._
14 | import spray.httpx.SprayJsonSupport._
15 | import foundation.TestBase
16 | import scala.concurrent._
17 | import ExecutionContext.Implicits.global
18 |
19 | @RunWith(classOf[JUnitRunner])
20 | class GetDatabasesApiTest extends TestBase {
21 |
22 | test(" get databases ") {
23 | val url = s"${jawsUrl}hive/databases"
24 |
25 | val pipeline: HttpRequest => Future[Databases] = (
26 | addHeader("X-My-Special-Header", "fancy-value")
27 | ~> sendReceive
28 | ~> unmarshal[Databases])
29 |
30 | val response: Future[Databases] = pipeline(Get(url))
31 | Await.ready(response, Inf).value.get match {
32 | case Success(r: Databases) =>
33 | assert(r != null)
34 | assert(r.databases.contains("default"))
35 |
36 | case Failure(e) =>
37 | println(e.getMessage)
38 | fail()
39 | }
40 | }
41 |
42 | test("tables api") {
43 | val response = get(s"${jawsUrl}hive/tables")
44 |
45 | Await.ready(response, Inf).value.get match {
46 | case Success(r: HttpResponse) =>
47 | assert(r.status.isSuccess)
48 | val responseText = r.entity.data.asString
49 | val gson = new Gson()
50 | val tables = gson.fromJson(responseText, classOf[Array[Tables]])
51 | assert(tables.nonEmpty, "There is no table")
52 |
53 | case Failure(e) =>
54 | println(e.getMessage)
55 | fail()
56 | }
57 | }
58 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/scala/api/JawsIsUpTest.scala:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.junit.JUnitRunner
5 | import foundation.TestBase
6 | import com.xpatterns.jaws.data.utils.Utils
7 | import org.apache.hadoop.fs.FileUtil
8 | import org.apache.hadoop.fs.FileSystem
9 | import java.io.File
10 | import org.apache.hadoop.fs.Path
11 | import akka.io.IO
12 | import akka.pattern.ask
13 | import spray.can.Http
14 | import spray.http._
15 | import spray.client.pipelining._
16 | import akka.actor.ActorSystem
17 | import scala.concurrent.Future
18 | import scala.concurrent.Await
19 | import scala.concurrent.duration._
20 | import scala.concurrent.duration.Duration._
21 | import scala.util.Success
22 | import scala.util.Failure
23 |
24 | @RunWith(classOf[JUnitRunner])
25 | class JawsIsUpTest extends TestBase {
26 |
27 | test(" Jaws is up and running ") {
28 | implicit val system = ActorSystem()
29 | import system.dispatcher // execution context for futures
30 |
31 | val pipeline: HttpRequest => Future[HttpResponse] = sendReceive
32 | val response: Future[HttpResponse] = pipeline(Get(s"${jawsUrl}index"))
33 |
34 | Await.ready(response, Inf).value.get match {
35 | case Success(r : HttpResponse) => {
36 | assert(r.status.isSuccess)
37 | assert(r.entity.data.asString === "Jaws is up and running!", "Jaws is not Up!")
38 | }
39 |       case Failure(e) =>
40 |         println(e.getMessage)
41 |         fail()
42 | }
43 | }
44 |
45 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/scala/api/ParquetManagementApiTest.scala:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.junit.JUnitRunner
5 | import foundation.TestBase
6 | import com.xpatterns.jaws.data.utils.Utils
7 | import org.apache.hadoop.fs.FileUtil
8 | import org.apache.hadoop.fs.FileSystem
9 | import java.io.File
10 | import org.apache.hadoop.fs.Path
11 | import spray.http._
12 | import scala.concurrent.Await
13 | import scala.concurrent.duration.Duration._
14 | import scala.util.Success
15 | import scala.util.Failure
16 | @RunWith(classOf[JUnitRunner])
17 | class ParquetManagementApiTest extends TestBase {
18 |
19 | override def beforeAll() {
20 | println("creating parquet folder on hdfs")
21 | Utils.createFolderIfDoesntExist(hadoopConf, parquetFolder, true)
22 | val fs = FileSystem.newInstance(hadoopConf)
23 | val metadataFile = new File(getClass().getResource("/jawsTest.parquet/_metadata").getPath())
24 | val dataFile = new File(getClass().getResource("/jawsTest.parquet/part-r-1.parquet").getPath())
25 | FileUtil.copy(metadataFile, fs, new Path(parquetFolder), false, hadoopConf)
26 | FileUtil.copy(dataFile, fs, new Path(parquetFolder), false, hadoopConf)
27 | }
28 |
29 | test(" register test table ") {
30 |
31 | val username = System.getProperties().get("user.name")
32 | val url = s"${jawsUrl}parquet/tables?path=/user/$username/$parquetFolder/&pathType=hdfs&name=$parquetTable&overwrite=true"
33 |
34 | val postResult = post(url, "")
35 |
36 | Await.ready(postResult, Inf).value.get match {
37 | case Success(r: HttpResponse) => {
38 | assert(r.status.isSuccess)
39 | assert(r.entity.data.asString.equals(s"Table $parquetTable was registered"))
40 | }
41 | case Failure(e) => {
42 | println(e.getMessage)
43 | fail()
44 | }
45 |
46 | }
47 | }
48 |
49 | test(" register test table overwrite false ") {
50 |
51 | val username = System.getProperties().get("user.name")
52 | val url = s"${jawsUrl}parquet/tables?path=/user/$username/$parquetFolder/&pathType=hdfs&name=$parquetTable&overwrite=false"
53 |
54 | val postResult = post(url, "")
55 |
56 | Await.ready(postResult, Inf).value.get match {
57 | case Success(r: HttpResponse) => {
58 | assert(r.status.isFailure)
59 | assert(r.entity.data.asString.equals(s"The table already exists!"))
60 | }
61 | case Failure(e) => {
62 | println(e.getMessage)
63 | fail()
64 | }
65 |
66 | }
67 | }
68 |
69 | test(" select * from parquet table ") {
70 |
71 | val url = s"${jawsUrl}run?limited=true"
72 | val body = s"select * from $parquetTable"
73 |
74 | val queryId = postRun(url, body)
75 | val queryStatus = waitforCompletion(queryId, 100)
76 | assert(queryStatus === "DONE", "Query is not DONE!")
77 | validataAllResultsFromParquetTable(queryId)
78 |
79 | }
80 |
81 | test(" unregister test table ") {
82 |
83 | val username = System.getProperties().get("user.name")
84 | val url = s"${jawsUrl}parquet/tables/$parquetTable"
85 |
86 | val deleteResult = delete(url)
87 |
88 | Await.ready(deleteResult, Inf).value.get match {
89 | case Success(r: HttpResponse) => {
90 | assert(r.status.isSuccess)
91 | assert(r.entity.data.asString.equals(s"Table $parquetTable was unregistered"))
92 | }
93 | case Failure(e) => {
94 | println(e.getMessage)
95 | fail()
96 | }
97 |
98 | }
99 | }
100 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/scala/api/RunHiveApiTest.scala:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.junit.JUnitRunner
5 | import foundation.TestBase
6 | import com.xpatterns.jaws.data.utils.Utils
7 | import org.apache.hadoop.fs.FileUtil
8 | import org.apache.hadoop.fs.FileSystem
9 | import java.io.File
10 | import org.apache.hadoop.fs.Path
11 | import akka.io.IO
12 | import akka.pattern.ask
13 | import spray.can.Http
14 | import spray.http._
15 | import spray.client.pipelining._
16 | import akka.actor.ActorSystem
17 | import scala.concurrent.Future
18 | import scala.concurrent.Await
19 | import scala.concurrent.duration._
20 | import scala.concurrent.duration.Duration._
21 | import scala.util.Success
22 | import scala.util.Failure
23 | import scala.collection.GenSeq
24 |
25 | @RunWith(classOf[JUnitRunner])
26 | class RunHiveApiTest extends TestBase {
27 |
28 | test(" select count ") {
29 |
30 | val url = s"${jawsHiveUrl}run?limit=10"
31 | val body = s"use $database;\nselect count(*) from $table"
32 |
33 | val queryId = postRun(url, body)
34 | val queryStatus = waitforCompletion(queryId, 100)
35 | assert(queryStatus === "DONE", "Query is not DONE!")
36 | val results = getResults(queryId, 0, 200)
37 | assert(1 === results.result.length, "Different number of rows")
38 | assert(1 === results.result(0).length, "Different number of rows2")
39 | assert("6" === results.result(0)(0), "Different count")
40 | }
41 |
42 | test(" select * limited") {
43 |
44 | val url = s"${jawsHiveUrl}run?"
45 | val queryID = selectAllFromTable(url, table)
46 | validataAllResultsFromNormalTable(queryID, true)
47 | }
48 |
49 | test(" select * unlimited") {
50 |
51 | val url = s"${jawsHiveUrl}run"
52 | val queryID = selectAllFromTable(url, table)
53 | validataAllResultsFromNormalTable(queryID, true)
54 | }
55 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/scala/api/TestSuite.scala:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.junit.JUnitRunner
5 | import org.scalatest.Suites
6 |
7 | @RunWith(classOf[JUnitRunner])
8 | class TestSuite extends Suites(new JawsIsUpTest, new RunApiTest, new GetDatabasesApiTest, new ParquetManagementApiTest,
9 | new RunHiveApiTest) {
10 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-integration-tests/src/test/scala/foundation/UtilOperations.scala:
--------------------------------------------------------------------------------
1 | package foundation
2 |
3 | class UtilOperations{
4 |
5 | }
6 | object UtilOperations {
7 |
8 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2014 Atigeo, LLC.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | http://www.apache.org/licenses/LICENSE-2.0
6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/assembly/archive.xml:
--------------------------------------------------------------------------------
1 | <assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
2 |           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3 |           xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2
4 |                               http://maven.apache.org/xsd/assembly-1.1.2.xsd">
5 |   <id>archive</id>
6 |   <formats>
7 |     <format>tar.gz</format>
8 |   </formats>
9 |   <fileSets>
10 |     <fileSet>
11 |       <directory>${basedir}/src/main/webapp</directory>
12 |       <outputDirectory>resources/webapp</outputDirectory>
13 |     </fileSet>
14 |     <fileSet>
15 |       <directory>${project.build.directory}/temp_build</directory>
16 |       <outputDirectory>/</outputDirectory>
17 |     </fileSet>
18 |     <fileSet>
19 |       <directory>${basedir}/conf</directory>
20 |       <outputDirectory>/conf</outputDirectory>
21 |     </fileSet>
22 |     <fileSet>
23 |       <directory>${project.build.directory}</directory>
24 |       <outputDirectory>/target/</outputDirectory>
25 |       <includes>
26 |         <include>jaws-spark-sql-rest.jar</include>
27 |       </includes>
28 |     </fileSet>
29 |   </fileSets>
30 |   <files>
31 |     <file>
32 |       <source>${basedir}/src/main/scripts/start-jaws.sh</source>
33 |       <outputDirectory>/bin/</outputDirectory>
34 |     </file>
35 |   </files>
36 | </assembly>
37 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | spray.can.server {
2 | # uncomment the next line for making this an HTTPS example
3 | # ssl-encryption = on
4 | idle-timeout = 301 s
5 | request-timeout = 300 s
6 | }
7 |
8 | remote{
9 | akka {
10 | //loglevel = "DEBUG"
11 | actor {
12 | provider = "akka.remote.RemoteActorRefProvider"
13 | }
14 | remote {
15 | enabled-transports = ["akka.remote.netty.tcp"]
16 | log-sent-messages = on
17 | log-received-messages = on
18 | netty.tcp {
19 | transport-class = "akka.remote.transport.netty.NettyTransport"
20 | hostname = "devbox.local"
21 | port = 4042
22 | }
23 | }
24 | }
25 | }
26 |
27 | ############ spark configuration - see spark documentation ####################
28 | sparkConfiguration {
29 | spark-executor-memory=2g
30 | spark-mesos-coarse=false
31 | spark-scheduler-mode=FAIR
32 | spark-cores-max=2
33 | spark-master="spark://devbox.local:7077"
34 | spark-path="/home/ubuntu/latest-mssh/spark-1.1.0"
35 | spark-mesos-executor-home="/home/ubuntu/latest-mssh/spark-1.1.0"
36 | spark-default-parallelism=384
37 | spark-storage-memoryFraction=0.3
38 | spark-shuffle-memoryFraction=0.6
39 | spark-shuffle-compress=true
40 | spark-shuffle-spill-compress=true
41 | spark-reducer-maxMbInFlight=48
42 | spark-akka-frameSize=10000
43 | spark-akka-threads=4
44 | spark-akka-timeout=100
45 | spark-task-maxFailures=4
46 | spark-shuffle-consolidateFiles=true
47 | spark-deploy-spreadOut=true
48 | spark-shuffle-spill=false
49 | #Serialization settings commented until more tests are performed
50 | #spark-serializer="org.apache.spark.serializer.KryoSerializer"
51 | #spark-kryoserializer-buffer-mb=10
52 | #spark-kryoserializer-buffer-max-mb=64
53 | spark-kryo-referenceTracking=false
54 |
55 |
56 | }
57 |
58 | ######### application configuration ###################
59 | appConf{
60 | # the interface on which to start the spray server : localhost/ip/hostname
61 | server.interface=localhost
62 | # the cors filter allowed hosts
63 | cors-filter-allowed-hosts="*"
64 | # the default number of results retrieved on queries
65 | nr.of.results=100
66 | # the ip of the destination namenode - it is used when querying with an unlimited number of results
67 | rdd.destination.ip="devbox.local"
68 | # where to store the results in the case of an unlimited query. Possible values: hdfs/tachyon. Default: hdfs
69 | rdd.destination.location="hdfs"
70 | # the remote domain actor address
71 | remote.domain.actor=""
72 | #remote.domain.actor="devbox.local:port,devbox2.local:port"
73 | # application name
74 | application.name="Jaws"
75 | # the port on which to deploy the apis
76 | web.services.port=9080
77 | # the port on which to deploy the web sockets api (logs)
78 | web.sockets.port=8182
79 | # the number of threads used to execute shark commands
80 | nr.of.threads=10
81 | # implicit akka timeout
82 | timeout=1000000
83 | #where to log: app.logging.type = cassandra/hdfs
84 | app.logging.type=cassandra
85 | # folder where to write the results schema
86 | schemaFolder=jawsSchemaFolder
87 | # the path to the xpatterns-jaws in target folder
88 | jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/jaws-spark-sql-rest.jar
89 | # the path to the hdfs namenode
90 | hdfs-namenode-path="hdfs://devbox.local:8020"
91 | # the path to the tachyon namenode
92 | tachyon-namenode-path="tachyon://devbox.local:19998"
93 | #jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/test-app.jar
94 | }
95 |
96 | ########## hadoop configuration - skip this if you are using cassandra logging ########
97 | hadoopConf {
98 | namenode="hdfs://devbox.local:8020"
99 | replicationFactor=1
100 | # set to true if you want to start fresh (all the existing folders will be recreated)
101 | forcedMode=false
102 | # folder where to write the logs
103 | loggingFolder=jawsLogs
104 | # folder where to write the jobs states
105 | stateFolder=jawsStates
106 | # folder where to write the jobs details
107 | detailsFolder=jawsDetails
108 | # folder where to write the jobs results
109 | resultsFolder=jawsResultsFolder
110 | # folder where to write the jobs meta information
111 | metaInfoFolder=jawsMetainfoFolder
112 | # folder where to write the name of query information
113 | queryNameFolder=jawsQueryNameFolder
114 | # folder where to write the published queries
115 | queryPublishedFolder=jawsQueryPublishedFolder
116 | # folder where to write the unpublished queries
117 | queryUnpublishedFolder=jawsQueryUnpublishedFolder
118 | # folder where to write the parquet tables information
119 | parquetTablesFolder=parquetTablesFolder
120 | }
121 |
122 | ########## cassandra configuration - skip this if you are using hdfs logging ##########
123 | cassandraConf {
124 | cassandra.host="devbox.local:9160"
125 | cassandra.keyspace=xpatterns_jaws
126 | cassandra.cluster.name=Jaws
127 | }
128 |
129 |
130 |
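Usage note: the appConf keys above are plain Typesafe Config paths. The sketch below shows one way they can be read, assuming application.conf is on the classpath; the running server does this kind of lookup through server/Configuration.scala, so the object name here is only illustrative.

    import com.typesafe.config.ConfigFactory

    object AppConfExample extends App {
      // loads application.conf from the classpath
      val conf = ConfigFactory.load()
      val appConf = conf.getConfig("appConf")

      val webServicesPort = appConf.getInt("web.services.port")   // 9080
      val resultsNumber   = appConf.getInt("nr.of.results")       // 100
      val loggingType     = appConf.getString("app.logging.type") // "cassandra" or "hdfs"

      println(s"Jaws API port: $webServicesPort, default results: $resultsNumber, logging: $loggingType")
    }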
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/resources/cassandra-schema.txt:
--------------------------------------------------------------------------------
1 | create keyspace xpatterns_jaws
2 | with strategy_options={replication_factor:1}
3 | and placement_strategy = 'NetworkTopologyStrategy'
4 | and strategy_options = {DC1:2,DC2:2};
5 |
6 | use xpatterns_jaws;
7 |
8 | create column family logs
9 | with comparator = 'CompositeType(Int32Type,UTF8Type,LongType)'
10 | AND key_validation_class = 'Int32Type'
11 | AND default_validation_class = 'BytesType';
12 |
13 |
14 | create column family results
15 | with comparator = 'CompositeType(UTF8Type,UTF8Type,Int32Type)'
16 | AND key_validation_class = 'Int32Type'
17 | AND default_validation_class = 'BytesType';
18 |
19 |
20 | create column family parquet_tables
21 | with comparator = 'UTF8Type'
22 | AND key_validation_class = 'UTF8Type'
23 | AND default_validation_class = 'BytesType';
24 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/resources/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 |
4 | <configuration>
5 |   <property>
6 |     <name>fs.defaultFS</name>
7 |     <value>hdfs://devbox.local:8020</value>
8 |   </property>
9 |   <property>
10 |     <name>fs.trash.interval</name>
11 |     <value>1</value>
12 |   </property>
13 |   <property>
14 |     <name>io.compression.codecs</name>
15 |     <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
16 |   </property>
17 |   <property>
18 |     <name>hadoop.security.authentication</name>
19 |     <value>simple</value>
20 |   </property>
21 |   <property>
22 |     <name>hadoop.security.authorization</name>
23 |     <value>false</value>
24 |   </property>
25 |   <property>
26 |     <name>hadoop.rpc.protection</name>
27 |     <value>authentication</value>
28 |   </property>
29 |   <property>
30 |     <name>hadoop.security.auth_to_local</name>
31 |     <value>DEFAULT</value>
32 |   </property>
33 |   <property>
34 |     <name>hadoop.proxyuser.oozie.hosts</name>
35 |     <value>*</value>
36 |   </property>
37 |   <property>
38 |     <name>hadoop.proxyuser.oozie.groups</name>
39 |     <value>*</value>
40 |   </property>
41 |   <property>
42 |     <name>hadoop.proxyuser.mapred.hosts</name>
43 |     <value>*</value>
44 |   </property>
45 |   <property>
46 |     <name>hadoop.proxyuser.mapred.groups</name>
47 |     <value>*</value>
48 |   </property>
49 |   <property>
50 |     <name>hadoop.proxyuser.flume.hosts</name>
51 |     <value>*</value>
52 |   </property>
53 |   <property>
54 |     <name>hadoop.proxyuser.flume.groups</name>
55 |     <value>*</value>
56 |   </property>
57 |   <property>
58 |     <name>hadoop.proxyuser.HTTP.hosts</name>
59 |     <value>*</value>
60 |   </property>
61 |   <property>
62 |     <name>hadoop.proxyuser.HTTP.groups</name>
63 |     <value>*</value>
64 |   </property>
65 |   <property>
66 |     <name>hadoop.proxyuser.hive.hosts</name>
67 |     <value>*</value>
68 |   </property>
69 |   <property>
70 |     <name>hadoop.proxyuser.hive.groups</name>
71 |     <value>*</value>
72 |   </property>
73 |   <property>
74 |     <name>hadoop.proxyuser.hue.hosts</name>
75 |     <value>*</value>
76 |   </property>
77 |   <property>
78 |     <name>hadoop.proxyuser.hue.groups</name>
79 |     <value>*</value>
80 |   </property>
81 |   <property>
82 |     <name>hadoop.proxyuser.httpfs.hosts</name>
83 |     <value>*</value>
84 |   </property>
85 |   <property>
86 |     <name>hadoop.proxyuser.httpfs.groups</name>
87 |     <value>*</value>
88 |   </property>
89 |   <property>
90 |     <name>hadoop.proxyuser.hdfs.groups</name>
91 |     <value>*</value>
92 |   </property>
93 |   <property>
94 |     <name>hadoop.proxyuser.hdfs.hosts</name>
95 |     <value>*</value>
96 |   </property>
97 |   <property>
98 |     <name>hadoop.security.group.mapping</name>
99 |     <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
100 |   </property>
101 |   <property>
102 |     <name>hadoop.security.instrumentation.requires.admin</name>
103 |     <value>false</value>
104 |   </property>
105 |   <property>
106 |     <name>io.file.buffer.size</name>
107 |     <value>65536</value>
108 |   </property>
109 |   <property>
110 |     <name>hadoop.ssl.enabled</name>
111 |     <value>false</value>
112 |   </property>
113 |   <property>
114 |     <name>hadoop.ssl.require.client.cert</name>
115 |     <value>false</value>
116 |     <final>true</final>
117 |   </property>
118 |   <property>
119 |     <name>hadoop.ssl.keystores.factory.class</name>
120 |     <value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
121 |     <final>true</final>
122 |   </property>
123 |   <property>
124 |     <name>hadoop.ssl.server.conf</name>
125 |     <value>ssl-server.xml</value>
126 |     <final>true</final>
127 |   </property>
128 |   <property>
129 |     <name>hadoop.ssl.client.conf</name>
130 |     <value>ssl-client.xml</value>
131 |     <final>true</final>
132 |   </property>
133 |   <property>
134 |     <name>fs.tachyon.impl</name>
135 |     <value>tachyon.hadoop.TFS</value>
136 |   </property>
137 | </configuration>
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/resources/hive-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 |
4 | <configuration>
5 |   <property>
6 |     <name>hive.metastore.local</name>
7 |     <value>false</value>
8 |   </property>
9 |   <property>
10 |     <name>hive.metastore.uris</name>
11 |     <value>thrift://flaviusi-VirtualBox.local:9083</value>
12 |   </property>
13 |   <property>
14 |     <name>hive.metastore.client.socket.timeout</name>
15 |     <value>300</value>
16 |   </property>
17 |   <property>
18 |     <name>hive.metastore.warehouse.dir</name>
19 |     <value>/user/hive/warehouse</value>
20 |   </property>
21 |   <property>
22 |     <name>hive.warehouse.subdir.inherit.perms</name>
23 |     <value>true</value>
24 |   </property>
25 |   <property>
26 |     <name>mapred.reduce.tasks</name>
27 |     <value>-1</value>
28 |   </property>
29 |   <property>
30 |     <name>hive.exec.reducers.bytes.per.reducer</name>
31 |     <value>1073741824</value>
32 |   </property>
33 |   <property>
34 |     <name>hive.exec.reducers.max</name>
35 |     <value>999</value>
36 |   </property>
37 |   <property>
38 |     <name>hive.metastore.execute.setugi</name>
39 |     <value>true</value>
40 |   </property>
41 |   <property>
42 |     <name>hive.support.concurrency</name>
43 |     <value>false</value>
44 |   </property>
45 |   <property>
46 |     <name>hive.zookeeper.quorum</name>
47 |     <value>flaviusi-VirtualBox.local</value>
48 |   </property>
49 |   <property>
50 |     <name>hive.zookeeper.client.port</name>
51 |     <value>2181</value>
52 |   </property>
53 |   <property>
54 |     <name>hbase.zookeeper.quorum</name>
55 |     <value>flaviusi-VirtualBox.local</value>
56 |   </property>
57 |   <property>
58 |     <name>hbase.zookeeper.property.clientPort</name>
59 |     <value>2181</value>
60 |   </property>
61 |   <property>
62 |     <name>hive.zookeeper.namespace</name>
63 |     <value>hive_zookeeper_namespace_hive</value>
64 |   </property>
65 |   <property>
66 |     <name>hive.server2.enable.doAs</name>
67 |     <value>true</value>
68 |   </property>
69 |   <property>
70 |     <name>fs.hdfs.impl.disable.cache</name>
71 |     <value>true</value>
72 |   </property>
73 |   <property>
74 |     <name>hive.server2.use.SSL</name>
75 |     <value>false</value>
76 |   </property>
77 | </configuration>
78 |
79 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/resources/jaws-env.sh:
--------------------------------------------------------------------------------
1 | export TACHYON_WAREHOUSE_PATH=/sharktables
2 | export TACHYON_MASTER=tachyon://devbox.local:19998
3 | export MESOS_NATIVE_LIBRARY=/home/user/mesos-0.19.0/lib/libmesos.so
4 | export LOGGING_OPTS="-Dlog4j.configuration=log4j.properties -DJAWS_LOG_FOLDER=$logsFolder"
5 | export JAVA_OPTS="$LOGGING_OPTS -XX:PermSize=1g -XX:MaxPermSize=1g -Djava.library.path=/home/user/mesos-0.19.0/lib/libmesos.so:/home/user/hadoopNativeLibs"
6 |
7 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | #
3 | # The following properties set the logging levels and log appender. The
4 | # log4j.rootCategory variable defines the default log level and one or more
5 | # appenders. For the console appender, use 'defaultConsole'. For the daily
6 | # rolling file appender, use 'defaultFile'.
7 | #
8 | # To override the default (rootCategory) log level, define a property of the
9 | # form (see below for available values):
10 | #
11 | # log4j.logger.<logger name> = <level>
12 | #
13 | # Available logger names:
14 | # TODO
15 | #
16 | # Possible Log Levels:
17 | # FATAL, ERROR, WARN, INFO, DEBUG
18 | #
19 | #------------------------------------------------------------------------------
20 |
21 | log4j.rootCategory = INFO, defaultConsole, defaultFile
22 |
23 | #------------------------------------------------------------------------------
24 | #
25 | # The following properties configure the console (stdout) appender.
26 | # See http://logging.apache.org/log4j/docs/api/index.html for details.
27 | #
28 | #------------------------------------------------------------------------------
29 | log4j.appender.defaultConsole = org.apache.log4j.ConsoleAppender
30 | log4j.appender.defaultConsole.layout = org.apache.log4j.PatternLayout
31 | log4j.appender.defaultConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
32 |
33 | #------------------------------------------------------------------------------
34 | #
35 | # The following properties configure the Daily Rolling File appender.
36 | # See http://logging.apache.org/log4j/docs/api/index.html for details.
37 | #
38 | #------------------------------------------------------------------------------
39 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender
40 | log4j.appender.defaultFile.File = ${JAWS_LOG_FOLDER}/jaws-spark-sql-rest.log
41 | log4j.appender.defaultFile.Append = true
42 | log4j.appender.defaultFile.DatePattern = '.'yyyy-MM-dd
43 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout
44 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
45 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/resources/sharkSettings.txt:
--------------------------------------------------------------------------------
1 | set spark.sql.shuffle.partitions=12
2 | set hive.column.compress=true
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/ActorUtils.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import akka.actor.ActorRef
4 | import messages.ErrorMessage
5 |
6 | import scala.util.{Failure, Success, Try}
7 |
8 | /**
9 | * Created by emaorhian
10 | */
11 |
12 | object ActorsPaths {
13 |
14 | def REMOTE_ACTOR_SYSTEM_PREFIX_PATH = "akka.tcp://remoteSystem@"
15 |
16 | def LOCAL_SUPERVISOR_ACTOR_NAME = "LocalSupervisor"
17 | def LOCAL_SUPERVISOR_ACTOR_PATH = s"/user/$LOCAL_SUPERVISOR_ACTOR_NAME"
18 |
19 | def REMOTE_SUPERVISOR_ACTOR_NAME = "RemoteSupervisor"
20 | def REMOTE_SUPERVISOR_ACTOR_PATH = s"/user/$REMOTE_SUPERVISOR_ACTOR_NAME"
21 |
22 | def GET_QUERIES_ACTOR_NAME = "GetQueries"
23 | def GET_QUERIES_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_QUERIES_ACTOR_NAME"
24 |
25 | def GET_TABLES_ACTOR_NAME = "GetTables"
26 | def GET_TABLES_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_TABLES_ACTOR_NAME"
27 |
28 | def RUN_SCRIPT_ACTOR_NAME = "RunScript"
29 | def RUN_SCRIPT_ACTOR_PATH = s"$REMOTE_SUPERVISOR_ACTOR_PATH/$RUN_SCRIPT_ACTOR_NAME"
30 |
31 | def GET_LOGS_ACTOR_NAME = "GetLogs"
32 | def GET_LOGS_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_LOGS_ACTOR_NAME"
33 |
34 | def LOGS_WEBSOCKETS_ACTOR_NAME = "LogsWebsockets"
35 | def LOGS_WEBSOCKETS_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$LOGS_WEBSOCKETS_ACTOR_NAME"
36 |
37 | def GET_RESULTS_ACTOR_NAME = "GetResults"
38 | def GET_RESULTS_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_RESULTS_ACTOR_NAME"
39 |
40 | def GET_DATABASES_ACTOR_NAME = "GetDatabases"
41 | def GET_DATABASES_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_DATABASES_ACTOR_NAME"
42 |
43 | def GET_DATASOURCE_SCHEMA_ACTOR_NAME = "GetDatasourceSchemaActor"
44 | def GET_DATASOURCE_SCHEMA_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_DATASOURCE_SCHEMA_ACTOR_NAME"
45 |
46 | def BALANCER_ACTOR_NAME = "Balancer"
47 | def BALANCER_ACTOR_PATH = s"$REMOTE_SUPERVISOR_ACTOR_NAME/$BALANCER_ACTOR_NAME"
48 |
49 | def DELETE_QUERY_ACTOR_NAME = "DeleteQuery"
50 | def DELETE_QUERY_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$DELETE_QUERY_ACTOR_NAME"
51 |
52 | def QUERY_NAME_ACTOR_NAME = "QueryName"
53 | def QUERY_NAME_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$QUERY_NAME_ACTOR_NAME"
54 |
55 | def REGISTER_PARQUET_TABLE_ACTOR_NAME = "RegisterParquetTable"
56 | def REGISTER_PARQUET_TABLE_ACTOR_PATH = s"$REMOTE_SUPERVISOR_ACTOR_PATH/$REGISTER_PARQUET_TABLE_ACTOR_NAME"
57 |
58 | def GET_PARQUET_TABLES_ACTOR_NAME = "GetParquetTables"
59 | def GET_PARQUET_TABLES_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_PARQUET_TABLES_ACTOR_NAME"
60 | }
61 |
62 | object ActorOperations {
63 | def returnResult (tryResult : Try[Any], results : Any, errorMessage : String, senderActor: ActorRef){
64 | tryResult match {
65 | case Success(v) => senderActor ! results
66 | case Failure(e) => senderActor ! ErrorMessage(s"$errorMessage ${e.getMessage}")
67 | }
68 | }
69 | }
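Usage note: a minimal sketch of how ActorOperations.returnResult is meant to be called from an API actor; the message, the actor class and the computation wrapped in Try below are illustrative stand-ins, not code from the repository.

    import akka.actor.{Actor, ActorRef}
    import scala.util.Try
    import apiactors.ActorOperations

    // Illustrative message and actor, only to show the returnResult calling pattern.
    case class ExampleLookup(queryID: String)

    class ExampleQueriesActor extends Actor {
      def receive = {
        case ExampleLookup(queryID) =>
          val currentSender: ActorRef = sender
          // stand-in for a blocking DAL call; the Try decides which reply is sent
          val attempt = Try { s"details for $queryID" }
          ActorOperations.returnResult(attempt, attempt.getOrElse(""),
            "GET query info failed with the following message:", currentSender)
      }
    }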
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/BalancerActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import akka.actor.ActorRef
4 | import akka.actor.Actor
5 | import akka.actor.ActorSelection
6 | import server.Configuration
7 | import messages.CancelMessage
8 | import server.JawsController
9 | import apiactors.ActorsPaths._
10 | import messages.RegisterTableMessage
11 | import akka.pattern._
12 | import akka.util.Timeout
13 | import messages.UnregisterTableMessage
14 |
15 | class BalancerActor extends Actor {
16 | var runActors: Array[ActorSelection] = null
17 | var registerParquetTableActors: Array[ActorSelection] = null
18 | implicit val timeout = Timeout(Configuration.timeout.toInt)
19 |
20 | if (!Configuration.remoteDomainActor.getOrElse("").isEmpty) {
21 | Configuration.log4j.info(s"There are remote actors at: ${Configuration.remoteDomainActor}")
22 | runActors = for (actorIp <- Configuration.remoteDomainActor.get.split(",")) yield context.actorSelection(s"$REMOTE_ACTOR_SYSTEM_PREFIX_PATH$actorIp$RUN_SCRIPT_ACTOR_PATH")
23 | registerParquetTableActors = for (actorIp <- Configuration.remoteDomainActor.get.split(",")) yield context.actorSelection(s"$REMOTE_ACTOR_SYSTEM_PREFIX_PATH$actorIp$REGISTER_PARQUET_TABLE_ACTOR_PATH")
24 | }
25 |
26 | def receive = {
27 | case message: CancelMessage =>
28 | JawsController.runScriptActor ! message
29 | Option(runActors) match {
30 | case None => Configuration.log4j.info("[BalancerActor] There aren't any remote run actors to send the cancel message to!")
31 | case _ => runActors.foreach { dom => dom ! message }
32 | }
33 |
34 | case message @ (_: RegisterTableMessage | _: UnregisterTableMessage) => {
35 | Option(registerParquetTableActors) match {
36 | case None => Configuration.log4j.info("[BalancerActor] There aren't any remote register parquet actors to send the register table message to!")
37 | case _ => registerParquetTableActors.foreach { dom =>
38 | {
39 | Configuration.log4j.info(s"Sending message to the registering actor at ${dom}")
40 | dom ! message
41 | }
42 | }
43 | }
44 |
45 | sender ! JawsController.registerParquetTableActor ? message
46 | }
47 |
48 | }
49 |
50 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/DeleteQueryApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import com.xpatterns.jaws.data.contracts.DAL
4 | import akka.actor.Actor
5 | import messages.DeleteQueryMessage
6 | import server.Configuration
7 | import com.xpatterns.jaws.data.utils.QueryState
8 | import scala.concurrent._
9 | import ExecutionContext.Implicits.global
10 | import scala.util.{ Success, Failure }
11 | import messages.ErrorMessage
12 |
13 | class DeleteQueryApiActor(dals: DAL) extends Actor {
14 | override def receive = {
15 |
16 | case message: DeleteQueryMessage => {
17 |
18 | Configuration.log4j.info(s"[DeleteQueryApiActor]: deleting query with id ${message.queryID}")
19 |
20 | val currentSender = sender
21 |
22 | val deleteQueryFuture = future {
23 | dals.loggingDal.getState(message.queryID) match {
24 | case QueryState.IN_PROGRESS => throw new Exception(s"The query ${message.queryID} is IN_PROGRESS. Please wait for its completion or cancel it")
25 | case QueryState.NOT_FOUND => throw new Exception(s"The query ${message.queryID} was not found. Please provide a valid query id")
26 | case _ => {
27 | dals.loggingDal.deleteQuery(message.queryID)
28 | dals.resultsDal.deleteResults(message.queryID)
29 | s"Query ${message.queryID} was deleted"
30 | }
31 | }
32 | }
33 |
34 | deleteQueryFuture onComplete {
35 | case Success(successfulMessage) => currentSender ! successfulMessage
36 | case Failure(e) => currentSender ! ErrorMessage(s"DELETE query failed with the following message: ${e.getMessage}")
37 | }
38 | }
39 | }
40 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/GetDatabasesApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import akka.actor.Actor
4 | import akka.actor.actorRef2Scala
5 | import apiactors.ActorOperations._
6 | import com.google.common.base.Preconditions
7 | import server.LogsActor
8 | import akka.actor.ActorLogging
9 | import com.xpatterns.jaws.data.contracts.DAL
10 | import messages.GetDatabasesMessage
11 | import java.util.UUID
12 | import server.Configuration
13 | import org.apache.spark.sql.hive.HiveContext
14 | import org.apache.spark.scheduler.HiveUtils
15 | import implementation.HiveContextWrapper
16 | import scala.concurrent._
17 | import ExecutionContext.Implicits.global
18 | import scala.util.{ Success, Failure }
19 | import messages.ErrorMessage
20 | import scala.util.Try
21 | import com.xpatterns.jaws.data.DTO.Column
22 | import com.xpatterns.jaws.data.DTO.Databases
23 |
24 | /**
25 | * Created by emaorhian
26 | */
27 | class GetDatabasesApiActor(hiveContext: HiveContextWrapper, dals: DAL) extends Actor {
28 |
29 | override def receive = {
30 |
31 | case message: GetDatabasesMessage => {
32 | Configuration.log4j.info("[GetDatabasesApiActor]: showing databases")
33 | val currentSender = sender
34 |
35 | val getDatabasesFuture = future {
36 | val uuid = System.currentTimeMillis() + UUID.randomUUID().toString()
37 | val metadataQueryResult = HiveUtils.runMetadataCmd(hiveContext, "show databases").flatten
38 | new Databases(metadataQueryResult)
39 |
40 | }
41 |
42 | getDatabasesFuture onComplete {
43 | case Success(result) => currentSender ! result
44 | case Failure(e) => currentSender ! ErrorMessage(s"GET databases failed with the following message: ${e.getMessage}")
45 | }
46 | }
47 |
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/GetDatasourceSchemaActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import akka.actor.Actor
4 | import implementation.SchemaSettingsFactory.{ Hdfs, Hive, Parquet, Tachyon }
5 | import implementation.HiveContextWrapper
6 | import messages.GetDatasourceSchemaMessage
7 | import org.apache.spark.scheduler.HiveUtils
8 | import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
9 | import org.apache.spark.sql.parquet.SparkParquetUtility._
10 | import server.Configuration
11 | import scala.concurrent._
12 | import ExecutionContext.Implicits.global
13 | import scala.util.{ Success, Failure }
14 | import messages.ErrorMessage
15 | import com.xpatterns.jaws.data.utils.{Utils, AvroConverter}
16 | import org.apache.spark.sql.types.StructType
17 | import com.xpatterns.jaws.data.utils.Utils._
18 | /**
19 | * Handles the operations used for getting the schema
20 | */
21 | class GetDatasourceSchemaActor(hiveContext: HiveContextWrapper) extends Actor {
22 |
23 | def receive = {
24 | case request: GetDatasourceSchemaMessage =>
25 | val hostname: String = Configuration.rddDestinationIp.get
26 | val path: String = s"${request.path}"
27 | Configuration.log4j.info(s"Getting the data source schema for path $path, sourceType ${request.sourceType}, storageType ${request.storageType}")
28 | val currentSender = sender()
29 |
30 | val getDatasourceSchemaFuture = future {
31 | var result: StructType = null
32 | request.sourceType match {
33 | case Hive() =>
34 |
35 | try {
36 | val table = hiveContext.table(path)
37 | result = table.schema
38 | } catch {
39 |             // When the table doesn't exist, throw a new exception with a better message.
40 | case _:NoSuchTableException => throw new Exception("Table does not exist")
41 | }
42 | case Parquet() =>
43 | request.storageType match {
44 | case Hdfs() =>
45 | val hdfsURL = HiveUtils.getHdfsPath(hostname)
46 |
47 | // Make sure that file exists
48 | checkFileExistence(request.hdfsConf, hdfsURL, path)
49 |
50 | result = hiveContext.readXPatternsParquet(hdfsURL, path).schema
51 | case Tachyon() =>
52 | val tachyonURL = HiveUtils.getTachyonPath(hostname)
53 |
54 | // Make sure that file exists
55 | checkFileExistence(request.hdfsConf, tachyonURL, path)
56 |
57 | result = hiveContext.readXPatternsParquet(tachyonURL, path).schema
58 | }
59 | }
60 |
61 | Configuration.log4j.info("Reading the avro schema from result df")
62 |
63 | val avroSchema = AvroConverter.getAvroSchema(result).toString(true)
64 | Configuration.log4j.debug(avroSchema)
65 | avroSchema
66 | }
67 |
68 | getDatasourceSchemaFuture onComplete {
69 | case Success(result) => currentSender ! result
70 | case Failure(e) => currentSender ! ErrorMessage(s"GET data source schema failed with the following message: ${getCompleteStackTrace(e)}")
71 | }
72 |
73 | case request: Any => Configuration.log4j.error(request.toString)
74 | }
75 |
76 | /**
77 | * Checks the file existence on the sent file system. If the file is not found an exception is thrown
78 | * @param hdfsConfiguration the hdfs configuration
79 | * @param defaultFSUrl the file system default path. It is different for hdfs and for tachyon.
80 | * @param filePath the path for the file for which the existence is checked
81 | */
82 | private def checkFileExistence(hdfsConfiguration: org.apache.hadoop.conf.Configuration, defaultFSUrl:String, filePath:String) = {
83 | val newConf = new org.apache.hadoop.conf.Configuration(hdfsConfiguration)
84 | newConf.set("fs.defaultFS", defaultFSUrl)
85 | if (!Utils.checkFileExistence(defaultFSUrl + filePath, newConf)) {
86 | throw new Exception("File path does not exist")
87 | }
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/GetLogsApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import akka.actor.Actor
4 | import akka.actor.actorRef2Scala
5 | import apiactors.ActorOperations._
6 | import com.google.common.base.Preconditions
7 | import server.LogsActor
8 | import akka.actor.ActorLogging
9 | import com.xpatterns.jaws.data.contracts.DAL
10 | import messages.GetLogsMessage
11 | import org.joda.time.DateTime
12 | import java.util.Collection
13 | import server.Configuration
14 | import com.xpatterns.jaws.data.DTO.Logs
15 | import com.xpatterns.jaws.data.DTO.Log
16 | import scala.concurrent._
17 | import ExecutionContext.Implicits.global
18 | import scala.util.{ Success, Failure }
19 | import messages.ErrorMessage
20 |
21 | /**
22 | * Created by emaorhian
23 | */
24 | class GetLogsApiActor(dals: DAL) extends Actor {
25 |
26 | override def receive = {
27 |
28 | case message: GetLogsMessage => {
29 | Configuration.log4j.info("[GetLogsApiActor]: retrieving logs for: " + message.queryID)
30 | val currentSender = sender
31 |
32 | val getLogsFuture = future {
33 | val limit = Option(message.limit) getOrElse(100)
34 | val startDate = Option(message.startDate) getOrElse(new DateTime(1977, 1, 1, 1, 1, 1, 1).getMillis())
35 |
36 | dals.loggingDal.getLogs(message.queryID, startDate, limit)
37 | }
38 | getLogsFuture onComplete {
39 | case Success(result) => currentSender ! result
40 | case Failure(e) => currentSender ! ErrorMessage(s"GET logs failed with the following message: ${e.getMessage}")
41 | }
42 | }
43 | }
44 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/GetParquetTablesApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import messages._
4 | import scala.concurrent._
5 | import ExecutionContext.Implicits.global
6 | import scala.util.{ Success, Failure }
7 | import messages.ErrorMessage
8 | import spray.http.StatusCodes
9 | import scala.concurrent.Await
10 | import com.xpatterns.jaws.data.contracts.DAL
11 | import java.util.UUID
12 | import akka.util.Timeout
13 | import server.Configuration
14 | import akka.pattern.ask
15 | import org.apache.spark.scheduler.HiveUtils
16 | import implementation.HiveContextWrapper
17 | import akka.actor.Actor
18 | import com.xpatterns.jaws.data.DTO.Tables
19 | import scala.util.{ Try, Success, Failure }
20 | import apiactors.ActorOperations._
21 | import com.xpatterns.jaws.data.DTO.Column
22 | import com.xpatterns.jaws.data.DTO.Table
23 | import com.xpatterns.jaws.data.utils.CustomConverter
24 | /**
25 | * Created by emaorhian
26 | */
27 |
28 | class GetParquetTablesApiActor(hiveContext: HiveContextWrapper, dals: DAL) extends Actor {
29 |
30 | override def receive = {
31 |
32 | case message: GetParquetTablesMessage => {
33 | val currentSender = sender
34 |
35 | val getTablesFuture = future {
36 | if (message.tables.isEmpty) {
37 | val tables = dals.parquetTableDal.listParquetTables
38 | message.describe match {
39 | case true => Array(Tables("None", tables map (pTable => getFields(pTable.name))))
40 | case false => Array(Tables("None", tables map (pTable => Table(pTable.name, Array.empty, Array.empty))))
41 | }
42 |
43 | } else {
44 | var tablesMap = message.tables.map(table => {
45 | if (dals.parquetTableDal.tableExists(table) == false)
46 | throw new Exception(s" Table $table does not exist")
47 | getFields(table)
48 | })
49 | Array(Tables("None", tablesMap))
50 | }
51 | }
52 |
53 | getTablesFuture onComplete {
54 | case Success(result) => currentSender ! result
55 | case Failure(e) => currentSender ! ErrorMessage(s"GET tables failed with the following message: ${e.getMessage}")
56 | }
57 | }
58 | }
59 |
60 | def getFields(tableName: String): Table = {
61 | val tableSchemaRDD = hiveContext.table(tableName)
62 | val schema = CustomConverter.getCustomSchema(tableSchemaRDD.schema)
63 |
64 | Table(tableName, schema, Array.empty)
65 | }
66 |
67 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/GetQueriesApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import akka.actor.Actor
4 | import com.xpatterns.jaws.data.contracts.DAL
5 | import server.Configuration
6 | import scala.concurrent._
7 | import ExecutionContext.Implicits.global
8 | import scala.util.{ Success, Failure }
9 | import messages._
10 | /**
11 | * Created by emaorhian
12 | */
13 | class GetQueriesApiActor(dals: DAL) extends Actor {
14 |
15 | override def receive = {
16 |
17 | case message: GetPaginatedQueriesMessage =>
18 |
19 | Configuration.log4j.info("[GetQueriesApiActor]: retrieving " + message.limit + " number of queries starting with " + message.startQueryID)
20 | val currentSender = sender()
21 | val getQueriesFuture = future {
22 | dals.loggingDal.getQueries(message.startQueryID, message.limit)
23 | }
24 |
25 | getQueriesFuture onComplete {
26 | case Success(result) => currentSender ! result
27 | case Failure(e) => currentSender ! ErrorMessage(s"GET queries failed with the following message: ${e.getMessage}")
28 | }
29 |
30 | case message: GetQueriesMessage =>
31 | Configuration.log4j.info("[GetQueryInfoApiActor]: retrieving the query information for " + message.queryIDs)
32 |
33 | val currentSender = sender()
34 |
35 | val getQueryInfoFuture = future {
36 | dals.loggingDal.getQueries(message.queryIDs)
37 | }
38 |
39 | getQueryInfoFuture onComplete {
40 | case Success(result) => currentSender ! result
41 | case Failure(e) => currentSender ! ErrorMessage(s"GET query info failed with the following message: ${e.getMessage}")
42 | }
43 |
44 | case message: GetQueriesByName =>
45 | Configuration.log4j.info("[GetQueryInfoApiActor]: retrieving the queries for " + message.name)
46 |
47 | val currentSender = sender()
48 |
49 | val getQueryInfoFuture = future {
50 | dals.loggingDal.getQueriesByName(message.name)
51 | }
52 |
53 | getQueryInfoFuture onComplete {
54 | case Success(result) => currentSender ! result
55 | case Failure(e) => currentSender ! ErrorMessage(s"GET query info failed with the following message: ${e.getMessage}")
56 | }
57 |
58 | case _: GetPublishedQueries =>
59 | Configuration.log4j.info("[GetQueryInfoApiActor]: retrieving the published queries ")
60 |
61 | val currentSender = sender()
62 |
63 | val getQueryInfoFuture = future {
64 | dals.loggingDal.getPublishedQueries()
65 | }
66 |
67 | getQueryInfoFuture onComplete {
68 | case Success(result) => currentSender ! result
69 | case Failure(e) => currentSender ! ErrorMessage(s"GET published queries failed with the following message: ${e.getMessage}")
70 | }
71 | }
72 |
73 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/GetResultsApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import apiactors.ActorOperations._
4 | import scala.concurrent._
5 | import org.apache.spark.rdd.RDD
6 | import org.apache.spark.sql.hive.HiveContext
7 | import com.google.common.base.Preconditions
8 | import com.xpatterns.jaws.data.DTO.Column
9 | import com.xpatterns.jaws.data.utils.Utils
10 | import server.Configuration
11 | import akka.actor.Actor
12 | import akka.actor.actorRef2Scala
13 | import messages.GetResultsMessage
14 | import net.liftweb.json._
15 | import net.liftweb.json.DefaultFormats
16 | import com.xpatterns.jaws.data.contracts.DAL
17 | import org.apache.spark.scheduler.HiveUtils
18 | import ExecutionContext.Implicits.global
19 | import scala.util.Try
20 | import scala.util.Success
21 | import scala.util.Failure
22 | import messages.ErrorMessage
23 | import messages.ResultFormat._
24 | import com.xpatterns.jaws.data.DTO.AvroResult
25 | import com.xpatterns.jaws.data.DTO.CustomResult
26 | import com.xpatterns.jaws.data.utils.ResultsConverter
27 | import org.apache.spark.sql.catalyst.expressions.Row
28 | import com.xpatterns.jaws.data.DTO.AvroBinaryResult
29 |
30 | /**
31 | * Created by emaorhian
32 | */
33 | class GetResultsApiActor(hdfsConf: org.apache.hadoop.conf.Configuration, hiveContext: HiveContext, dals: DAL) extends Actor {
34 | implicit val formats = DefaultFormats
35 | override def receive = {
36 |
37 | case message: GetResultsMessage =>
38 | {
39 | Configuration.log4j.info(s"[GetResultsMessage]: retrieving results for: ${message.queryID} in the ${message.format}")
40 | val currentSender = sender
41 | val getResultsFuture = future {
42 |
43 | val (offset, limit) = getOffsetAndLimit(message)
44 | val metaInfo = dals.loggingDal.getMetaInfo(message.queryID)
45 |
46 | metaInfo.resultsDestination match {
47 | // cassandra
48 | case 0 => {
49 | var endIndex = offset + limit
50 | message.format match {
51 | case AVRO_BINARY_FORMAT => new AvroBinaryResult(getDBAvroResults(message.queryID, offset, endIndex))
52 | case AVRO_JSON_FORMAT => getDBAvroResults(message.queryID, offset, endIndex).result
53 | case _ => getCustomResults(message.queryID, offset, endIndex)
54 | }
55 |
56 | }
57 | //hdfs
58 | case 1 => {
59 | val destinationPath = HiveUtils.getHdfsPath(Configuration.rddDestinationIp.get)
60 | getFormattedResult(message.format, getResults(offset, limit, destinationPath))
61 |
62 | }
63 | //tachyon
64 | case 2 => {
65 | val destinationPath = HiveUtils.getTachyonPath(Configuration.rddDestinationIp.get)
66 | getFormattedResult(message.format, getResults(offset, limit, destinationPath))
67 |
68 | }
69 | case _ => {
70 | Configuration.log4j.info("[GetResultsMessage]: Unidentified results path : " + metaInfo.resultsDestination)
71 | null
72 | }
73 | }
74 | }
75 |
76 | getResultsFuture onComplete {
77 | case Success(results) => currentSender ! results
78 | case Failure(e) => currentSender ! ErrorMessage(s"GET results failed with the following message: ${e.getMessage}")
79 | }
80 |
81 | }
82 |
83 | def getResults(offset: Int, limit: Int, destinationPath: String): ResultsConverter = {
84 | val schemaBytes = Utils.readBytes(hdfsConf, Configuration.schemaFolder.getOrElse("jawsSchemaFolder") + "/" + message.queryID)
85 | val schema = HiveUtils.deserializaSchema(schemaBytes)
86 |
87 | val resultsRDD: RDD[Tuple2[Object, Array[Object]]] = hiveContext.sparkContext.objectFile(HiveUtils.getRddDestinationPath(message.queryID, destinationPath))
88 |
89 | val filteredResults = resultsRDD.filter(tuple => tuple._1.asInstanceOf[Long] >= offset && tuple._1.asInstanceOf[Long] < offset + limit).collect()
90 |
91 | val resultRows = filteredResults map { case (index, row) => Row.fromSeq(row) }
92 |
93 | new ResultsConverter(schema, resultRows)
94 |
95 | }
96 | }
97 |
98 | def getOffsetAndLimit(message: GetResultsMessage): Tuple2[Int, Int] = {
99 | var offset = message.offset
100 | var limit = message.limit
101 |
102 | Option(offset) match {
103 | case None => {
104 | Configuration.log4j.info("[GetResultsMessage]: offset null... setting it on 0")
105 | offset = 0
106 | }
107 | case _ => {
108 | Configuration.log4j.info("[GetResultsMessage]: offset = " + offset)
109 | }
110 | }
111 |
112 | Option(limit) match {
113 | case None => {
114 | Configuration.log4j.info("[GetResultsMessage]: limit null... setting it on 100")
115 | limit = 100
116 | }
117 | case _ => {
118 | Configuration.log4j.info("[GetResultsMessage]: limit = " + limit)
119 | }
120 | }
121 | (offset, limit)
122 | }
123 |
124 | private def getDBAvroResults(queryID: String, offset: Int, limit: Int) = {
125 | val result = dals.resultsDal.getAvroResults(queryID)
126 | val lastResultIndex = if (limit > result.result.length) result.result.length else limit
127 | new AvroResult(result.schema, result.result.slice(offset, lastResultIndex))
128 | }
129 |
130 | private def getCustomResults(queryID: String, offset: Int, limit: Int) = {
131 | val result = dals.resultsDal.getCustomResults(queryID)
132 | val lastResultIndex = if (limit > result.result.length) result.result.length else limit
133 | new CustomResult(result.schema, result.result.slice(offset, lastResultIndex))
134 | }
135 |
136 | private def getFormattedResult(format: String, resultsConverter: ResultsConverter) = {
137 | format match {
138 | case AVRO_BINARY_FORMAT => resultsConverter.toAvroBinaryResults()
139 | case AVRO_JSON_FORMAT => resultsConverter.toAvroResults().result
140 | case _ => resultsConverter.toCustomResults()
141 | }
142 | }
143 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/QueryPropertiesApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import akka.actor.Actor
4 | import com.xpatterns.jaws.data.contracts.DAL
5 | import com.xpatterns.jaws.data.utils.QueryState
6 | import messages.{UpdateQueryPropertiesMessage, ErrorMessage}
7 | import server.Configuration
8 | import scala.concurrent._
9 | import ExecutionContext.Implicits.global
10 |
11 | import scala.concurrent._
12 | import scala.util.{Failure, Success}
13 |
14 | /**
15 | * Handles the properties operation on a query
16 | */
17 | class QueryPropertiesApiActor (dals: DAL) extends Actor {
18 | override def receive = {
19 | case message: UpdateQueryPropertiesMessage =>
20 |
21 | Configuration.log4j.info(s"[QueryPropertiesApiActor]: updating query id ${message.queryID} with name ${message.name}")
22 |
23 | val currentSender = sender()
24 |
25 | val updateQueryFuture = future {
26 | dals.loggingDal.getState(message.queryID) match {
27 | case QueryState.NOT_FOUND => throw new Exception(s"The query ${message.queryID} was not found. Please provide a valid query id")
28 | case _ =>
29 | dals.loggingDal.setQueryProperties(message.queryID, message.name, message.description, message.published, message.overwrite)
30 | s"Query information for ${message.queryID} has been updated"
31 | }
32 | }
33 |
34 | updateQueryFuture onComplete {
35 | case Success(successfulMessage) => currentSender ! successfulMessage
36 | case Failure(e) => currentSender ! ErrorMessage(s"Updating query failed with the following message: ${e.getMessage}")
37 | }
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/apiactors/RegisterParquetTableApiActor.scala:
--------------------------------------------------------------------------------
1 | package apiactors
2 |
3 | import implementation.HiveContextWrapper
4 | import com.xpatterns.jaws.data.contracts.DAL
5 | import akka.actor.Actor
6 | import messages.RegisterTableMessage
7 | import server.Configuration
8 | import org.apache.spark.scheduler.HiveUtils
9 | import messages.UnregisterTableMessage
10 | import scala.concurrent._
11 | import ExecutionContext.Implicits.global
12 | import scala.util.{ Success, Failure }
13 | import messages.ErrorMessage
14 |
15 | class RegisterParquetTableApiActor(hiveContext: HiveContextWrapper, dals: DAL) extends Actor {
16 | override def receive = {
17 |
18 | case message: RegisterTableMessage => {
19 | Configuration.log4j.info(s"[RegisterParquetTableApiActor]: registering table ${message.name} at ${message.path}")
20 | val currentSender = sender
21 |
22 | val registerTableFuture = future {
23 | val (namenode, folderPath) = if (message.namenode.isEmpty) HiveUtils.splitPath(message.path) else (message.namenode, message.path)
24 | HiveUtils.registerParquetTable(hiveContext, message.name, namenode, folderPath, dals)
25 | }
26 |
27 | registerTableFuture onComplete {
28 | case Success(_) => currentSender ! s"Table ${message.name} was registered"
29 | case Failure(e) => currentSender ! ErrorMessage(s"RegisterTable failed with the following message: ${e.getMessage}")
30 | }
31 | }
32 |
33 | case message: UnregisterTableMessage => {
34 | Configuration.log4j.info(s"[RegisterParquetTableApiActor]: Unregistering table ${message.name}")
35 | val currentSender = sender
36 |
37 | val unregisterTableFuture = future {
38 | // unregister table
39 | hiveContext.getCatalog.unregisterTable(Seq(message.name))
40 | dals.parquetTableDal.deleteParquetTable(message.name)
41 | }
42 |
43 | unregisterTableFuture onComplete {
44 | case Success(result) => currentSender ! s"Table ${message.name} was unregistered"
45 | case Failure(e) => currentSender ! ErrorMessage(s"UnregisterTable failed with the following message: ${e.getMessage}")
46 | }
47 | }
48 | }
49 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/customs/CORSDirectives.scala:
--------------------------------------------------------------------------------
1 | package customs
2 |
3 | import spray.http._
4 | import spray.routing._
5 | import spray.http.HttpHeaders._
6 | import spray.http.HttpMethod
7 | /**
8 | * Created by emaorhian
9 | */
10 | trait CORSDirectives { this: HttpService =>
11 | private def respondWithCORSHeaders(origin: String, rh: Seq[HttpHeader]) = {
12 | var headers: List[HttpHeader] = List(
13 | HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(List(origin))),
14 | HttpHeaders.`Access-Control-Allow-Credentials`(true),
15 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid")
16 | ) ++ rh.toList
17 |
18 | respondWithHeaders(headers)
19 | }
20 | private def respondWithCORSHeadersAllOrigins(rh: Seq[HttpHeader]) = {
21 | var headers: List[HttpHeader] = List(
22 | HttpHeaders.`Access-Control-Allow-Origin`(AllOrigins),
23 | HttpHeaders.`Access-Control-Allow-Credentials`(true),
24 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid")
25 | ) ++ rh.toList
26 |
27 | respondWithHeaders(headers)
28 | }
29 |
30 | def corsFilter(origins: List[String], rh: HttpHeader*)(route: Route) =
31 | if (origins.contains("*"))
32 | respondWithCORSHeadersAllOrigins(rh)(route)
33 | else
34 | optionalHeaderValueByName("Origin") {
35 | case None =>
36 | route
37 | case Some(clientOrigin) => {
38 | if (origins.contains(clientOrigin))
39 | respondWithCORSHeaders(clientOrigin, rh)(route)
40 | else {
41 | // Maybe, a Rejection will fit better
42 | complete(StatusCodes.Forbidden, "Invalid origin")
43 | }
44 | }
45 | }
46 | }
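Usage note: a minimal sketch of wiring corsFilter into a spray route; the service class, the route and the allowed-origins list are illustrative (the real server takes the origins from the cors-filter-allowed-hosts setting), not copied from JawsController.

    import spray.routing.HttpServiceActor
    import customs.CORSDirectives

    // Illustrative service: with "*" in the allowed list every response gets the
    // permissive CORS headers; otherwise only whitelisted Origin values do.
    class ExampleCorsService extends HttpServiceActor with CORSDirectives {
      def receive = runRoute {
        corsFilter(List("*")) {
          path("index") {
            get {
              complete("Jaws is up and running!")
            }
          }
        }
      }
    }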
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/customs/CustomDirectives.scala:
--------------------------------------------------------------------------------
1 | package customs
2 |
3 | import spray.routing._
4 | import Directives._
5 | import spray.http.StatusCodes.ClientError
6 |
7 | object CustomDirectives {
8 |
9 | def validateCondition(condition: Boolean, message: String, rejectStatusCode: ClientError): Directive0 = {
10 | if (condition == false) {
11 | complete(rejectStatusCode, message)
12 | } else
13 | pass
14 | }
15 |
16 | }
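Usage note: a minimal sketch of validateCondition guarding a route; the limit check and the messages are illustrative.

    object ValidateConditionExample {
      import spray.http.StatusCodes
      import spray.routing.Directives._
      import spray.routing.Route
      import customs.CustomDirectives.validateCondition

      // Completes with 400 Bad Request before the inner route runs
      // whenever the condition does not hold.
      def limitedRoute(limit: Int): Route =
        validateCondition(limit <= 1000, "limit must be at most 1000", StatusCodes.BadRequest) {
          complete(s"running query with limit $limit")
        }
    }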
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/customs/CustomIndexer.scala:
--------------------------------------------------------------------------------
1 | package customs
2 | import scala.Array.canBuildFrom
3 | import scala.Iterator
4 | import org.apache.spark.rdd.RDD
5 | import server.Configuration
6 |
7 | /**
8 | * Created by emaorhian
9 | */
10 | class CustomIndexer {
11 |
12 | def indexRdd(rdd: RDD[Array[Any]]): RDD[Tuple2[Long, Array[Any]]] = {
13 | val partitionCount = rdd.mapPartitionsWithIndex { (pid, iter) => Iterator((pid, iter.size)) }.collect
14 |
15 | var indexes = Array[Int](0)
16 | Configuration.log4j.debug("NumberOfPartitions is: " + partitionCount.size)
17 |
18 | val resultsNumber = partitionCount.foldLeft(0)((sizeSum, partInfo) => {
19 | indexes = indexes :+ (sizeSum + partInfo._2)
20 | sizeSum + partInfo._2
21 | })
22 |
23 | Configuration.log4j.debug("Number of results is: " + resultsNumber)
24 |
25 | val broadcastedIndexes = rdd.sparkContext.broadcast(indexes)
26 |
27 | //index each row
28 | val indexedRdd = rdd.mapPartitionsWithIndex { (index, iterator) =>
29 | var z = Array[Tuple2[Long, Array[Any]]]()
30 | var startIndex: Long = broadcastedIndexes.value(index)
31 | for (element <- iterator) {
32 | z = z ++ Array((startIndex, element))
33 | startIndex = startIndex + 1
34 | }
35 | z.iterator
36 | }
37 |
38 | return indexedRdd
39 | }
40 | }
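Usage note: a minimal sketch of indexRdd on a small local RDD; the SparkContext setup and the sample rows are illustrative, the real input comes from the query results RDD.

    import org.apache.spark.{SparkConf, SparkContext}
    import customs.CustomIndexer

    object CustomIndexerExample extends App {
      val sc = new SparkContext(new SparkConf().setAppName("indexer-example").setMaster("local[2]"))

      // two partitions, so the per-partition offsets computed by indexRdd actually matter
      val rows = sc.parallelize(Seq[Array[Any]](Array("Ana", 5), Array("George", 10), Array("Alina", 20)), 2)

      val indexed = new CustomIndexer().indexRdd(rows) // RDD[(Long, Array[Any])] with indexes 0, 1, 2
      indexed.collect().sortBy(_._1).foreach { case (i, row) => println(s"$i -> ${row.mkString(",")}") }

      sc.stop()
    }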
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/implementation/HiveContextWrapper.scala:
--------------------------------------------------------------------------------
1 | package implementation
2 |
3 | import org.apache.spark.sql.hive.HiveContext
4 | import org.apache.spark.SparkContext
5 |
6 | class HiveContextWrapper(sc: SparkContext) extends HiveContext(sc: SparkContext){
7 |
8 | def runMetadataSql(sql: String): Seq[String] = {
9 | runSqlHive(sql)
10 | }
11 |
12 | def getSparkContext() : SparkContext = {
13 | sc
14 | }
15 |
16 | def getCatalog = {
17 | catalog
18 | }
19 | }
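Usage note: a minimal sketch of the wrapper in use, assuming a local SparkContext and an available Hive metastore configuration on the classpath; the "show databases" command matches the metadata query GetDatabasesApiActor runs through HiveUtils.

    import org.apache.spark.{SparkConf, SparkContext}
    import implementation.HiveContextWrapper

    object HiveContextWrapperExample extends App {
      val sc = new SparkContext(new SparkConf().setAppName("hive-wrapper-example").setMaster("local[2]"))
      val hiveContext = new HiveContextWrapper(sc)

      // runMetadataSql delegates to runSqlHive, so it only touches the metastore
      val databases: Seq[String] = hiveContext.runMetadataSql("show databases")
      databases.foreach(println)

      sc.stop()
    }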
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/implementation/SchemaSettingsFactory.scala:
--------------------------------------------------------------------------------
1 | package implementation
2 |
3 | import server.Configuration
4 |
5 | /**
6 | * Created by lucianm on 12.02.2015.
7 | */
8 | object SchemaSettingsFactory {
9 |
10 | trait SourceType
11 |
12 | case class Parquet() extends SourceType
13 |
14 | case class Hive() extends SourceType
15 |
16 | trait StorageType
17 |
18 | case class Hdfs() extends StorageType
19 |
20 | case class Tachyon() extends StorageType
21 |
22 | val HIVE: String = "hive"
23 |
24 | val PARQUET: String = "parquet"
25 |
26 | def getSourceType(sourceType: String): SourceType = {
27 | if (sourceType.equalsIgnoreCase(HIVE)) new Hive
28 | else if (sourceType.equalsIgnoreCase(PARQUET)) new Parquet
29 | else throw new Exception(Configuration.UNSUPPORTED_SOURCE_TYPE)
30 | }
31 |
32 | val HDFS: String = "hdfs"
33 |
34 | val TACHYON: String = "tachyon"
35 |
36 | def getStorageType(storageType: String): StorageType = {
37 | if (storageType.equalsIgnoreCase(HDFS)) new Hdfs
38 | else if (storageType.equalsIgnoreCase(TACHYON)) new Tachyon
39 | else throw new Exception(Configuration.UNSUPPORTED_STORAGE_TYPE)
40 | }
41 |
42 | }
43 |
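
A short sketch of how the two factory methods turn the request parameters into typed settings and how callers typically pattern match on the result; the parameter values are illustrative.

    import implementation.SchemaSettingsFactory
    import implementation.SchemaSettingsFactory.{Hdfs, Hive, Parquet, Tachyon}

    object SchemaSettingsSketch extends App {
      // Case-insensitive lookups; any other value throws with the corresponding Configuration message.
      val sourceType = SchemaSettingsFactory.getSourceType("PARQUET")
      val storageType = SchemaSettingsFactory.getStorageType("hdfs")

      val description = (sourceType, storageType) match {
        case (_: Parquet, _: Hdfs) => "parquet files read from HDFS"
        case (_: Parquet, _: Tachyon) => "parquet files read from Tachyon"
        case (_: Hive, _) => "a table registered in the Hive metastore"
      }
      println(description)
    }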
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/messages/Messages.scala:
--------------------------------------------------------------------------------
1 | package messages
2 |
3 | import implementation.SchemaSettingsFactory.{ StorageType, SourceType }
4 | import org.apache.hadoop.conf.Configuration
5 |
6 | /**
7 | * Created by emaorhian
8 | */
9 | case class CancelMessage(queryID: String) extends Serializable
10 | case class GetDatabasesMessage()
11 | case class GetQueriesMessage(queryIDs: Seq[String])
12 | case class GetQueriesByName(name: String)
13 | case class GetPublishedQueries()
14 | case class GetPaginatedQueriesMessage(startQueryID: String, limit: Int)
15 | case class GetLogsMessage(queryID: String, startDate: Long, limit: Int)
16 | case class GetResultsMessage(queryID: String, offset: Int, limit: Int, format : String)
17 | case class GetTablesMessage(database: String, describe: Boolean, tables: Array[String])
18 | case class GetExtendedTablesMessage(database: String, tables: Array[String])
19 | case class GetFormattedTablesMessage(database: String, tables: Array[String])
20 | case class RunQueryMessage(name: String)
21 | case class RunScriptMessage(script: String, limited: Boolean, maxNumberOfResults: Long, rddDestination: String)
22 | case class RunParquetMessage(script: String, tablePath: String, namenode:String, table: String, limited: Boolean, maxNumberOfResults: Long, rddDestination: String)
23 | case class GetDatasourceSchemaMessage(path: String, sourceType: SourceType, storageType: StorageType, hdfsConf:Configuration)
24 | case class ErrorMessage(message: String)
25 | case class DeleteQueryMessage(queryID: String)
26 | case class RegisterTableMessage(name: String, path: String, namenode: String)
27 | case class UnregisterTableMessage(name: String)
28 | case class GetParquetTablesMessage(tables: Array[String], describe: Boolean)
29 | case class UpdateQueryPropertiesMessage(queryID:String, name:Option[String], description:Option[String], published:Option[Boolean], overwrite:Boolean)
30 |
31 |
32 | object ResultFormat {
33 | val AVRO_BINARY_FORMAT = "avrobinary"
34 | val AVRO_JSON_FORMAT = "avrojson"
35 | val DEFAULT_FORMAT = "default"
36 | }
37 |
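
A sketch of how these messages are usually sent to the API actors with the ask pattern; the actor references and the implicit timeout are assumed to be provided by the calling API trait (see BaseApi below).

    import scala.concurrent.Future
    import akka.actor.ActorRef
    import akka.pattern.ask
    import akka.util.Timeout
    import messages.{GetLogsMessage, RunScriptMessage}

    object MessagesSketch {
      // Hypothetical helpers: the actor refs and the timeout come from the calling API trait.
      def submitScript(runScriptActor: ActorRef)(implicit timeout: Timeout): Future[Any] =
        runScriptActor ? RunScriptMessage("show databases", limited = true, maxNumberOfResults = 100L, rddDestination = "hdfs")

      def fetchLogs(getLogsActor: ActorRef, queryID: String)(implicit timeout: Timeout): Future[Any] =
        getLogsActor ? GetLogsMessage(queryID, startDate = 0L, limit = 100)
    }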
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/Configuration.scala:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import com.typesafe.config.Config
4 | import org.apache.log4j.Logger
5 |
6 | /**
7 | * Holds the configuration properties for Jaws
8 | */
9 | object Configuration {
10 |
11 | import com.typesafe.config.ConfigFactory
12 |
13 | val log4j = Logger.getLogger(Configuration.getClass)
14 |
15 | private val conf = ConfigFactory.load
16 | conf.checkValid(ConfigFactory.defaultReference)
17 |
18 | val remote = conf.getConfig("remote")
19 | val sparkConf = conf.getConfig("sparkConfiguration")
20 | val appConf = conf.getConfig("appConf")
21 | val hadoopConf = conf.getConfig("hadoopConf")
22 | val cassandraConf = conf.getConfig("cassandraConf")
23 |
24 | // cassandra configuration
25 | val cassandraHost = getStringConfiguration(cassandraConf, "cassandra.host")
26 | val cassandraKeyspace = getStringConfiguration(cassandraConf, "cassandra.keyspace")
27 | val cassandraClusterName = getStringConfiguration(cassandraConf, "cassandra.cluster.name")
28 |
29 | //hadoop conf
30 | val replicationFactor = getStringConfiguration(hadoopConf, "replicationFactor")
31 | val forcedMode = getStringConfiguration(hadoopConf, "forcedMode")
32 | val loggingFolder = getStringConfiguration(hadoopConf, "loggingFolder")
33 | val stateFolder = getStringConfiguration(hadoopConf, "stateFolder")
34 | val detailsFolder = getStringConfiguration(hadoopConf, "detailsFolder")
35 | val resultsFolder = getStringConfiguration(hadoopConf, "resultsFolder")
36 | val metaInfoFolder = getStringConfiguration(hadoopConf, "metaInfoFolder")
37 | val queryNameFolder = getStringConfiguration(hadoopConf, "queryNameFolder")
38 | val queryPublishedFolder = getStringConfiguration(hadoopConf, "queryPublishedFolder")
39 | val queryUnpublishedFolder = getStringConfiguration(hadoopConf, "queryUnpublishedFolder")
40 | val namenode = getStringConfiguration(hadoopConf, "namenode")
41 | val parquetTablesFolder = getStringConfiguration(hadoopConf, "parquetTablesFolder")
42 |
43 | //app configuration
44 | val serverInterface = getStringConfiguration(appConf, "server.interface")
45 | val loggingType = getStringConfiguration(appConf, "app.logging.type")
46 | val rddDestinationIp = getStringConfiguration(appConf, "rdd.destination.ip")
47 | val rddDestinationLocation = getStringConfiguration(appConf, "rdd.destination.location")
48 | val remoteDomainActor = getStringConfiguration(appConf, "remote.domain.actor")
49 | val applicationName = getStringConfiguration(appConf, "application.name")
50 | val webServicesPort = getStringConfiguration(appConf, "web.services.port")
51 | val webSocketsPort = getStringConfiguration(appConf, "web.sockets.port")
52 | val nrOfThreads = getStringConfiguration(appConf, "nr.of.threads")
53 | val timeout = getStringConfiguration(appConf, "timeout").getOrElse("10000").toInt
54 | val schemaFolder = getStringConfiguration(appConf, "schemaFolder")
55 | val numberOfResults = getStringConfiguration(appConf, "nr.of.results")
56 | val corsFilterAllowedHosts = getStringConfiguration(appConf, "cors-filter-allowed-hosts")
57 | val jarPath = getStringConfiguration(appConf, "jar-path")
58 | val hdfsNamenodePath = getStringConfiguration(appConf, "hdfs-namenode-path").getOrElse("")
59 | val tachyonNamenodePath = getStringConfiguration(appConf, "tachyon-namenode-path").getOrElse("")
60 |
61 | val LIMIT_EXCEPTION_MESSAGE = "The limit is null!"
62 | val SCRIPT_EXCEPTION_MESSAGE = "The script is empty or null!"
63 | val UUID_EXCEPTION_MESSAGE = "The uuid is empty or null!"
64 | val META_INFO_EXCEPTION_MESSAGE = "The metainfo is null!"
65 | val LIMITED_EXCEPTION_MESSAGE = "The limited flag is null!"
66 | val RESULTS_NUMBER_EXCEPTION_MESSAGE = "The results number is null!"
67 | val FILE_EXCEPTION_MESSAGE = "The file is null or empty!"
68 | val QUERY_NAME_MESSAGE = "The query name is null or empty!"
69 | val FILE_PATH_TYPE_EXCEPTION_MESSAGE = "The file path must be hdfs or tachyon"
70 | val DATABASE_EXCEPTION_MESSAGE = "The database is null or empty!"
71 | val TABLE_EXCEPTION_MESSAGE = "The table name is null or empty!"
72 | val PATH_IS_EMPTY = "Request parameter \'path\' must not be empty!"
73 | val TABLE_ALREADY_EXISTS_EXCEPTION_MESSAGE = "The table already exists!"
74 | val UNSUPPORTED_SOURCE_TYPE = "Unsupported value for parameter \'sourceType\' !"
75 | val UNSUPPORTED_STORAGE_TYPE = "Unsupported value for parameter \'storageType\' !"
76 |
77 | def getStringConfiguration(configuration: Config, configurationPath: String): Option[String] = {
78 | if (configuration.hasPath(configurationPath)) Option(configuration.getString(configurationPath).trim) else None
79 | }
80 |
81 | }
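
A small sketch of how the optional settings returned by getStringConfiguration are consumed: missing keys come back as None, and each caller picks its own default or fails explicitly.

    import server.Configuration

    object ConfigurationSketch extends App {
      // Every property is an Option[String]; defaults are chosen at the call site.
      val port = Configuration.webServicesPort.getOrElse("8080").toInt
      val loggingType = Configuration.loggingType.getOrElse("cassandra")

      // A missing mandatory setting can be turned into a hard failure explicitly.
      val namenode = Configuration.namenode.getOrElse(sys.error("hadoopConf.namenode must be set"))

      Configuration.log4j.info(s"port=$port loggingType=$loggingType namenode=$namenode")
    }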
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/JawsController.scala:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import java.net.InetAddress
4 | import server.api._
5 | import scala.collection.JavaConverters._
6 | import com.typesafe.config.Config
7 | import com.xpatterns.jaws.data.utils.Utils
8 | import akka.actor.ActorSystem
9 | import customs.CORSDirectives
10 | import com.xpatterns.jaws.data.impl.CassandraDal
11 | import com.xpatterns.jaws.data.impl.HdfsDal
12 | import spray.routing.Directive.pimpApply
13 | import spray.routing.SimpleRoutingApp
14 | import com.xpatterns.jaws.data.contracts.DAL
15 | import org.apache.spark.scheduler.HiveUtils
16 | import implementation.HiveContextWrapper
17 | import org.apache.spark.SparkContext
18 | import org.apache.spark.scheduler.LoggingListener
19 | import org.apache.spark.SparkConf
20 |
21 | /**
22 | * Created by emaorhian
23 | */
24 | object JawsController extends App with UIApi with IndexApi with ParquetApi with MetadataApi with QueryManagementApi
25 | with SimpleRoutingApp with CORSDirectives {
26 | initialize()
27 |
28 | // initialize parquet tables
29 | initializeParquetTables()
30 |
31 | implicit val spraySystem: ActorSystem = ActorSystem("spraySystem")
32 |
33 | startServer(interface = Configuration.serverInterface.getOrElse(InetAddress.getLocalHost.getHostName),
34 | port = Configuration.webServicesPort.getOrElse("8080").toInt) {
35 | pathPrefix("jaws") {
36 | uiRoute ~ indexRoute ~ runLogsResultsQueriesCancelRoute ~ parquetRoute ~ hiveSchemaRoute
37 | }
38 | }
39 |
40 | private val reactiveServer = new ReactiveServer(Configuration.webSocketsPort.getOrElse("8081").toInt, MainActors.logsActor)
41 | reactiveServer.start()
42 |
43 | def initialize() = {
44 | Configuration.log4j.info("Initializing...")
45 |
46 | hdfsConf = getHadoopConf
47 | Utils.createFolderIfDoesntExist(hdfsConf, Configuration.schemaFolder.getOrElse("jawsSchemaFolder"), forcedMode = false)
48 |
49 | Configuration.loggingType.getOrElse("cassandra") match {
50 | case "cassandra" => dals = new CassandraDal(Configuration.cassandraHost.get, Configuration.cassandraClusterName.get, Configuration.cassandraKeyspace.get)
51 | case _ => dals = new HdfsDal(hdfsConf)
52 | }
53 |
54 | hiveContext = createHiveContext(dals)
55 | }
56 |
57 | def createHiveContext(dal: DAL): HiveContextWrapper = {
58 | val jars = Array(Configuration.jarPath.get)
59 |
60 | def configToSparkConf(config: Config, contextName: String, jars: Array[String]): SparkConf = {
61 | val sparkConf = new SparkConf().setAppName(contextName).setJars(jars)
62 | for (
63 | property <- config.entrySet().asScala if property.getKey.startsWith("spark") && property.getValue != null
64 | ) {
65 | val key = property.getKey.replaceAll("-", ".")
66 | println(key + " | " + property.getValue.unwrapped())
67 | sparkConf.set(key, property.getValue.unwrapped().toString)
68 | }
69 | sparkConf
70 | }
71 |
72 | val hContext: HiveContextWrapper = {
73 | val sparkConf = configToSparkConf(Configuration.sparkConf, Configuration.applicationName.getOrElse("Jaws"), jars)
74 | val sContext = new SparkContext(sparkConf)
75 |
76 | val hContext = new HiveContextWrapper(sContext)
77 | hContext.sparkContext.addSparkListener(new LoggingListener(dal))
78 |
79 | HiveUtils.setSharkProperties(hContext, this.getClass.getClassLoader.getResourceAsStream("sharkSettings.txt"))
80 | //make sure that lazy variable hiveConf gets initialized
81 | hContext.runMetadataSql("use default")
82 | hContext
83 | }
84 | hContext
85 | }
86 |
87 | def getHadoopConf: org.apache.hadoop.conf.Configuration = {
88 | val configuration = new org.apache.hadoop.conf.Configuration()
89 | configuration.setBoolean(Utils.FORCED_MODE, Configuration.forcedMode.getOrElse("false").toBoolean)
90 |
91 | // set the hadoop name node (fs.defaultFS)
92 | Configuration.namenode match {
93 | case None =>
94 | val message = "You need to set the namenode! "
95 | Configuration.log4j.error(message)
96 | throw new RuntimeException(message)
97 |
98 | case _ => configuration.set("fs.defaultFS", Configuration.namenode.get)
99 |
100 | }
101 |
102 | configuration.set("dfs.replication", Configuration.replicationFactor.getOrElse("1"))
103 |
104 | configuration.set(Utils.LOGGING_FOLDER, Configuration.loggingFolder.getOrElse("jawsLogs"))
105 | configuration.set(Utils.STATUS_FOLDER, Configuration.stateFolder.getOrElse("jawsStates"))
106 | configuration.set(Utils.DETAILS_FOLDER, Configuration.detailsFolder.getOrElse("jawsDetails"))
107 | configuration.set(Utils.METAINFO_FOLDER, Configuration.metaInfoFolder.getOrElse("jawsMetainfoFolder"))
108 | configuration.set(Utils.QUERY_NAME_FOLDER, Configuration.queryNameFolder.getOrElse("jawsQueryNameFolder"))
109 | configuration.set(Utils.QUERY_PUBLISHED_FOLDER, Configuration.queryPublishedFolder.getOrElse("jawsQueryPublishedFolder"))
110 | configuration.set(Utils.QUERY_UNPUBLISHED_FOLDER, Configuration.queryUnpublishedFolder.getOrElse("jawsQueryUnpublishedFolder"))
111 | configuration.set(Utils.RESULTS_FOLDER, Configuration.resultsFolder.getOrElse("jawsResultsFolder"))
112 | configuration.set(Utils.PARQUET_TABLES_FOLDER, Configuration.parquetTablesFolder.getOrElse("parquetTablesFolder"))
113 |
114 | configuration
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/LogsActor.scala:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import akka.actor.{ Actor, ActorLogging }
4 | import scala.collection._
5 | import org.java_websocket.WebSocket
6 | import server.ReactiveServer.Close
7 | import server.ReactiveServer.Error
8 | import server.ReactiveServer.Open
9 | import akka.actor.actorRef2Scala
10 |
11 | /**
12 | * Created by emaorhian
13 | */
14 | object LogsActor {
15 | sealed trait LogsMessage
16 |
17 | case class Unregister(ws: Option[WebSocket]) extends LogsMessage
18 | case class PushLogs(uuid: String, msg: String) extends LogsMessage
19 |
20 | }
21 |
22 | class LogsActor extends Actor with ActorLogging {
23 | import LogsActor._
24 | import server.ReactiveServer._
25 |
26 | val uuidToClients = mutable.Map[String, mutable.ListBuffer[WebSocket]]()
27 |
28 | override def receive = {
29 | case Open(uuid, ws, hs) => {
30 | var webSockets = mutable.ListBuffer[WebSocket]()
31 | uuidToClients.get(uuid) match {
32 | case None => webSockets = mutable.ListBuffer[WebSocket]()
33 | case Some(wss) => webSockets = wss
34 | }
35 | uuidToClients += ((uuid, (webSockets :+ ws)))
36 | log.info("registered monitor for {}", ws.getLocalSocketAddress())
37 | }
38 |
39 | case Close(ws, code, reason, ext) => self ! Unregister(ws)
40 |
41 | case Error(ws, ex) => self ! Unregister(ws)
42 |
43 | case PushLogs(uuid, msg) =>
44 | log.debug("received msg '{}'", msg)
45 | val webSockets = uuidToClients.get(uuid)
46 | webSockets match {
47 | case None => log.debug("There is no such uuid")
48 | case Some(wss) => wss.foreach(ws => ws.send(msg))
49 | }
50 |
51 | case Unregister(ws) => {
52 | ws match {
53 | case None => log.info("There is nothing to unregister")
54 | case Some(wss) =>
55 | log.info("unregister monitor")
56 | uuidToClients.foreach(tuple => {
57 | val clients = tuple._2
58 | clients -= wss
59 | uuidToClients.put(tuple._1, clients)
60 | })
61 | }
62 |
63 | }
64 | }
65 | }
66 |
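
A compact sketch of the logging fan-out: the ReactiveServer registers sockets with Open, and query logs are then pushed with PushLogs; the uuid below is hypothetical and, with no socket registered, the actor only logs that the uuid is unknown.

    import akka.actor.{ActorSystem, Props}
    import server.LogsActor
    import server.LogsActor.PushLogs

    object LogsActorSketch extends App {
      val system = ActorSystem("sketchSystem")
      val logsActor = system.actorOf(Props(new LogsActor), "logsSketch")

      // Without a previous Open(uuid, ws, hs) registration this only logs "There is no such uuid";
      // once the ReactiveServer registers a socket for the uuid, the same message is sent to it.
      logsActor ! PushLogs("hypothetical-query-uuid", "query started")

      system.shutdown()
    }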
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/MainActors.scala:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import akka.actor.{ActorSystem, Props, ActorRef}
4 | import apiactors.ActorsPaths._
5 | import apiactors.ActorsPaths
6 | import scala.concurrent.Await
7 | import akka.util.Timeout
8 | import akka.pattern.ask
9 |
10 | /**
11 | * Created by emaorhian
12 | */
13 | object MainActors {
14 | // self: Systems =>
15 | val localSystem: ActorSystem = ActorSystem("localSystem")
16 | val remoteSystem: ActorSystem = ActorSystem("remoteSystem", Configuration.remote)
17 | val localSupervisor = localSystem.actorOf(Props(classOf[Supervisor]), ActorsPaths.LOCAL_SUPERVISOR_ACTOR_NAME)
18 | val remoteSupervisor = remoteSystem.actorOf(Props(classOf[Supervisor]), ActorsPaths.REMOTE_SUPERVISOR_ACTOR_NAME)
19 | val logsActor = createActor(Props(new LogsActor), LOGS_WEBSOCKETS_ACTOR_NAME, localSupervisor)
20 |
21 |
22 | def createActor(props: Props, name: String, supervisor: ActorRef): ActorRef = {
23 | implicit val timeout = Timeout(Configuration.timeout)
24 | val future = ask(supervisor, (props, name))
25 | val actor = Await.result(future, timeout.duration).asInstanceOf[ActorRef]
26 | actor
27 | }
28 | }
29 |
30 |
31 |
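
A sketch of how createActor is meant to be used: the Props and the actor name are sent to a supervisor, which replies with the created ActorRef; the EchoActor below is hypothetical.

    import akka.actor.{Actor, Props}
    import server.MainActors

    object MainActorsSketch {
      // Hypothetical actor used only to illustrate supervised creation.
      class EchoActor extends Actor {
        def receive = { case msg => sender ! msg }
      }

      // The actor ends up under the local supervisor, so its failures are handled by the
      // OneForOneStrategy defined in Supervisor (see Supervisor.scala below).
      lazy val echoActor = MainActors.createActor(Props(new EchoActor), "echoActor", MainActors.localSupervisor)
    }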
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/ReactiveServer.scala:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import akka.actor.ActorRef
4 | import java.net.InetSocketAddress
5 | import org.java_websocket.WebSocket
6 | import org.java_websocket.server.WebSocketServer
7 | import org.java_websocket.handshake.ClientHandshake
8 | import akka.actor.actorRef2Scala
9 |
10 | /**
11 | * Created by emaorhian
12 | */
13 | object ReactiveServer {
14 | sealed trait ReactiveServerMessage
15 | case class Message(ws: WebSocket, msg: String)
16 | extends ReactiveServerMessage
17 | case class Open(uuid: String, ws: WebSocket, hs: ClientHandshake)
18 | extends ReactiveServerMessage
19 | case class Close(ws: Option[WebSocket], code: Int, reason: String, external: Boolean)
20 | extends ReactiveServerMessage
21 | case class Error(ws: Option[WebSocket], ex: Exception)
22 | extends ReactiveServerMessage
23 |
24 | }
25 | class ReactiveServer(val port: Int, val reactor: ActorRef)
26 | extends WebSocketServer(new InetSocketAddress(port)) {
27 |
28 | val urlPattern = """^\/jaws\/logs\?.*(?<=&|\?)uuid=([^&]+)""".r
29 |
30 | final override def onMessage(ws: WebSocket, msg: String) {
31 | }
32 |
33 | final override def onOpen(ws: WebSocket, hs: ClientHandshake) {
34 | Option(ws) match {
35 | case None => Configuration.log4j.debug("[ReactiveServer] the ws is null")
36 | case _ => {
37 | val description = hs.getResourceDescriptor()
38 | val urlPattern(uuid) = description
39 | reactor ! ReactiveServer.Open(uuid, ws, hs)
40 | }
41 | }
42 | }
43 | final override def onClose(ws: WebSocket, code: Int, reason: String, external: Boolean) {
44 | Option(ws) match {
45 | case None => Configuration.log4j.debug("[ReactiveServer] the ws is null")
46 | case _ => {
47 | reactor ! ReactiveServer.Close(Option(ws), code, reason, external)
48 |
49 | }
50 | }
51 | }
52 | final override def onError(ws: WebSocket, ex: Exception) {
53 | reactor ! ReactiveServer.Error(Option(ws), ex)
54 |
55 | }
56 | }
57 |
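
A tiny sketch of what the urlPattern extractor does with a WebSocket resource descriptor; the sample URL is hypothetical but mirrors the /jaws/logs endpoint clients are expected to open.

    object UrlPatternSketch extends App {
      // Same regular expression as ReactiveServer.urlPattern, repeated here for a standalone run.
      val urlPattern = """^\/jaws\/logs\?.*(?<=&|\?)uuid=([^&]+)""".r

      // The uuid query parameter is captured no matter where it sits in the query string.
      "/jaws/logs?someFlag=true&uuid=1234-abcd" match {
        case urlPattern(uuid) => println(s"registering logs monitor for query $uuid")
        case _ => println("not a logs subscription URL")
      }
    }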
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/Supervisor.scala:
--------------------------------------------------------------------------------
1 | package server
2 | import akka.actor.Actor
3 | import akka.actor.OneForOneStrategy
4 | import akka.actor.SupervisorStrategy._
5 | import scala.concurrent.duration._
6 | import akka.actor.Props
7 | import akka.actor.actorRef2Scala
8 | import akka.actor.ActorSystem
9 |
10 | /**
11 | * Created by emaorhian
12 | */
13 | class Supervisor extends Actor {
14 |
15 | override val supervisorStrategy =
16 | OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 1 minute) {
17 |
18 | case ex : Throwable => {
19 | Resume
20 | }
21 | }
22 |
23 | def receive = {
24 | case (p: Props, name: String) => sender ! context.actorOf(p, name)
25 | }
26 |
27 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/api/BaseApi.scala:
--------------------------------------------------------------------------------
1 | package server.api
2 |
3 | import akka.actor.{Props, ActorRef}
4 | import akka.util.Timeout
5 | import apiactors.{BalancerActor, RunScriptApiActor}
6 | import com.xpatterns.jaws.data.contracts.DAL
7 | import implementation.HiveContextWrapper
8 | import server.Configuration
9 | import spray.routing.HttpService
10 | import server.MainActors._
11 | import apiactors.ActorsPaths._
12 |
13 | /**
14 | * The base trait api. It contains the common data used by the other api classes.
15 | */
16 | trait BaseApi extends HttpService {
17 | // The default timeout for the futures
18 | implicit val timeout = Timeout(Configuration.timeout.toInt)
19 |
20 | // The hdfs configuration that is initialized when the server starts
21 | var hdfsConf: org.apache.hadoop.conf.Configuration = _
22 |
23 | // The hive context that is initialized when the server starts
24 | var hiveContext: HiveContextWrapper = _
25 |
26 | // Holds the DAL. It is initialized when the server starts
27 | var dals: DAL = _
28 |
29 | // The actor that is handling the scripts that are run on Hive or Spark SQL. This field is lazy because the hdfs
30 | // configuration and the hive context are not initialized at the moment of creating the object.
31 | lazy val runScriptActor = createActor(Props(new RunScriptApiActor(hdfsConf, hiveContext, dals)), RUN_SCRIPT_ACTOR_NAME, remoteSupervisor)
32 |
33 | // The actor that is handling the parquet tables
34 | lazy val balancerActor = createActor(Props(classOf[BalancerActor]), BALANCER_ACTOR_NAME, remoteSupervisor)
35 |
36 | /**
37 | * @param pathType the path type of the requested name node
38 | * @return the proper namenode path
39 | */
40 | protected def getNamenodeFromPathType(pathType:String):String = {
41 | if ("hdfs".equals(pathType)) {
42 | Configuration.hdfsNamenodePath
43 | } else if ("tachyon".equals(pathType)) {
44 | Configuration.tachyonNamenodePath
45 | } else {
46 | ""
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/api/IndexApi.scala:
--------------------------------------------------------------------------------
1 | package server.api
2 |
3 | import customs.CORSDirectives
4 | import server.Configuration
5 | import spray.http.{HttpMethods, HttpHeaders}
6 | import spray.routing.{HttpService, Route}
7 |
8 | /**
9 | * Handles the calls to index page
10 | */
11 | trait IndexApi extends HttpService with CORSDirectives {
12 | /**
13 | * Handles the /jaws/index call. If the server starts successfully, this call returns a proper message.
14 | */
15 | def indexRoute: Route = path("index") {
16 | get {
17 | corsFilter(List(Configuration.corsFilterAllowedHosts.getOrElse("*"))) {
18 | complete {
19 | "Jaws is up and running!"
20 | }
21 | }
22 |
23 | } ~
24 | options {
25 | corsFilter(List(Configuration.corsFilterAllowedHosts.getOrElse("*")), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) {
26 | complete {
27 | "OK"
28 | }
29 | }
30 | }
31 | }
32 | }
33 |
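
A sketch of how the index route could be verified with spray-testkit; the test class is hypothetical and only asserts the health message defined above.

    import org.scalatest.{FunSuite, Matchers}
    import spray.testkit.ScalatestRouteTest
    import server.api.IndexApi

    class IndexRouteSketch extends FunSuite with Matchers with ScalatestRouteTest with IndexApi {
      // HttpService needs an ActorRefFactory; the ActorSystem provided by ScalatestRouteTest is enough.
      def actorRefFactory = system

      test("GET /index answers with the health message") {
        Get("/index") ~> indexRoute ~> check {
          responseAs[String] shouldEqual "Jaws is up and running!"
        }
      }
    }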
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scala/server/api/UIApi.scala:
--------------------------------------------------------------------------------
1 | package server.api
2 |
3 |
4 | import customs.CORSDirectives
5 | import spray.http.{StatusCodes, HttpMethods, HttpHeaders}
6 | import spray.routing._
7 |
8 | /**
9 | * Handles the calls for getting the ui stored in webapp.
10 | */
11 | trait UIApi extends HttpService with CORSDirectives {
12 | /**
13 | * Handles the call /jaws/ui/ for getting the ui.
14 | */
15 | def uiRoute: Route = pathPrefix("ui") {
16 | // Handles the call made to /ui/ by returning the index page stored in webapp folder.
17 | pathSingleSlash {
18 | get {
19 | getFromResource("webapp/index.html")
20 | } ~ options {
21 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) {
22 | complete {
23 | "OK"
24 | }
25 | }
26 | }
27 | } ~
28 | // When a request is made to another resource from the ui, the call is redirected to the default path
29 | pathEnd {
30 | redirect("ui/", StatusCodes.PermanentRedirect)
31 | } ~
32 | get {
33 | getFromResourceDirectory("webapp")
34 | } ~
35 | options {
36 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) {
37 | complete {
38 | "OK"
39 | }
40 | }
41 | }
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/scripts/start-jaws.sh:
--------------------------------------------------------------------------------
1 | get_abs_script_path() {
2 | pushd . >/dev/null
3 | cd $(dirname $0)
4 | dir=$(pwd)
5 | popd >/dev/null
6 | }
7 |
8 | get_abs_script_path
9 | parentdir="$(dirname "$dir")"
10 | logsFolder=$parentdir/logs
11 | if [ ! -d "$logsFolder" ]; then
12 | echo "Creating logs folder"$logsFolder
13 | mkdir $logsFolder
14 | fi
15 |
16 |
17 | export CLASSPATH_PREFIX=$parentdir"/resources"
18 |
19 |
20 | echo "Exporting system variables..."
21 | . $parentdir/conf/jaws-env.sh
22 |
23 |
24 | export CLASSPATH_PREFIX=$parentdir"/resources"
25 |
26 |
27 | echo $TACHYON_WAREHOUSE_PATH
28 | echo $TACHYON_MASTER
29 | echo $MESOS_NATIVE_LIBRARY
30 | echo $JAVA_OPTS
31 | echo $CLASSPATH_PREFIX
32 |
33 | echo "Deploying jaws..."
34 | $dir/main-jaws.sh
35 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/main/webapp/hello.txt:
--------------------------------------------------------------------------------
1 | Hello Jaws!
2 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/resources/application.conf:
--------------------------------------------------------------------------------
1 | spray.can.server {
2 | # uncomment the next line for making this an HTTPS example
3 | # ssl-encryption = on
4 | idle-timeout = 301 s
5 | request-timeout = 300 s
6 | }
7 |
8 | remote{
9 | akka {
10 | //loglevel = "DEBUG"
11 | actor {
12 | provider = "akka.remote.RemoteActorRefProvider"
13 | }
14 | remote {
15 | enabled-transports = ["akka.remote.netty.tcp"]
16 | log-sent-messages = on
17 | log-received-messages = on
18 | netty.tcp {
19 | transport-class = "akka.remote.transport.netty.NettyTransport"
20 | hostname = "devbox.local"
21 | port = 4042
22 | }
23 | }
24 | }
25 | }
26 |
27 | ############ spark configuration - see spark documentation ####################
28 | sparkConfiguration {
29 | spark-executor-memory=2g
30 | spark-mesos-coarse=false
31 | spark-scheduler-mode=FAIR
32 | spark-cores-max=2
33 | spark-master="spark://devbox.local:7077"
34 | spark-path="/home/ubuntu/latest-mssh/spark-1.0.1"
35 | spark-default-parallelism=384
36 | spark-storage-memoryFraction=0.3
37 | spark-shuffle-memoryFraction=0.6
38 | spark-shuffle-compress=true
39 | spark-shuffle-spill-compress=true
40 | spark-reducer-maxMbInFlight=48
41 | spark-akka-frameSize=10000
42 | spark-akka-threads=4
43 | spark-akka-timeout=100
44 | spark-task-maxFailures=4
45 | spark-shuffle-consolidateFiles=true
46 | spark-deploy-spreadOut=true
47 | spark-shuffle-spill=false
48 | #Serialization settings commented until more tests are performed
49 | #spark-serializer="org.apache.spark.serializer.KryoSerializer"
50 | #spark-kryoserializer-buffer-mb=10
51 | #spark-kryoserializer-buffer-max-mb=64
52 | spark-kryo-referenceTracking=false
53 |
54 |
55 | }
56 |
57 | ######### application configuration ###################
58 | appConf{
59 | # the interface on which to start the spray server : localhost/ip/hostname
60 | server.interface=localhost
61 | # the cors filter allowed hosts
62 | cors-filter-allowed-hosts="*"
63 | # the default number of results retrieved on queries
64 | nr.of.results=100
65 | # the ip of the destination namenode - it is used when querying with unlimited number of results.
66 | rdd.destination.ip="devbox.local"
67 | # where to store the results in the case of an unlimited query. Possible values: hdfs/tachyon. Default: hdfs
68 | rdd.destination.location="hdfs"
69 | # the remote domain actor address
70 | remote.domain.actor=""
71 | #remote.domain.actor="devbox.local:port,devbox2.local:port"
72 | # application name
73 | application.name="Jaws"
74 | # the port on which to deploy the apis
75 | web.services.port=9080
76 | # the port on which to deploy the web sockets api (logs)
77 | web.sockets.port=8182
78 | # the number of threads used to execute shark commands
79 | nr.of.threads=10
80 | # implicit akka timeout
81 | timeout=1000000
82 | #where to log: app.logging.type = cassandra/hdfs
83 | app.logging.type=cassandra
84 | # folder where to write the results schema
85 | schemaFolder=jawsSchemaFolder
86 | # the path to the xpatterns-jaws jar in the target folder
87 | jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/jaws-spark-sql-rest.jar
88 | #jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/test-app.jar
89 | # the path to the hdfs namenode
90 | hdfs-namenode-path="hdfs://devbox.local:8020"
91 | # the path to the tachyon namenode
92 | tachyon-namenode-path="tachyon://devbox.local:19998"
93 | }
94 |
95 | ########## hadoop configuration - skip this if you are using cassandra logging ########
96 | hadoopConf {
97 | namenode="hdfs://devbox.local:8020"
98 | replicationFactor=1
99 | # set to true if you want to start fresh (all the existing folders will be recreated)
100 | forcedMode=false
101 | # folder where to write the logs
102 | loggingFolder=jawsLogs
103 | # folder where to write the jobs states
104 | stateFolder=jawsStates
105 | # folder where to write the jobs details
106 | detailsFolder=jawsDetails
107 | # folder where to write the jobs results
108 | resultsFolder=jawsResultsFolder
109 | # folder where to write the jobs meta information
110 | metaInfoFolder=jawsMetainfoFolder
111 | # folder where to write the query name information
112 | queryNameFolder=jawsQueryNameFolder
113 | # folder where to write the published queries
114 | queryPublishedFolder=jawsQueryPublishedFolder
115 | # folder where to write the unpublished queries
116 | queryUnpublishedFolder=jawsQueryUnpublishedFolder
117 | # folder where to write the parquet tables information
118 | parquetTablesFolder=parquetTablesFolder
119 | }
120 |
121 | ########## cassandra configuration - skip this if you are using hdfs logging ##########
122 | cassandraConf {
123 | cassandra.host="devbox.local:9160"
124 | cassandra.keyspace=xpatterns_jaws
125 | cassandra.cluster.name=Jaws
126 | }
127 |
128 |
129 |
130 |
131 | test{
132 | dataFilePath=/src/test/resources
133 | }
134 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/resources/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 |
3 | <configuration>
4 |
5 | <property>
6 | <name>fs.defaultFS</name>
7 | <value>hdfs://devbox.local:8020</value>
8 | </property>
9 | <property>
10 | <name>fs.trash.interval</name>
11 | <value>1</value>
12 | </property>
13 | <property>
14 | <name>io.compression.codecs</name>
15 | <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
16 | </property>
17 | <property>
18 | <name>hadoop.security.authentication</name>
19 | <value>simple</value>
20 | </property>
21 | <property>
22 | <name>hadoop.security.authorization</name>
23 | <value>false</value>
24 | </property>
25 | <property>
26 | <name>hadoop.rpc.protection</name>
27 | <value>authentication</value>
28 | </property>
29 | <property>
30 | <name>hadoop.security.auth_to_local</name>
31 | <value>DEFAULT</value>
32 | </property>
33 | <property>
34 | <name>hadoop.proxyuser.oozie.hosts</name>
35 | <value>*</value>
36 | </property>
37 | <property>
38 | <name>hadoop.proxyuser.oozie.groups</name>
39 | <value>*</value>
40 | </property>
41 | <property>
42 | <name>hadoop.proxyuser.mapred.hosts</name>
43 | <value>*</value>
44 | </property>
45 | <property>
46 | <name>hadoop.proxyuser.mapred.groups</name>
47 | <value>*</value>
48 | </property>
49 | <property>
50 | <name>hadoop.proxyuser.flume.hosts</name>
51 | <value>*</value>
52 | </property>
53 | <property>
54 | <name>hadoop.proxyuser.flume.groups</name>
55 | <value>*</value>
56 | </property>
57 | <property>
58 | <name>hadoop.proxyuser.HTTP.hosts</name>
59 | <value>*</value>
60 | </property>
61 | <property>
62 | <name>hadoop.proxyuser.HTTP.groups</name>
63 | <value>*</value>
64 | </property>
65 | <property>
66 | <name>hadoop.proxyuser.hive.hosts</name>
67 | <value>*</value>
68 | </property>
69 | <property>
70 | <name>hadoop.proxyuser.hive.groups</name>
71 | <value>*</value>
72 | </property>
73 | <property>
74 | <name>hadoop.proxyuser.hue.hosts</name>
75 | <value>*</value>
76 | </property>
77 | <property>
78 | <name>hadoop.proxyuser.hue.groups</name>
79 | <value>*</value>
80 | </property>
81 | <property>
82 | <name>hadoop.proxyuser.httpfs.hosts</name>
83 | <value>*</value>
84 | </property>
85 | <property>
86 | <name>hadoop.proxyuser.httpfs.groups</name>
87 | <value>*</value>
88 | </property>
89 | <property>
90 | <name>hadoop.proxyuser.hdfs.groups</name>
91 | <value>*</value>
92 | </property>
93 | <property>
94 | <name>hadoop.proxyuser.hdfs.hosts</name>
95 | <value>*</value>
96 | </property>
97 | <property>
98 | <name>hadoop.security.group.mapping</name>
99 | <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
100 | </property>
101 | <property>
102 | <name>hadoop.security.instrumentation.requires.admin</name>
103 | <value>false</value>
104 | </property>
105 | <property>
106 | <name>io.file.buffer.size</name>
107 | <value>65536</value>
108 | </property>
109 | <property>
110 | <name>hadoop.ssl.enabled</name>
111 | <value>false</value>
112 | </property>
113 | <property>
114 | <name>hadoop.ssl.require.client.cert</name>
115 | <value>false</value>
116 | <final>true</final>
117 | </property>
118 | <property>
119 | <name>hadoop.ssl.keystores.factory.class</name>
120 | <value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
121 | <final>true</final>
122 | </property>
123 | <property>
124 | <name>hadoop.ssl.server.conf</name>
125 | <value>ssl-server.xml</value>
126 | <final>true</final>
127 | </property>
128 | <property>
129 | <name>hadoop.ssl.client.conf</name>
130 | <value>ssl-client.xml</value>
131 | <final>true</final>
132 | </property>
133 | <property>
134 | <name>fs.tachyon.impl</name>
135 | <value>tachyon.hadoop.TFS</value>
136 | </property>
137 | </configuration>
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/resources/data/kv1.txt:
--------------------------------------------------------------------------------
1 | 1,a
2 | 2,b
3 | 3,c
4 | 4,d
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/resources/hive-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 |
3 | <configuration>
4 |
5 | <property>
6 | <name>hive.metastore.local</name>
7 | <value>false</value>
8 | </property>
9 | <property>
10 | <name>hive.metastore.uris</name>
11 | <value>thrift://devbox.local:9083</value>
12 | </property>
13 | <property>
14 | <name>hive.metastore.client.socket.timeout</name>
15 | <value>300</value>
16 | </property>
17 | <property>
18 | <name>hive.metastore.warehouse.dir</name>
19 | <value>/user/hive/warehouse</value>
20 | </property>
21 | <property>
22 | <name>hive.warehouse.subdir.inherit.perms</name>
23 | <value>true</value>
24 | </property>
25 | <property>
26 | <name>mapred.reduce.tasks</name>
27 | <value>-1</value>
28 | </property>
29 | <property>
30 | <name>hive.exec.reducers.bytes.per.reducer</name>
31 | <value>1073741824</value>
32 | </property>
33 | <property>
34 | <name>hive.exec.reducers.max</name>
35 | <value>999</value>
36 | </property>
37 | <property>
38 | <name>hive.metastore.execute.setugi</name>
39 | <value>true</value>
40 | </property>
41 | <property>
42 | <name>hive.support.concurrency</name>
43 | <value>false</value>
44 | </property>
45 | <property>
46 | <name>hive.zookeeper.quorum</name>
47 | <value>devbox.local</value>
48 | </property>
49 | <property>
50 | <name>hive.zookeeper.client.port</name>
51 | <value>2181</value>
52 | </property>
53 | <property>
54 | <name>hbase.zookeeper.quorum</name>
55 | <value>devbox.local</value>
56 | </property>
57 | <property>
58 | <name>hbase.zookeeper.property.clientPort</name>
59 | <value>2181</value>
60 | </property>
61 | <property>
62 | <name>hive.zookeeper.namespace</name>
63 | <value>hive_zookeeper_namespace_hive</value>
64 | </property>
65 | <property>
66 | <name>hive.server2.enable.doAs</name>
67 | <value>true</value>
68 | </property>
69 | <property>
70 | <name>fs.hdfs.impl.disable.cache</name>
71 | <value>true</value>
72 | </property>
73 | <property>
74 | <name>hive.server2.use.SSL</name>
75 | <value>false</value>
76 | </property>
77 | </configuration>
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/resources/jaws-env.sh:
--------------------------------------------------------------------------------
1 | export TACHYON_WAREHOUSE_PATH=/sharktables
2 | export TACHYON_MASTER=tachyon://devbox.local:19998
3 | export MESOS_NATIVE_LIBRARY=/home/user/mesos-0.19.0/lib/libmesos.so
4 | export JAVA_OPTS="$JAVA_OPTS -Djava.library.path=/home/user/mesos-0.19.0/lib/libmesos.so:/home/user/hadoopNativeLibs"
5 |
6 |
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | #
3 | # The following properties set the logging levels and log appender. The
4 | # log4j.rootCategory variable defines the default log level and one or more
5 | # appenders. For the console, use 'S'. For the daily rolling file, use 'R'.
6 | # For an HTML formatted log, use 'H'.
7 | #
8 | # To override the default (rootCategory) log level, define a property of the
9 | # form (see below for available values):
10 | #
11 | # log4j.logger.<logger name> = <log level>
12 | #
13 | # Available logger names:
14 | # TODO
15 | #
16 | # Possible Log Levels:
17 | # FATAL, ERROR, WARN, INFO, DEBUG
18 | #
19 | #------------------------------------------------------------------------------
20 |
21 | #log4j.category.me.prettyprint.cassandra = INFO, dataConsole, dataFile
22 | #log4j.additivity.me.prettyprint.cassandra = false
23 | #log4j.category.DATA = INFO, dataConsole, dataFile
24 | #log4j.additivity.DATA = false
25 |
26 | log4j.rootCategory = INFO, defaultConsole, defaultFile
27 |
28 | #log4j.category.com.xpatterns.xrelevance.content.data = INFO, dataConsole, dataFile
29 | #log4j.additivity.com.xpatterns.xrelevance.content.data = false
30 | #log4j.category.com.xpatterns.xrelevance.configuration.data = INFO, dataConsole, dataFile
31 | #log4j.additivity.com.xpatterns.xrelevance.configuration.data = false
32 | #log4j.category.com.xpatterns.xrelevance.data = INFO, dataConsole, dataFile
33 | #log4j.additivity.com.xpatterns.xrelevance.data = false
34 |
35 |
36 | #------------------------------------------------------------------------------
37 | #
38 | # The following properties configure the console (stdout) appender.
39 | # See http://logging.apache.org/log4j/docs/api/index.html for details.
40 | #
41 | #------------------------------------------------------------------------------
42 | log4j.appender.defaultConsole = org.apache.log4j.ConsoleAppender
43 | log4j.appender.defaultConsole.layout = org.apache.log4j.PatternLayout
44 | log4j.appender.defaultConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
45 |
46 | #------------------------------------------------------------------------------
47 | #
48 | # The following properties configure the Daily Rolling File appender.
49 | # See http://logging.apache.org/log4j/docs/api/index.html for details.
50 | #
51 | #------------------------------------------------------------------------------
52 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender
53 | log4j.appender.defaultFile.File = jaws-spark-sql-rest.log
54 | log4j.appender.defaultFile.Append = true
55 | log4j.appender.defaultFile.DatePattern = '.'yyyy-MM-dd
56 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout
57 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
58 |
59 | #console for data project
60 | #log4j.appender.dataConsole = org.apache.log4j.ConsoleAppender
61 | #log4j.appender.dataConsole.layout = org.apache.log4j.PatternLayout
62 | #log4j.appender.dataConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
63 |
64 | #file for data project
65 | #log4j.appender.dataFile = org.apache.log4j.DailyRollingFileAppender
66 | #log4j.appender.dataFile.File = ${catalina.home}/logs/xpatterns-api-data-4.0.log
67 | #log4j.appender.dataFile.Append = true
68 | #log4j.appender.dataFile.DatePattern = '.'yyy-MM-dd
69 | #log4j.appender.dataFile.layout = org.apache.log4j.PatternLayout
70 | #log4j.appender.dataFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/resources/sharkSettings.txt:
--------------------------------------------------------------------------------
1 | set mapreduce.job.reduces=128
2 | set hive.column.compress=true
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/scala/api/DeleteQueryTest.scala:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import scala.concurrent._
4 | import org.scalatest.FunSuite
5 | import org.scalamock.scalatest.MockFactory
6 | import org.scalatest.BeforeAndAfter
7 | import org.scalamock.proxy.ProxyMockFactory
8 | import org.scalatest.WordSpecLike
9 | import org.scalatest.concurrent._
10 | import server.JawsController
11 | import com.xpatterns.jaws.data.contracts.DAL
12 | import akka.actor.ActorRef
13 | import server.Configuration
14 | import com.xpatterns.jaws.data.impl.CassandraDal
15 | import com.xpatterns.jaws.data.impl.HdfsDal
16 | import scala.concurrent.ExecutionContext.Implicits.global
17 | import akka.actor.ActorSystem
18 | import akka.actor.Props
19 | import org.junit.runner.RunWith
20 | import org.scalatest.junit.JUnitRunner
21 | import akka.util.Timeout
22 | import akka.pattern.ask
23 | import com.xpatterns.jaws.data.DTO.Query
24 | import scala.concurrent.duration._
25 | import akka.testkit.TestActorRef
26 | import akka.actor.Status.Success
27 | import com.xpatterns.jaws.data.contracts.TJawsLogging
28 | import com.xpatterns.jaws.data.utils.QueryState
29 | import java.util.UUID
30 | import apiactors.DeleteQueryApiActor
31 | import messages.DeleteQueryMessage
32 | import messages.ErrorMessage
33 |
34 | @RunWith(classOf[JUnitRunner])
35 | class DeleteQueryTest extends FunSuite with BeforeAndAfter with ScalaFutures {
36 |
37 | val hdfsConf = JawsController.getHadoopConf
38 | var dals: DAL = _
39 |
40 | implicit val timeout = Timeout(10000)
41 | implicit val system = ActorSystem("localSystem")
42 |
43 | before {
44 | Configuration.loggingType.getOrElse("cassandra") match {
45 | case "cassandra" => dals = new CassandraDal(Configuration.cassandraHost.get, Configuration.cassandraClusterName.get, Configuration.cassandraKeyspace.get)
46 | case _ => dals = new HdfsDal(hdfsConf)
47 | }
48 | }
49 |
50 | // **************** TESTS *********************
51 |
52 | test(" not found ") {
53 |
54 | val tAct = TestActorRef(new DeleteQueryApiActor(dals))
55 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString()
56 | val f = tAct ? DeleteQueryMessage(queryId)
57 | whenReady(f)(s => assert(s === new ErrorMessage(s"DELETE query failed with the following message: The query ${queryId} was not found. Please provide a valid query id")))
58 |
59 | }
60 |
61 |
62 | test(" in progress ") {
63 |
64 | val tAct = TestActorRef(new DeleteQueryApiActor(dals))
65 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString()
66 | dals.loggingDal.setState(queryId, QueryState.IN_PROGRESS)
67 |
68 | val f = tAct ? DeleteQueryMessage(queryId)
69 | whenReady(f)(s => assert(s === new ErrorMessage(s"DELETE query failed with the following message: The query ${queryId} is IN_PROGRESS. Please wait for its completion or cancel it")))
70 |
71 | }
72 |
73 | test(" ok ") {
74 |
75 | val tAct = TestActorRef(new DeleteQueryApiActor(dals))
76 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString()
77 | dals.loggingDal.setState(queryId, QueryState.DONE)
78 |
79 | val f = tAct ? DeleteQueryMessage(queryId)
80 | whenReady(f)(s => assert(s === s"Query ${queryId} was deleted"))
81 |
82 | }
83 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/scala/api/GetQueryInfoTest.scala:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import scala.concurrent._
4 | import org.scalatest.FunSuite
5 | import org.scalamock.scalatest.MockFactory
6 | import org.scalatest.BeforeAndAfter
7 | import org.scalamock.proxy.ProxyMockFactory
8 | import org.scalatest.WordSpecLike
9 | import org.scalatest.concurrent._
10 | import server.JawsController
11 | import com.xpatterns.jaws.data.contracts.DAL
12 | import akka.actor.ActorRef
13 | import server.Configuration
14 | import com.xpatterns.jaws.data.impl.CassandraDal
15 | import com.xpatterns.jaws.data.impl.HdfsDal
16 | import scala.concurrent.ExecutionContext.Implicits.global
17 | import akka.actor.ActorSystem
18 | import akka.actor.Props
19 | import org.junit.runner.RunWith
20 | import org.scalatest.junit.JUnitRunner
21 | import akka.util.Timeout
22 | import akka.pattern.ask
23 | import com.xpatterns.jaws.data.DTO.Query
24 | import scala.concurrent.duration._
25 | import akka.testkit.TestActorRef
26 | import akka.actor.Status.Success
27 | import com.xpatterns.jaws.data.contracts.TJawsLogging
28 | import com.xpatterns.jaws.data.utils.QueryState
29 | import java.util.UUID
30 | import com.xpatterns.jaws.data.DTO.QueryMetaInfo
31 | import apiactors.GetQueriesApiActor
32 | import messages.GetQueriesMessage
33 | import com.xpatterns.jaws.data.DTO.Queries
34 |
35 | @RunWith(classOf[JUnitRunner])
36 | class GetQueryInfoTest extends FunSuite with BeforeAndAfter with ScalaFutures {
37 |
38 | val hdfsConf = JawsController.getHadoopConf
39 | var dals: DAL = _
40 |
41 | implicit val timeout = Timeout(10000)
42 | implicit val system = ActorSystem("localSystem")
43 |
44 | before {
45 | Configuration.loggingType.getOrElse("cassandra") match {
46 | case "cassandra" => dals = new CassandraDal(Configuration.cassandraHost.get, Configuration.cassandraClusterName.get, Configuration.cassandraKeyspace.get)
47 | case _ => dals = new HdfsDal(hdfsConf)
48 | }
49 | }
50 |
51 | // **************** TESTS *********************
52 |
53 | test(" not found ") {
54 |
55 | val tAct = TestActorRef(new GetQueriesApiActor(dals))
56 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString()
57 | val f = tAct ? GetQueriesMessage(Seq(queryId))
58 | whenReady(f)(s => s match {
59 | case queries: Queries => {
60 | assert(queries.queries.size === 1)
61 | assert(queries.queries(0) === new Query("NOT_FOUND", queryId, "", new QueryMetaInfo))
62 | }
63 | case _ => fail
64 | })
65 | }
66 |
67 | test(" found ") {
68 |
69 | val tAct = TestActorRef(new GetQueriesApiActor(dals))
70 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString()
71 | val executionTime = 100L
72 | val currentTimestamp = System.currentTimeMillis()
73 | val metaInfo = new QueryMetaInfo(100, 150, 1, true)
74 | dals.loggingDal.setState(queryId, QueryState.IN_PROGRESS)
75 | dals.loggingDal.setScriptDetails(queryId, "test script")
76 | dals.loggingDal.setExecutionTime(queryId, executionTime)
77 | dals.loggingDal.setTimestamp(queryId, currentTimestamp)
78 | dals.loggingDal.setRunMetaInfo(queryId, metaInfo)
79 | metaInfo.timestamp = currentTimestamp
80 | metaInfo.executionTime = executionTime
81 |
82 | val f = tAct ? GetQueriesMessage(Seq(queryId))
83 | whenReady(f)(s => s match {
84 | case queries: Queries =>
85 | assert(queries.queries.length === 1)
86 | assert(queries.queries(0) === new Query("IN_PROGRESS", queryId, "test script", metaInfo))
87 | case _ => fail()
88 | })
89 |
90 | dals.loggingDal.deleteQuery(queryId)
91 |
92 | }
93 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/scala/implementation/HiveUtilsTest.scala:
--------------------------------------------------------------------------------
1 | package implementation
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.junit.JUnitRunner
5 | import org.scalatest.FunSuite
6 | import org.apache.spark.scheduler.HiveUtils
7 | import scala.util.Try
8 |
9 | @RunWith(classOf[JUnitRunner])
10 | class HiveUtilsTest extends FunSuite {
11 |
12 | test("split path: ok hdfs") {
13 | val (namenode, path) = HiveUtils.splitPath("hdfs://devbox:8020/user/ubuntu/testParquet.parquet")
14 | assert(namenode === "hdfs://devbox:8020")
15 | assert(path === "/user/ubuntu/testParquet.parquet")
16 |
17 | }
18 |
19 | test("split path: ok tachyon") {
20 | val (namenode, path) = HiveUtils.splitPath("tachyon://devbox:19998/user/ubuntu/testParquet.parquet")
21 | assert(namenode === "tachyon://devbox:19998")
22 | assert(path === "/user/ubuntu/testParquet.parquet")
23 |
24 | }
25 |
26 | test("split path: empty") {
27 | val trySplit = Try(HiveUtils.splitPath(""))
28 |
29 | assert(trySplit.isFailure === true)
30 | assert("Invalid file path format : " === trySplit.failed.get.getMessage())
31 | }
32 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/scala/utils/TestSuite.scala:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.Suites
5 | import org.scalatest.junit.JUnitRunner
6 | import api.{QueryPropertiesTest, DeleteQueryTest, GetQueryInfoTest}
7 | import implementation.HiveUtilsTest
8 |
9 |
10 | @RunWith(classOf[JUnitRunner])
11 | class TestSuite extends Suites(new DeleteQueryTest, new QueryPropertiesTest, new GetQueryInfoTest, new HiveUtilsTest) {
12 | }
--------------------------------------------------------------------------------
/jaws-spark-sql-rest/src/test/scala/utils/TestUtils.scala:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import org.apache.log4j.Logger
4 | import server.JawsController
5 | import java.text.SimpleDateFormat
6 | import java.util.Date
7 |
8 | object TestUtils {
9 | import com.typesafe.config.ConfigFactory
10 |
11 |
12 | val timestamp = new SimpleDateFormat("yyyyMMdd-HHmmss")
13 |
14 | def getWarehousePath(prefix: String): String = {
15 | System.getProperty("user.dir") + "/test_warehouses/" + prefix + "-warehouse-" +
16 | timestamp.format(new Date)
17 | }
18 |
19 | def getMetastorePath(prefix: String): String = {
20 | System.getProperty("user.dir") + "/test_warehouses/" + prefix + "-metastore-" +
21 | timestamp.format(new Date)
22 | }
23 |
24 | val log4j = Logger.getLogger(TestUtils.getClass())
25 |
26 | private val conf = ConfigFactory.load
27 | conf.checkValid(ConfigFactory.defaultReference)
28 |
29 | val test = conf.getConfig("test").withFallback(conf)
30 | val dataFilePath = System.getProperty("user.dir") + Option(test.getString("dataFilePath")).getOrElse("") + "/data"
31 | }
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 | <modelVersion>4.0.0</modelVersion>
4 |
5 | <groupId>com.xpatterns</groupId>
6 | <artifactId>jaws-spark-sql</artifactId>
7 | <version>1.1.0-spark1.3.1</version>
8 | <packaging>pom</packaging>
9 |
10 | <name>jaws-spark-sql</name>
11 | <url>http://maven.apache.org</url>
12 |
13 | <modules>
14 | <module>jaws-spark-sql-rest</module>
15 | <module>jaws-spark-sql-data</module>
16 | <module>jaws-hive-sql-rest</module>
17 | </modules>
18 |
19 | </project>
--------------------------------------------------------------------------------