├── .gitignore ├── LICENSE.md ├── README.md ├── jaws-hive-sql-rest ├── LICENSE.md ├── pom.xml └── src │ ├── main │ ├── assembly │ │ └── archive.xml │ ├── resources │ │ ├── application.conf │ │ └── log4j.properties │ └── scala │ │ ├── apiactors │ │ └── HiveRunnerActor.scala │ │ ├── customs │ │ ├── CORSDirectives.scala │ │ ├── CommandsProcessor.scala │ │ └── ResultsProcessor.scala │ │ └── server │ │ └── HiveController.scala │ └── test │ ├── resources │ └── application.conf │ └── scala │ ├── CommandsProcessorTest.scala │ └── ResultsProcessorTest.scala ├── jaws-spark-sql-data ├── LICENSE.md ├── pom.xml └── src │ ├── main │ └── scala │ │ └── com │ │ └── xpatterns │ │ └── jaws │ │ └── data │ │ ├── DTO │ │ ├── AvroBinaryResult.scala │ │ ├── AvroResult.scala │ │ ├── Column.scala │ │ ├── CustomResult.scala │ │ ├── Databases.scala │ │ ├── Log.scala │ │ ├── Logs.scala │ │ ├── ParquetTable.scala │ │ ├── Queries.scala │ │ ├── Query.scala │ │ ├── QueryMetaInfo.scala │ │ ├── Table.scala │ │ └── Tables.scala │ │ ├── contracts │ │ ├── DAL.scala │ │ ├── TJawsLogging.scala │ │ ├── TJawsParquetTables.scala │ │ └── TJawsResults.scala │ │ ├── impl │ │ ├── CassandraDal.scala │ │ ├── HdfsDal.scala │ │ ├── JawsCassandraLogging.scala │ │ ├── JawsCassandraParquetTables.scala │ │ ├── JawsCassandraResults.scala │ │ ├── JawsHdfsLogging.scala │ │ ├── JawsHdfsParquetTables.scala │ │ └── JawsHdfsResults.scala │ │ └── utils │ │ ├── AvroConverter.scala │ │ ├── CustomConverter.scala │ │ ├── GsonHelper.scala │ │ ├── QueryState.scala │ │ ├── Randomizer.scala │ │ ├── ResultsConverter.scala │ │ └── Utils.scala │ └── test │ ├── resources │ └── application.conf │ └── scala │ └── com │ └── xpatterns │ └── jaws │ └── data │ ├── impl │ ├── JawsCassandraParquetTablesTest.scala │ ├── JawsHdfsParquetTablesTest.scala │ ├── JawsLoggingOnHdfsTest.scala │ ├── JawsLoggingTest.scala │ ├── JawsResultsOnHdfsTest.scala │ └── JawsResultsTest.scala │ └── utils │ ├── AvroConverterComplexTest.scala │ ├── AvroConverterCustomTest.scala │ ├── AvroConverterTest.scala │ ├── AvroSerializerTest.scala │ └── CustomConverterTest.scala ├── jaws-spark-sql-integration-tests ├── LICENSE.md ├── pom.xml └── src │ └── test │ ├── resources │ ├── application.conf │ ├── jawsTest.parquet │ │ ├── _metadata │ │ └── part-r-1.parquet │ └── people.txt │ └── scala │ ├── api │ ├── GetDatabasesApiTest.scala │ ├── JawsIsUpTest.scala │ ├── ParquetManagementApiTest.scala │ ├── RunApiTest.scala │ ├── RunHiveApiTest.scala │ └── TestSuite.scala │ └── foundation │ ├── TestBase.scala │ └── UtilOperations.scala ├── jaws-spark-sql-rest ├── LICENSE.md ├── pom.xml └── src │ ├── main │ ├── assembly │ │ └── archive.xml │ ├── resources │ │ ├── application.conf │ │ ├── cassandra-schema.txt │ │ ├── core-site.xml │ │ ├── hive-site.xml │ │ ├── jaws-env.sh │ │ ├── log4j.properties │ │ └── sharkSettings.txt │ ├── scala │ │ ├── apiactors │ │ │ ├── ActorUtils.scala │ │ │ ├── BalancerActor.scala │ │ │ ├── DeleteQueryApiActor.scala │ │ │ ├── GetDatabasesApiActor.scala │ │ │ ├── GetDatasourceSchemaActor.scala │ │ │ ├── GetLogsApiActor.scala │ │ │ ├── GetParquetTablesApiActor.scala │ │ │ ├── GetQueriesApiActor.scala │ │ │ ├── GetResultsApiActor.scala │ │ │ ├── GetTablesApiActor.scala │ │ │ ├── QueryPropertiesApiActor.scala │ │ │ ├── RegisterParquetTableApiActor.scala │ │ │ └── RunScriptApiActor.scala │ │ ├── customs │ │ │ ├── CORSDirectives.scala │ │ │ ├── CustomDirectives.scala │ │ │ └── CustomIndexer.scala │ │ ├── implementation │ │ │ ├── HiveContextWrapper.scala │ │ │ └── SchemaSettingsFactory.scala │ 
│ ├── messages │ │ │ └── Messages.scala │ │ ├── org │ │ │ └── apache │ │ │ │ └── spark │ │ │ │ └── scheduler │ │ │ │ ├── HiveUtils.scala │ │ │ │ ├── LoggingListener.scala │ │ │ │ └── RunScriptTask.scala │ │ └── server │ │ │ ├── Configuration.scala │ │ │ ├── JawsController.scala │ │ │ ├── LogsActor.scala │ │ │ ├── MainActors.scala │ │ │ ├── ReactiveServer.scala │ │ │ ├── Supervisor.scala │ │ │ └── api │ │ │ ├── BaseApi.scala │ │ │ ├── IndexApi.scala │ │ │ ├── MetadataApi.scala │ │ │ ├── ParquetApi.scala │ │ │ ├── QueryManagementApi.scala │ │ │ └── UIApi.scala │ ├── scripts │ │ └── start-jaws.sh │ └── webapp │ │ └── hello.txt │ └── test │ ├── resources │ ├── application.conf │ ├── core-site.xml │ ├── data │ │ └── kv1.txt │ ├── hive-site.xml │ ├── jaws-env.sh │ ├── log4j.properties │ └── sharkSettings.txt │ └── scala │ ├── api │ ├── DeleteQueryTest.scala │ ├── GetQueryInfoTest.scala │ └── QueryPropertiesTest.scala │ ├── implementation │ └── HiveUtilsTest.scala │ └── utils │ ├── TestSuite.scala │ └── TestUtils.scala └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | syntax: glob 3 | *.log 4 | RemoteSystemsTempFiles 5 | *.metadata 6 | target/* 7 | .DS_Store 8 | *.settings 9 | *.classpath 10 | *.idea 11 | *.iml 12 | *.orig 13 | *.txt~ 14 | src/main/webapp/resources/node_modules/ 15 | xpatterns-data-init/application-context.xml 16 | atlassian-ide-plugin.xml 17 | bin/* 18 | */META-INF/ 19 | */src/main/webapp/resources/node_modules/ 20 | */node_modules/ 21 | atlassian-ide-plugin.xml 22 | RemoteSystemsTempFiles 23 | Servers 24 | bin 25 | .metadata 26 | target 27 | .DS_Store 28 | .settings 29 | .classpath 30 | .project 31 | project 32 | classpath 33 | HadoopCryptoCompressor 34 | .idea 35 | .iml 36 | .orig 37 | .java.orig 38 | syntax: glob 39 | java-driver 40 | ioenl 41 | derby.log 42 | mancenter-2.5 43 | dependency-reduced-pom.xml 44 | test_warehouses 45 | localhost 46 | jawsSchemaFolder 47 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- /jaws-hive-sql-rest/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
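
The application.conf a little further below defines the appConf block this REST service reads at startup (server interface, port, implicit Akka timeout, logging backend, number of hive worker threads). The module's own Configuration object is not part of this excerpt, so the following is only a hedged sketch of how those keys could be read with the Typesafe Config library that Spray/Akka services normally use; the ConfSketch name and the printed summary are illustrative, not code from the repository.

import com.typesafe.config.ConfigFactory

object ConfSketch extends App {
  // Load application.conf from the classpath and pick out the appConf block
  val appConf = ConfigFactory.load().getConfig("appConf")

  val interface = appConf.getString("server.interface")   // e.g. localhost
  val port      = appConf.getInt("web.services.port")     // e.g. 7080
  val timeout   = appConf.getLong("timeout")              // implicit akka timeout
  val logType   = appConf.getString("app.logging.type")   // cassandra or hdfs
  val nrThreads = appConf.getInt("nr.of.threads")         // hive command thread pool size

  println(s"Would start on $interface:$port with $nrThreads hive threads, logging to $logType (timeout $timeout)")
}
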
-------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/main/assembly/archive.xml: -------------------------------------------------------------------------------- 1 | 5 | archive 6 | 7 | tar.gz 8 | 9 | 10 | 11 | ${project.build.directory}/temp_build 12 | / 13 | 14 | 15 | ${basedir}/conf 16 | /conf 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | spray.can.server { 2 | # uncomment the next line for making this an HTTPS example 3 | # ssl-encryption = on 4 | idle-timeout = 301 s 5 | request-timeout = 300 s 6 | } 7 | 8 | 9 | 10 | ######### application configuration ################### 11 | appConf{ 12 | # the interface on which to start the spray server : localhost/ip/hostname 13 | server.interface=localhost 14 | # the cors filter allowed hosts 15 | cors-filter-allowed-hosts="*" 16 | # implicit akka timeout 17 | timeout=1000000 18 | #app port 19 | web.services.port=7080 20 | #where to log: app.logging.type = cassandra/hdfs 21 | app.logging.type=cassandra 22 | # the number of threads used to execute hive commands 23 | nr.of.threads=10 24 | 25 | } 26 | 27 | 28 | ########## hadoop configuration - skip this if you are using cassandra logging ######## 29 | hadoopConf { 30 | namenode="hdfs://devbox.local:8020" 31 | replicationFactor=1 32 | # set on true if you want to start fresh (all the existing folders will be recreated) 33 | forcedMode=false 34 | # folder where to write the logs 35 | loggingFolder=jawsLogs 36 | # folder where to write the jobs states 37 | stateFolder=jawsStates 38 | # folder where to write the jobs details 39 | detailsFolder=jawsDetails 40 | # folder where to write the jobs results 41 | resultsFolder=jawsResultsFolder 42 | # folder where to write the jobs meta information 43 | metaInfoFolder=jawsMetainfoFolder 44 | # folder where to write the name of query information 45 | queryNameFolder=jawsQueryNameFolder 46 | # folder where to write the published queries 47 | queryPublishedFolder=jawsQueryPublishedFolder 48 | # folder where to write the unpublished queries 49 | queryUnpublishedFolder=jawsQueryUnpublishedFolder 50 | # folder where to write the parquet tables information 51 | parquetTablesFolder=parquetTablesFolder 52 | } 53 | 54 | ########## cassandra configuration - skip this if you are using hdfs logging ########## 55 | cassandraConf { 56 | cassandra.host="devbox.local:9160" 57 | cassandra.keyspace=xpatterns_jaws 58 | cassandra.cluster.name=Jaws 59 | } -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # 3 | # The following properties set the logging levels and log appender. The 4 | # log4j.rootCategory variable defines the default log level and one or more 5 | # appenders. For the console, use 'S'. For the daily rolling file, use 'R'. 6 | # For an HTML formatted log, use 'H'. 7 | # 8 | # To override the default (rootCategory) log level, define a property of the 9 | # form (see below for available values): 10 | # 11 | # log4j.logger. 
= 12 | # 13 | # Available logger names: 14 | # TODO 15 | # 16 | # Possible Log Levels: 17 | # FATAL, ERROR, WARN, INFO, DEBUG 18 | # 19 | #------------------------------------------------------------------------------ 20 | 21 | #log4j.category.me.prettyprint.cassandra = INFO, dataConsole, dataFile 22 | #log4j.additivity.me.prettyprint.cassandra = false 23 | #log4j.category.DATA = INFO, dataConsole, dataFile 24 | #log4j.additivity.DATA = false 25 | 26 | log4j.rootCategory = INFO, defaultConsole, defaultFile 27 | 28 | #log4j.category.com.xpatterns.xrelevance.content.data = INFO, dataConsole, dataFile 29 | #log4j.additivity.com.xpatterns.xrelevance.content.data = false 30 | #log4j.category.com.xpatterns.xrelevance.configuration.data = INFO, dataConsole, dataFile 31 | #log4j.additivity.com.xpatterns.xrelevance.configuration.data = false 32 | #log4j.category.com.xpatterns.xrelevance.data = INFO, dataConsole, dataFile 33 | #log4j.additivity.com.xpatterns.xrelevance.data = false 34 | 35 | 36 | #------------------------------------------------------------------------------ 37 | # 38 | # The following properties configure the console (stdout) appender. 39 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 40 | # 41 | #------------------------------------------------------------------------------ 42 | log4j.appender.defaultConsole = org.apache.log4j.ConsoleAppender 43 | log4j.appender.defaultConsole.layout = org.apache.log4j.PatternLayout 44 | log4j.appender.defaultConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 45 | 46 | #------------------------------------------------------------------------------ 47 | # 48 | # The following properties configure the Daily Rolling File appender. 49 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 
50 | # 51 | #------------------------------------------------------------------------------ 52 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender 53 | log4j.appender.defaultFile.File = jaws-hive-sql-rest.log 54 | log4j.appender.defaultFile.Append = true 55 | log4j.appender.defaultFile.DatePattern = '.'yyy-MM-dd 56 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout 57 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 58 | 59 | #console for data project 60 | #log4j.appender.dataConsole = org.apache.log4j.ConsoleAppender 61 | #log4j.appender.dataConsole.layout = org.apache.log4j.PatternLayout 62 | #log4j.appender.dataConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 63 | 64 | #file for data project 65 | #log4j.appender.dataFile = org.apache.log4j.DailyRollingFileAppender 66 | #log4j.appender.dataFile.File = ${catalina.home}/logs/xpatterns-api-data-4.0.log 67 | #log4j.appender.dataFile.Append = true 68 | #log4j.appender.dataFile.DatePattern = '.'yyy-MM-dd 69 | #log4j.appender.dataFile.layout = org.apache.log4j.PatternLayout 70 | #log4j.appender.dataFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/main/scala/apiactors/HiveRunnerActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import server.Configuration 4 | import sys.process._ 5 | import scala.collection.mutable.ListBuffer 6 | import akka.actor.Actor 7 | import scala.util.Try 8 | import scala.util.Success 9 | import scala.util.Failure 10 | import com.xpatterns.jaws.data.contracts.DAL 11 | import customs.CommandsProcessor._ 12 | import customs.ResultsProcessor._ 13 | import java.io.ByteArrayOutputStream 14 | import java.io.OutputStreamWriter 15 | import sys.process._ 16 | import scala.concurrent.ExecutionContext 17 | import java.util.concurrent.Executors 18 | import java.util.UUID 19 | import com.xpatterns.jaws.data.utils.QueryState 20 | import scala.concurrent._ 21 | import java.io.ByteArrayInputStream 22 | import java.io.InputStreamReader 23 | import java.io.BufferedReader 24 | import scala.io.Source 25 | import scala.io.BufferedSource 26 | import com.xpatterns.jaws.data.utils.Utils._ 27 | 28 | /** 29 | * Created by emaorhian 30 | */ 31 | 32 | case class RunQueryMessage(script: String, limit: Int) 33 | case class ErrorMessage(message: String) 34 | 35 | class HiveRunnerActor(dals: DAL) extends Actor { 36 | 37 | override def receive = { 38 | 39 | case message: RunQueryMessage => { 40 | Configuration.log4j.info(s"[HiveRunnerActor]: Running script=${message.script}") 41 | val uuid = System.currentTimeMillis() + UUID.randomUUID().toString() 42 | implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(Configuration.nrOfThreads.getOrElse("10").toInt)) 43 | var script = "" 44 | 45 | val startTime = System.currentTimeMillis() 46 | dals.loggingDal.setTimestamp(uuid, startTime) 47 | 48 | val tryPreRunScript = Try { 49 | writeLaunchStatus(uuid, message.script) 50 | script = prepareCommands(message.script, message.limit) 51 | } 52 | 53 | tryPreRunScript match { 54 | case Success(v) => sender ! uuid 55 | case Failure(e) => sender ! 
ErrorMessage(s"Run hive query failed with the following message: ${getCompleteStackTrace(e)}") 56 | } 57 | 58 | val runResponse = future { 59 | Configuration.log4j.info(s"[HiveRunnerActor]: Executing commands $script") 60 | runHiveScript(script, uuid) 61 | } 62 | 63 | runResponse onComplete { 64 | case Success(s) => { 65 | val message = s"[HiveRunnerActor]: Query $uuid has successfully finished" 66 | dals.resultsDal.setResults(uuid, s) 67 | setStatus(uuid, message, QueryState.DONE) 68 | 69 | val executionTime = System.currentTimeMillis() - startTime 70 | dals.loggingDal.setExecutionTime(uuid, executionTime) 71 | } 72 | case Failure(e) => { 73 | val message = s"[HiveRunnerActor]: Query $uuid has failed with the following exception ${getCompleteStackTrace(e)}" 74 | setStatus(uuid, message, QueryState.FAILED) 75 | } 76 | } 77 | } 78 | } 79 | 80 | private def runHiveScript(script: String, uuid: String) = { 81 | val stdOutOS = new ByteArrayOutputStream 82 | val osWriter = new OutputStreamWriter(stdOutOS) 83 | 84 | val command = Seq("hive", "-e", script) 85 | 86 | try { 87 | command ! ProcessLogger( 88 | stdOutLine => osWriter.write(s"$stdOutLine\n"), 89 | stdErrLine => { 90 | Configuration.log4j.info(stdErrLine) 91 | dals.loggingDal.addLog(uuid, "hive", System.currentTimeMillis(), stdErrLine) 92 | }) 93 | osWriter flush () 94 | 95 | getLastResults(new ByteArrayInputStream(stdOutOS.toByteArray())) 96 | 97 | } finally { 98 | if (osWriter != null) osWriter close () 99 | } 100 | } 101 | 102 | private def writeLaunchStatus(uuid: String, script: String) { 103 | dals.loggingDal.addLog(uuid, "hive", System.currentTimeMillis(), s"Launching task for $uuid") 104 | dals.loggingDal.setState(uuid, QueryState.IN_PROGRESS) 105 | dals.loggingDal.setScriptDetails(uuid, script) 106 | } 107 | 108 | private def setStatus(uuid: String, message: String, status: QueryState.Value) { 109 | Configuration.log4j.info(message) 110 | dals.loggingDal.addLog(uuid, "hive", System.currentTimeMillis(), message) 111 | dals.loggingDal.setState(uuid, status) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/main/scala/customs/CORSDirectives.scala: -------------------------------------------------------------------------------- 1 | package customs 2 | 3 | import spray.http._ 4 | import spray.routing._ 5 | import spray.http.HttpHeaders._ 6 | import spray.http.HttpMethod 7 | /** 8 | * Created by emaorhian 9 | */ 10 | trait CORSDirectives { this: HttpService => 11 | private def respondWithCORSHeaders(origin: String, rh: Seq[HttpHeader]) = { 12 | var headers: List[HttpHeader] = List( 13 | HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(List(origin))), 14 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 15 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid") 16 | ) ++ rh.toList 17 | 18 | respondWithHeaders(headers) 19 | } 20 | private def respondWithCORSHeadersAllOrigins(rh: Seq[HttpHeader]) = { 21 | var headers: List[HttpHeader] = List( 22 | HttpHeaders.`Access-Control-Allow-Origin`(AllOrigins), 23 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 24 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid") 25 | ) ++ rh.toList 26 | 27 | respondWithHeaders(headers) 28 | } 29 | 30 | def corsFilter(origins: List[String], rh: HttpHeader*)(route: Route) = 31 | if (origins.contains("*")) 32 | 
respondWithCORSHeadersAllOrigins(rh)(route) 33 | else 34 | optionalHeaderValueByName("Origin") { 35 | case None => 36 | route 37 | case Some(clientOrigin) => { 38 | if (origins.contains(clientOrigin)) 39 | respondWithCORSHeaders(clientOrigin, rh)(route) 40 | else { 41 | // Maybe, a Rejection will fit better 42 | complete(StatusCodes.Forbidden, "Invalid origin") 43 | } 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/main/scala/customs/CommandsProcessor.scala: -------------------------------------------------------------------------------- 1 | package customs 2 | 3 | import scala.collection.mutable.ListBuffer 4 | import org.apache.commons.lang.StringUtils 5 | import org.apache.commons.lang.RandomStringUtils 6 | import scala.collection.mutable.ListBuffer 7 | 8 | class CommandsProcessor 9 | object CommandsProcessor { 10 | 11 | val MORE_THAT_ONE_SELECT_EXCEPTION_MESSAGE = "The query must contain only one select, at the end" 12 | val QUERY_DELIMITATOR = "_jaws_query_delimitator_" 13 | 14 | def prepareCommands(script: String, numberOfResults: Int) = { 15 | val commandList = filterCommands(script) 16 | 17 | val commandsNb = commandList.size 18 | val firstCommands = commandList.take(commandsNb - 1) map (command => if (command.trim().toLowerCase().startsWith("select")) limitQuery(numberOfResults, command) else command) 19 | val lastCommand = if (commandList(commandsNb - 1).trim().toLowerCase().startsWith("select")) limitQuery(numberOfResults, commandList(commandsNb - 1)) else commandList(commandsNb - 1) 20 | 21 | firstCommands += ("set hive.cli.print.header=true", s"select '$QUERY_DELIMITATOR'", lastCommand) 22 | 23 | firstCommands addString (new StringBuilder, ";") toString 24 | } 25 | 26 | def filterCommands(script: String) = { 27 | val commandsList = ListBuffer.empty[String] 28 | script.split(";").foreach(oneCmd => { 29 | var command = oneCmd.trim() 30 | val trimmedCmd = oneCmd.trim() 31 | if (command.endsWith("\\")) { 32 | command = StringUtils.chop(command) + ";" 33 | } 34 | 35 | if (StringUtils.isBlank(command) == false) { 36 | commandsList += command 37 | } 38 | 39 | }) 40 | commandsList 41 | } 42 | 43 | def limitQuery(numberOfResults: Long, cmd: String): String = { 44 | val temporaryTableName = RandomStringUtils.randomAlphabetic(10) 45 | // take only x results 46 | return s"select $temporaryTableName.* from ( $cmd ) $temporaryTableName limit $numberOfResults" 47 | } 48 | } -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/main/scala/customs/ResultsProcessor.scala: -------------------------------------------------------------------------------- 1 | package customs 2 | 3 | import java.io.ByteArrayInputStream 4 | import scala.io.Source 5 | import customs.CommandsProcessor._ 6 | import com.xpatterns.jaws.data.DTO.Column 7 | import org.apache.spark.sql.catalyst.expressions.Row 8 | import com.xpatterns.jaws.data.utils.ResultsConverter 9 | import org.apache.spark.sql.types._ 10 | 11 | class ResultsProcessor 12 | object ResultsProcessor { 13 | val headerMatcher = "([^.]*?\\.)?(.+)".r 14 | 15 | def getLastResults(inputStream: ByteArrayInputStream): ResultsConverter = { 16 | val reader = Source.fromInputStream(inputStream) 17 | try { 18 | val lastCmdResults = reader getLines () dropWhile (!_.equals(QUERY_DELIMITATOR)) toList 19 | val headers = toStructType(getHeader(lastCmdResults(1))) 20 | val results = getResults(lastCmdResults drop 2) 21 | new 
ResultsConverter(headers, results) 22 | 23 | } finally if (reader != null) reader close () 24 | } 25 | 26 | def getHeader(headerLine: String): Array[String] = { 27 | headerLine split "\t" map (column => headerMatcher replaceAllIn (column, m => m group 2)) 28 | } 29 | 30 | def toStructType (headers : Array[String]) : StructType = { 31 | val fields = headers map (column => new StructField(column, StringType, true)) 32 | new StructType(fields) 33 | } 34 | 35 | def getResults(resultLines: List[String]): Array[Row] = { 36 | val resultsArray = resultLines map (line => line split "\t") toArray 37 | val result = resultsArray map (arr => Row.fromSeq(arr)) 38 | result 39 | } 40 | } -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | spray.can.server { 2 | # uncomment the next line for making this an HTTPS example 3 | # ssl-encryption = on 4 | idle-timeout = 301 s 5 | request-timeout = 300 s 6 | } 7 | 8 | 9 | 10 | ######### application configuration ################### 11 | appConf{ 12 | # the interface on which to start the spray server : localhost/ip/hostname 13 | server.interface=localhost 14 | # the cors filter allowed hosts 15 | cors-filter-allowed-hosts="*" 16 | # implicit akka timeout 17 | timeout=1000000 18 | #app port 19 | web.services.port=7080 20 | #where to log: app.logging.type = cassandra/hdfs 21 | app.logging.type=cassandra 22 | # the number of threads used to execute shark commands 23 | nr.of.threads=10 24 | } 25 | 26 | ########## hadoop configuration - skip this if you are using cassandra logging ######## 27 | hadoopConf { 28 | namenode="hdfs://devbox.local:8020" 29 | replicationFactor=1 30 | # set on true if you want to start fresh (all the existing folders will be recreated) 31 | forcedMode=false 32 | # folder where to write the logs 33 | loggingFolder=jawsLogs 34 | # folder where to write the jobs states 35 | stateFolder=jawsStates 36 | # folder where to write the jobs details 37 | detailsFolder=jawsDetails 38 | # folder where to write the jobs results 39 | resultsFolder=jawsResultsFolder 40 | # folder where to write the jobs meta information 41 | metaInfoFolder=jawsMetainfoFolder 42 | # folder where to write the name of query information 43 | queryNameFolder=jawsQueryNameFolder 44 | # folder where to write the published queries 45 | queryPublishedFolder=jawsQueryPublishedFolder 46 | # folder where to write the unpublished queries 47 | queryUnpublishedFolder=jawsQueryUnpublishedFolder 48 | # folder where to write the parquet tables information 49 | parquetTablesFolder=parquetTablesFolder 50 | 51 | 52 | 53 | } 54 | 55 | ########## cassandra configuration - skip this if you are using hdfs logging ########## 56 | cassandraConf { 57 | cassandra.host="devbox.local:9160" 58 | cassandra.keyspace=xpatterns_jaws 59 | cassandra.cluster.name=Jaws 60 | } -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/test/scala/CommandsProcessorTest.scala: -------------------------------------------------------------------------------- 1 | import org.junit.runner.RunWith 2 | import org.scalatest.FunSuite 3 | import org.scalatest.junit.JUnitRunner 4 | import customs.CommandsProcessor._ 5 | import scala.collection.mutable.ListBuffer 6 | import scala.util.Try 7 | import org.scalatest.Matchers._ 8 | @RunWith(classOf[JUnitRunner]) 9 | class CommandsProcessorTest extends FunSuite { 10 | 11 
| test("filterCommands : ok") { 12 | val filteredResults = filterCommands("use databaseName ;show tables; ;select * from table") 13 | 14 | assert(filteredResults.size === 3, "Different number of commands") 15 | assert(filteredResults === ListBuffer("use databaseName", "show tables", "select * from table")) 16 | 17 | } 18 | 19 | test("test the used regex") { 20 | val filteredResults = "select\\s+([\\w]+)\\.\\* from \\( select \\* from table \\) ([\\w]+) limit 2" 21 | "select adda.* from ( select * from table ) adda limit 2" should fullyMatch regex filteredResults 22 | 23 | } 24 | 25 | test("prepareCommands : ok-last command is a select") { 26 | 27 | val tryPrepareCommands = Try(prepareCommands("use databaseName ;show tables; ;select * from table", 2)) 28 | val requiredCommandString = s"use databaseName;show tables;set hive.cli.print.header=true;select '$QUERY_DELIMITATOR';select\\s+([\\w]+)\\.\\* from \\( select \\* from table \\) ([\\w]+) limit 2" 29 | 30 | assert(tryPrepareCommands.isSuccess, "Prepare commands failed") 31 | val returnedCommandString = tryPrepareCommands.get 32 | returnedCommandString should fullyMatch regex requiredCommandString 33 | } 34 | 35 | test("prepareCommands : ok-last command is not a select") { 36 | 37 | val tryPrepareCommands = Try(prepareCommands("use databaseName ;show tables; ;show tables", 2)) 38 | val requiredCommandString = s"use databaseName;show tables;set hive.cli.print.header=true;select '$QUERY_DELIMITATOR';show tables" 39 | 40 | assert(tryPrepareCommands.isSuccess, "Prepare commands failed") 41 | val returnedCommandString = tryPrepareCommands.get 42 | returnedCommandString should be (requiredCommandString) 43 | } 44 | 45 | test("prepareCommands : ok-one command") { 46 | 47 | val tryPrepareCommands = Try(prepareCommands("show databases", 2)) 48 | val requiredCommandString = s"set hive.cli.print.header=true;select '$QUERY_DELIMITATOR';show databases" 49 | 50 | assert(tryPrepareCommands.isSuccess, "Prepare commands failed") 51 | val returnedCommandString = tryPrepareCommands.get 52 | returnedCommandString should fullyMatch regex requiredCommandString 53 | } 54 | 55 | } -------------------------------------------------------------------------------- /jaws-hive-sql-rest/src/test/scala/ResultsProcessorTest.scala: -------------------------------------------------------------------------------- 1 | import org.junit.runner.RunWith 2 | import org.scalatest.junit.JUnitRunner 3 | import org.scalatest.FunSuite 4 | import scala.util.Try 5 | import customs.ResultsProcessor._ 6 | import org.scalatest.Matchers._ 7 | import java.io.ByteArrayOutputStream 8 | import java.io.OutputStreamWriter 9 | import customs.CommandsProcessor._ 10 | import java.io.ByteArrayInputStream 11 | import com.xpatterns.jaws.data.DTO.Column 12 | import org.apache.spark.sql.catalyst.expressions.Row 13 | import com.xpatterns.jaws.data.utils.ResultsConverter 14 | import scala.collection.mutable.WrappedArray 15 | import org.apache.spark.sql.types._ 16 | 17 | @RunWith(classOf[JUnitRunner]) 18 | class ResultsProcessorTest extends FunSuite { 19 | 20 | test("getHeader : columns with .") { 21 | 22 | val headers = getHeader("mzzmjgycpp.name mzzmjgycpp.age mzzmjgycpp.sex") 23 | val requiredHeaders = Array("name", "age", "sex") 24 | 25 | headers should be(requiredHeaders) 26 | } 27 | 28 | test("getHeader : columns without .") { 29 | 30 | val headers = getHeader("name age sex") 31 | val requiredHeaders = Array("name", "age", "sex") 32 | 33 | headers should be(requiredHeaders) 34 | } 35 | 36 | 
test("getResults") { 37 | 38 | val results = getResults(List("name age sex", "name1 age1 sex1", "name2 age2 sex2")) 39 | val requiredResults = Array(Row.fromSeq(Array("name", "age", "sex")), Row.fromSeq(Array("name1", "age1", "sex1")), 40 | Row.fromSeq(Array("name2", "age2", "sex2"))) 41 | 42 | results should be(requiredResults) 43 | } 44 | 45 | test("get Last Results") { 46 | 47 | val stdOutOS = new ByteArrayOutputStream 48 | val osWriter = new OutputStreamWriter(stdOutOS) 49 | osWriter.write("db1\n") 50 | osWriter.write("db2\n") 51 | osWriter.write("db3\n") 52 | osWriter.write(s"$QUERY_DELIMITATOR\n") 53 | osWriter.write("mzzmjgycpp.name mzzmjgycpp.age mzzmjgycpp.sex\n") 54 | osWriter.write("name age sex\n") 55 | osWriter.write("name1 age1 sex1\n") 56 | osWriter.write("name2 age2 sex2") 57 | 58 | osWriter.flush() 59 | 60 | val results = getLastResults(new ByteArrayInputStream(stdOutOS.toByteArray())) 61 | val requiredSchema = new StructType(Array(StructField("name", StringType, true), StructField("age", StringType, true), StructField("sex", StringType, true))) 62 | val requiredResults = new ResultsConverter(requiredSchema, 63 | Array(Row.fromSeq(Array("name", "age", "sex")), Row.fromSeq(Array("name1", "age1", "sex1")), Row.fromSeq(Array("name2", "age2", "sex2")))) 64 | 65 | osWriter.close() 66 | assert(results.schema === requiredResults.schema, "Not the same schema") 67 | assert(results.result === requiredResults.result, "Not the same results") 68 | } 69 | 70 | test("get Last Results - no results") { 71 | 72 | val stdOutOS = new ByteArrayOutputStream 73 | val osWriter = new OutputStreamWriter(stdOutOS) 74 | osWriter.write("db1\n") 75 | osWriter.write("db2\n") 76 | osWriter.write("db3\n") 77 | osWriter.write(s"$QUERY_DELIMITATOR\n") 78 | osWriter.write("mzzmjgycpp.name mzzmjgycpp.age mzzmjgycpp.sex\n") 79 | 80 | osWriter.flush() 81 | 82 | val results = getLastResults(new ByteArrayInputStream(stdOutOS.toByteArray())) 83 | val requiredResults = new ResultsConverter(StructType(Array(StructField("name", StringType, true), StructField("age", StringType, true), StructField("sex", StringType, true))), Array.empty) 84 | 85 | osWriter.close() 86 | assert(results.schema === requiredResults.schema, "Not the same schema") 87 | assert(results.result === requiredResults.result, "Not the same results") 88 | } 89 | 90 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
-------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/AvroBinaryResult.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | import spray.json.DefaultJsonProtocol._ 3 | import java.io.ByteArrayOutputStream 4 | import java.io.ObjectOutputStream 5 | import java.io.ObjectInputStream 6 | import java.io.ByteArrayInputStream 7 | import org.apache.avro.generic.GenericRecord 8 | import org.apache.avro.Schema 9 | import spray.json.RootJsonFormat 10 | import org.apache.avro.generic.GenericDatumWriter 11 | import org.apache.avro.io.EncoderFactory 12 | import org.apache.avro.generic.GenericDatumReader 13 | import org.apache.avro.io.DecoderFactory 14 | 15 | case class AvroBinaryResult(schema: Schema, result: Array[Byte]) { 16 | def this() = { 17 | this(null, Array.empty) 18 | } 19 | 20 | def this(avroResult : AvroResult) = { 21 | this(avroResult.schema, avroResult.serializeResult()) 22 | } 23 | 24 | override def hashCode(): Int = { 25 | val prime = 31 26 | var result = 1 27 | Option(result) match { 28 | case None => result = prime * result + 0 29 | case _ => result = prime * result + result.hashCode() 30 | } 31 | Option(schema) match { 32 | case None => result = prime * result + 0 33 | case _ => result = prime * result + schema.hashCode() 34 | } 35 | 36 | result 37 | } 38 | 39 | override def equals(other: Any): Boolean = { 40 | 41 | other match { 42 | 43 | case that: AvroBinaryResult => 44 | (that canEqual this) && 45 | result.deep == that.result.deep && 46 | schema == that.schema 47 | 48 | case _ => false 49 | } 50 | } 51 | 52 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/AvroResult.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | import spray.json.DefaultJsonProtocol._ 3 | import java.io.ByteArrayOutputStream 4 | import java.io.ObjectOutputStream 5 | import java.io.ObjectInputStream 6 | import java.io.ByteArrayInputStream 7 | import org.apache.avro.generic.GenericRecord 8 | import org.apache.avro.Schema 9 | import spray.json.RootJsonFormat 10 | import org.apache.avro.generic.GenericDatumWriter 11 | import org.apache.avro.io.EncoderFactory 12 | import org.apache.avro.generic.GenericDatumReader 13 | import org.apache.avro.io.DecoderFactory 14 | import scala.collection.mutable.ArrayBuffer 15 | import org.apache.avro.generic.IndexedRecord 16 | import org.apache.avro.file.SeekableByteArrayInput 17 | import org.apache.avro.file.FileReader 18 | import org.apache.avro.file.DataFileReader 19 | import org.apache.avro.file.SeekableInput 20 | import org.apache.avro.file.DataFileWriter 21 | 22 | case class AvroResult(schema: Schema, result: Array[GenericRecord]) { 23 | 24 | def this() = { 25 | this(null, Array.empty) 26 | } 27 | 28 | override def hashCode(): Int = { 29 | val prime = 31 30 | var result = 1 31 | Option(result) match { 32 | case None => result = prime * result + 0 33 | case _ => result = prime * result + result.hashCode() 34 | } 35 | Option(schema) match { 36 | case None => result = prime * result + 0 37 | case _ => result = prime * result + schema.hashCode() 38 | } 39 | 40 | result 41 | } 42 | 43 | override def equals(other: Any): Boolean = { 44 | 45 | other match { 46 | 47 | case that: AvroResult => 48 | (that canEqual this) && 49 | result.deep == 
that.result.deep && 50 | schema == that.schema 51 | 52 | case _ => false 53 | } 54 | } 55 | 56 | def serializeResult(): Array[Byte] = { 57 | val datumWriter = new GenericDatumWriter[GenericRecord](schema) 58 | val baos = new ByteArrayOutputStream() 59 | val fileWriter = new DataFileWriter[GenericRecord](datumWriter) 60 | fileWriter.create(schema, baos) 61 | val binaryResults = result map (row => { 62 | fileWriter.append(row) 63 | }) 64 | 65 | fileWriter.close() 66 | baos.toByteArray() 67 | } 68 | 69 | override def toString() = { 70 | var s = s"schema = ${schema.toString()} \n results = " 71 | result.foreach { r => s+= r.toString() } 72 | s 73 | } 74 | } 75 | object AvroResult { 76 | 77 | def deserializeResult(byteArray: Array[Byte], schema: Schema): Array[GenericRecord] = { 78 | val reader = new GenericDatumReader[GenericRecord](schema) 79 | val in = new SeekableByteArrayInput(byteArray) 80 | 81 | var dfr: FileReader[GenericRecord] = null 82 | val records = ArrayBuffer[GenericRecord]() 83 | try { 84 | dfr = DataFileReader.openReader(in, reader); 85 | while (dfr.hasNext()) { 86 | records += dfr.next() 87 | } 88 | 89 | } finally { 90 | if (dfr != null) { 91 | dfr.close(); 92 | } 93 | } 94 | 95 | records.toArray 96 | } 97 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Column.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | import spray.json.DefaultJsonProtocol._ 3 | import spray.json.JsonFormat 4 | 5 | /** 6 | * Created by emaorhian 7 | */ 8 | case class Column(name: String, dataType: String, comment: String, members: Array[Column]) { 9 | 10 | def this() = { 11 | this("", "", "", Array.empty) 12 | } 13 | 14 | override def hashCode(): Int = { 15 | val prime = 31 16 | var result = 1 17 | Option(name) match { 18 | case None => result = prime * result + 0 19 | case _ => result = prime * result + name.hashCode() 20 | } 21 | Option(dataType) match { 22 | case None => result = prime * result + 0 23 | case _ => result = prime * result + dataType.hashCode() 24 | } 25 | Option(comment) match { 26 | case None => result = prime * result + 0 27 | case _ => result = prime * result + comment.hashCode() 28 | } 29 | Option(members) match { 30 | case None => result = prime * result + 0 31 | case _ => result = prime * result + members.hashCode() 32 | } 33 | 34 | result 35 | } 36 | 37 | override def equals(other: Any): Boolean = { 38 | 39 | other match { 40 | 41 | case that: Column => 42 | (that canEqual this) && 43 | name == that.name && 44 | dataType == that.dataType && 45 | comment == that.comment && 46 | members.deep == that.members.deep 47 | 48 | case _ => false 49 | } 50 | } 51 | } 52 | 53 | object Column { 54 | implicit val columnJsonFormat: JsonFormat[Column] = lazyFormat(jsonFormat(Column.apply, "name", "dataType","comment", "members")) 55 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/CustomResult.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import spray.json.DefaultJsonProtocol._ 4 | import spray.json.RootJsonFormat 5 | 6 | case class CustomResult(schema: Array[Column], result: Array[Array[Any]]) { 7 | 8 | def this() = { 9 | this(Array.empty, Array.empty) 10 | } 11 | 12 | override def hashCode(): Int = { 13 | val prime = 31 14 | 
var result = 1 15 | Option(result) match { 16 | case None => result = prime * result + 0 17 | case _ => result = prime * result + result.hashCode() 18 | } 19 | Option(schema) match { 20 | case None => result = prime * result + 0 21 | case _ => result = prime * result + schema.hashCode() 22 | } 23 | 24 | result 25 | } 26 | 27 | override def equals(other: Any): Boolean = { 28 | 29 | other match { 30 | 31 | case that: CustomResult => 32 | (that canEqual this) && 33 | result.deep == that.result.deep && 34 | schema.deep == that.schema.deep 35 | 36 | case _ => false 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Databases.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import spray.json.DefaultJsonProtocol._ 4 | 5 | /** 6 | * Created by emaorhian 7 | */ 8 | case class Databases(databases: Array[String]) 9 | 10 | object Databases { 11 | implicit val databasesJson = jsonFormat1(apply) 12 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Log.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import spray.json.DefaultJsonProtocol._ 4 | 5 | 6 | /** 7 | * Created by emaorhian 8 | */ 9 | case class Log(log: String, queryID: String, timestamp: Long) 10 | 11 | object Log { 12 | implicit val logJson = jsonFormat3(apply) 13 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Logs.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | import spray.json.DefaultJsonProtocol._ 3 | 4 | /** 5 | * Created by emaorhian 6 | */ 7 | case class Logs (logs : Array[Log], status: String) 8 | 9 | object Logs { 10 | implicit val logsJson = jsonFormat2(apply) 11 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/ParquetTable.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | import spray.json.DefaultJsonProtocol._ 3 | 4 | case class ParquetTable(name: String, filePath: String, namenode : String){ 5 | def this() = { 6 | this("","","") 7 | } 8 | } 9 | object ParquetTable { 10 | implicit val logJson = jsonFormat3(apply) 11 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Queries.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import spray.json.DefaultJsonProtocol.arrayFormat 4 | import spray.json.DefaultJsonProtocol.jsonFormat1 5 | 6 | /** 7 | * Created by emaorhian 8 | */ 9 | case class Queries (queries : Array[Query]) 10 | 11 | object Queries { 12 | implicit val queriesJson = jsonFormat1(apply) 13 | 14 | 15 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Query.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import 
spray.json.DefaultJsonProtocol._ 4 | 5 | /** 6 | * Created by emaorhian 7 | */ 8 | case class Query(state: String, queryID: String, query: String, metaInfo : QueryMetaInfo) 9 | 10 | object Query { 11 | implicit val logJson = jsonFormat4(apply) 12 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/QueryMetaInfo.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import org.apache.log4j.Logger 4 | import spray.json.DefaultJsonProtocol._ 5 | import spray.json._ 6 | import scala.collection.mutable 7 | 8 | 9 | /** 10 | * Created by emaorhian 11 | */ 12 | case class QueryMetaInfo(var name:Option[String], var description:Option[String], var published:Option[Boolean], var timestamp:Long, var executionTime:Long, 13 | var nrOfResults:Long, var maxNrOfResults:Long, var resultsDestination:Int, 14 | var isLimited:Boolean){ 15 | // resultsDestination : 0-cassandra, 1-hdfs, 2-tachyon 16 | def this() = { 17 | this(None, None, None, 0, 0, 0, 0, 0, false) 18 | } 19 | 20 | def this(nrOfResults : Long, maxNrOfResults : Long, resultsDestination : Int, isLimited : Boolean) = { 21 | this(None, None, None, 0, 0, nrOfResults, maxNrOfResults, resultsDestination, isLimited) 22 | } 23 | 24 | } 25 | 26 | object QueryMetaInfo { 27 | val logger = Logger.getLogger("QueryMetaInfo") 28 | 29 | // A custom json format is defined because some fields might be missing. 30 | implicit val logJson = new RootJsonFormat[QueryMetaInfo] { 31 | def write(metaInfo: QueryMetaInfo):JsValue = { 32 | val fields:mutable.Map[String, JsValue] = mutable.Map.empty[String, JsValue] 33 | 34 | // Don't serialize the null values of name and description because this value means that they are deleted. 
35 | val queryHasName = if (metaInfo.name != None && metaInfo.name.get != null) { 36 | fields("name") = JsString(metaInfo.name.get) 37 | true 38 | } else { 39 | false 40 | } 41 | 42 | // Write the description or published only when the query has a name 43 | // to make sure that these properties are not visible 44 | if (metaInfo.description != None && metaInfo.description.get != null && queryHasName) { 45 | fields("description") = JsString(metaInfo.description.get) 46 | } 47 | 48 | if (metaInfo.published != None && metaInfo.name != None && metaInfo.name.get != null && queryHasName) { 49 | fields("published") = JsBoolean(metaInfo.published.get) 50 | } 51 | 52 | fields("timestamp") = JsNumber(metaInfo.timestamp) 53 | fields("executionTime") = JsNumber(metaInfo.executionTime) 54 | fields("nrOfResults") = JsNumber(metaInfo.nrOfResults) 55 | fields("maxNrOfResults") = JsNumber(metaInfo.maxNrOfResults) 56 | fields("resultsDestination") = JsNumber(metaInfo.resultsDestination) 57 | fields("isLimited") = JsBoolean(metaInfo.isLimited) 58 | 59 | JsObject(fields.toMap) 60 | } 61 | 62 | def read(value: JsValue):QueryMetaInfo = value match { 63 | case JsObject(fields) => 64 | val name = if (fields.contains("name")) { 65 | Some(fields.getOrElse("name", JsNull).convertTo[Option[String]].orNull) 66 | } else { 67 | None 68 | } 69 | 70 | val description = if (fields.contains("description")) { 71 | Some(fields.getOrElse("description", JsNull).convertTo[Option[String]].orNull) 72 | } else { 73 | None 74 | } 75 | 76 | val published = if (fields.contains("published")) { 77 | fields.getOrElse("published", JsNull).convertTo[Option[Boolean]] 78 | } else { 79 | None 80 | } 81 | 82 | val timestamp = fields.getOrElse("timestamp", JsNumber(0)).convertTo[Long] 83 | val executionTime = fields.getOrElse("executionTime", JsNumber(0)).convertTo[Long] 84 | val nrOfResults = fields.getOrElse("nrOfResults", JsNumber(0)).convertTo[Long] 85 | val maxNrOfResults = fields.getOrElse("maxNrOfResults", JsNumber(0)).convertTo[Long] 86 | val resultsDestination = fields.getOrElse("resultsDestination", JsNumber(0)).convertTo[Int] 87 | val isLimited = fields.getOrElse("isLimited", JsFalse).convertTo[Boolean] 88 | 89 | new QueryMetaInfo(name, description, published, timestamp, executionTime, nrOfResults, maxNrOfResults, 90 | resultsDestination, isLimited) 91 | 92 | case _ => deserializationError("Error while trying to parse a QueryMetaInfo") 93 | } 94 | } 95 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Table.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import spray.json.DefaultJsonProtocol._ 4 | 5 | 6 | /** 7 | * Created by emaorhian 8 | */ 9 | case class Table(name: String, columns: Array[Column], extraInfo : Array[Array[String]]) 10 | 11 | object Table { 12 | implicit val logJson = jsonFormat3(apply) 13 | 14 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/DTO/Tables.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.DTO 2 | 3 | import scala.collection.JavaConverters._ 4 | import spray.json.DefaultJsonProtocol._ 5 | 6 | /** 7 | * Created by emaorhian 8 | */ 9 | case class Tables(database: String, tables: Array[Table]) 10 | 11 | object Tables { 12 | implicit val tablesJson = 
jsonFormat2(apply) 13 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/DAL.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.contracts 2 | 3 | /** 4 | * Created by emaorhian 5 | */ 6 | trait DAL { 7 | 8 | def loggingDal : TJawsLogging 9 | def resultsDal : TJawsResults 10 | def parquetTableDal : TJawsParquetTables 11 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/TJawsLogging.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.contracts 2 | 3 | import com.xpatterns.jaws.data.DTO.QueryMetaInfo 4 | import com.xpatterns.jaws.data.DTO.Logs 5 | import com.xpatterns.jaws.data.DTO.Queries 6 | import com.xpatterns.jaws.data.utils.QueryState 7 | import com.xpatterns.jaws.data.utils.Utils 8 | import com.xpatterns.jaws.data.DTO.Query 9 | 10 | /** 11 | * Created by emaorhian 12 | */ 13 | trait TJawsLogging { 14 | def setState(queryId: String, queryState: QueryState.QueryState) 15 | def setScriptDetails(queryId: String, scriptDetails: String) 16 | def addLog(queryId: String, jobId: String, time: Long, log: String) 17 | 18 | def setExecutionTime(queryId: String, executionTime: Long): Unit = { 19 | Utils.TryWithRetry { 20 | val metaInfo = getMetaInfo(queryId) 21 | metaInfo.executionTime = executionTime 22 | setMetaInfo(queryId, metaInfo) 23 | } 24 | } 25 | 26 | def setTimestamp(queryId: String, time: Long): Unit = { 27 | Utils.TryWithRetry { 28 | val metaInfo = getMetaInfo(queryId) 29 | metaInfo.timestamp = time 30 | setMetaInfo(queryId, metaInfo) 31 | } 32 | } 33 | 34 | def setRunMetaInfo(queryId: String, metainfo: QueryMetaInfo) = { 35 | Utils.TryWithRetry { 36 | val newMetaInfo = getMetaInfo(queryId) 37 | newMetaInfo.nrOfResults = metainfo.nrOfResults 38 | newMetaInfo.maxNrOfResults = metainfo.maxNrOfResults 39 | newMetaInfo.resultsDestination = metainfo.resultsDestination 40 | newMetaInfo.isLimited = metainfo.isLimited 41 | setMetaInfo(queryId, newMetaInfo) 42 | } 43 | } 44 | 45 | def setQueryProperties(queryId: String, name: Option[String], description: Option[String], published:Option[Boolean], 46 | overwrite: Boolean) = { 47 | Utils.TryWithRetry { 48 | val metaInfo = getMetaInfo(queryId) 49 | 50 | if (name != None) { 51 | updateQueryName(queryId, metaInfo, name.get, overwrite) 52 | } 53 | 54 | if (description != None) { 55 | metaInfo.description = description 56 | } 57 | 58 | // When the name of a query is not present, the description and published flags should be removed, 59 | // because they appear only when a query has a name 60 | if (metaInfo.name == None || metaInfo.name.get == null) { 61 | metaInfo.description = None 62 | metaInfo.published = None 63 | } else if (published != None) { 64 | setQueryPublishedStatus(metaInfo.name.get, metaInfo, published.get) 65 | metaInfo.published = published 66 | } 67 | 68 | setMetaInfo(queryId, metaInfo) 69 | } 70 | } 71 | 72 | private def updateQueryName(queryId: String, metaInfo: QueryMetaInfo, name: String, overwrite:Boolean):Unit = { 73 | val newQueryName = if (name != null) name.trim() else null 74 | 75 | if (newQueryName != null && newQueryName.isEmpty) { 76 | return 77 | } 78 | 79 | if (!overwrite) { 80 | if (newQueryName != null && getQueriesByName(newQueryName).queries.nonEmpty) { 81 | // When the 
query name already exist and the overwrite flag is not set, 82 | // then the client should be warned about it 83 | throw new Exception(s"There is already a query with the name $name. To overwrite " + 84 | s"the query name, please send the parameter overwrite set on true") 85 | } 86 | } else if (newQueryName != null) { 87 | // When overwriting the old values, the old queries should have the name and description reset 88 | val notFoundState = QueryState.NOT_FOUND.toString 89 | for (query <- getQueriesByName(newQueryName).queries) { 90 | if (query.state != notFoundState) { 91 | query.metaInfo.name = None 92 | query.metaInfo.description = None 93 | setMetaInfo(query.queryID, query.metaInfo) 94 | } 95 | } 96 | } 97 | 98 | if (metaInfo.name != None && metaInfo.name.get != null) { 99 | // Delete the old query name 100 | deleteQueryName(metaInfo.name.get) 101 | // Remove the old published status of the query from storage 102 | deleteQueryPublishedStatus(metaInfo.name.get, metaInfo.published) 103 | } 104 | metaInfo.name = Some(newQueryName) 105 | 106 | if (newQueryName != null) { 107 | // Save the query name to be able to search it 108 | saveQueryName(newQueryName, queryId) 109 | 110 | // Set the default published value 111 | val published = metaInfo.published.getOrElse(false) 112 | setQueryPublishedStatus(newQueryName, metaInfo, published) 113 | metaInfo.published = Some(published) 114 | } 115 | } 116 | 117 | def setQueryPublishedStatus(name: String, metaInfo: QueryMetaInfo, published: Boolean) 118 | def deleteQueryPublishedStatus(name: String, published: Option[Boolean]) 119 | 120 | def setMetaInfo(queryId: String, metainfo: QueryMetaInfo) 121 | 122 | def getState(queryId: String): QueryState.QueryState 123 | def getScriptDetails(queryId: String): String 124 | def getLogs(queryId: String, time: Long, limit: Int): Logs 125 | def getMetaInfo(queryId: String): QueryMetaInfo 126 | 127 | def getQueries(queryId: String, limit: Int): Queries 128 | def getQueries(queryIds: Seq[String]): Queries = { 129 | Utils.TryWithRetry { 130 | val queryArray = queryIds map (queryID => new Query(getState(queryID).toString, queryID, getScriptDetails(queryID), getMetaInfo(queryID))) toArray 131 | val queries = new Queries(queryArray) 132 | queries 133 | } 134 | } 135 | 136 | def getPublishedQueries():Array[String] 137 | def getQueriesByName(name:String):Queries 138 | def deleteQueryName(name: String) 139 | def saveQueryName(name: String, queryId: String) 140 | 141 | def deleteQuery(queryId: String) 142 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/TJawsParquetTables.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.contracts 2 | 3 | import com.xpatterns.jaws.data.DTO.ParquetTable 4 | 5 | trait TJawsParquetTables { 6 | def addParquetTable(pTable : ParquetTable) 7 | def deleteParquetTable(name : String) 8 | def listParquetTables() : Array[ParquetTable] 9 | def tableExists(name : String) : Boolean 10 | def readParquetTable(name : String) : ParquetTable 11 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/contracts/TJawsResults.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.contracts 2 | 3 | import com.xpatterns.jaws.data.utils.Utils 4 | import 
com.xpatterns.jaws.data.utils.ResultsConverter 5 | import spray.json.DefaultJsonProtocol._ 6 | import com.xpatterns.jaws.data.DTO.AvroResult 7 | import com.xpatterns.jaws.data.DTO.CustomResult 8 | 9 | /** 10 | * Created by emaorhian 11 | */ 12 | trait TJawsResults { 13 | def setAvroResults (uuid: String, avroResults : AvroResult) 14 | def getAvroResults(uuid: String) : AvroResult 15 | def setCustomResults(uuid: String, results: CustomResult) 16 | def getCustomResults(uuid: String): CustomResult 17 | 18 | def setResults(uuid: String, results: ResultsConverter) { 19 | Utils.TryWithRetry { 20 | 21 | setAvroResults(uuid, results.toAvroResults()) 22 | setCustomResults(uuid, results.toCustomResults()) 23 | } 24 | } 25 | def deleteResults(uuid: String) 26 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/CassandraDal.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import me.prettyprint.cassandra.model.AllOneConsistencyLevelPolicy 4 | import me.prettyprint.cassandra.service.CassandraHostConfigurator 5 | import me.prettyprint.cassandra.service.ThriftCluster 6 | import me.prettyprint.hector.api.factory.HFactory 7 | import com.xpatterns.jaws.data.contracts.DAL 8 | import com.xpatterns.jaws.data.contracts.TJawsLogging 9 | import com.xpatterns.jaws.data.contracts.TJawsResults 10 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables 11 | 12 | /** 13 | * Created by emaorhian 14 | */ 15 | class CassandraDal (cassandraHost : String, clusterName : String, keyspaceName : String) extends DAL { 16 | val cassandraHostConfigurator = new CassandraHostConfigurator(cassandraHost) 17 | val cluster = new ThriftCluster(clusterName, cassandraHostConfigurator) 18 | val keyspace = HFactory.createKeyspace(keyspaceName, cluster, new AllOneConsistencyLevelPolicy) 19 | 20 | val loggingDal: TJawsLogging = new JawsCassandraLogging(keyspace) 21 | val resultsDal: TJawsResults = new JawsCassandraResults(keyspace) 22 | val parquetTableDal: TJawsParquetTables = new JawsCassandraParquetTables(keyspace) 23 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/HdfsDal.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import com.xpatterns.jaws.data.contracts.DAL 4 | import com.xpatterns.jaws.data.contracts.TJawsLogging 5 | import com.xpatterns.jaws.data.contracts.TJawsResults 6 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables 7 | 8 | /** 9 | * Created by emaorhian 10 | */ 11 | class HdfsDal(configuration: org.apache.hadoop.conf.Configuration) extends DAL { 12 | val loggingDal: TJawsLogging = new JawsHdfsLogging(configuration) 13 | val resultsDal: TJawsResults = new JawsHdfsResults(configuration) 14 | val parquetTableDal: TJawsParquetTables = new JawsHdfsParquetTables(configuration) 15 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/JawsCassandraParquetTables.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables 4 | import com.xpatterns.jaws.data.DTO.ParquetTable 5 | import com.xpatterns.jaws.data.utils.Utils 6 
| import org.apache.log4j.Logger 7 | import me.prettyprint.hector.api.beans.Composite 8 | import me.prettyprint.hector.api.factory.HFactory 9 | import me.prettyprint.hector.api.Keyspace 10 | import me.prettyprint.cassandra.serializers.IntegerSerializer 11 | import me.prettyprint.hector.api.Serializer 12 | import me.prettyprint.cassandra.serializers.StringSerializer 13 | import me.prettyprint.cassandra.serializers.CompositeSerializer 14 | import me.prettyprint.cassandra.serializers.LongSerializer 15 | import me.prettyprint.hector.api.query.ColumnQuery 16 | import me.prettyprint.cassandra.model.thrift.ThriftColumnQuery 17 | import spray.json._ 18 | import spray.json.DefaultJsonProtocol._ 19 | 20 | class JawsCassandraParquetTables(keyspace: Keyspace) extends TJawsParquetTables { 21 | 22 | val CF_PARQUET_TABLES = "parquet_tables" 23 | val ROW_ID = "tables" 24 | 25 | val is = IntegerSerializer.get.asInstanceOf[Serializer[Int]] 26 | val ss = StringSerializer.get.asInstanceOf[Serializer[String]] 27 | val cs = CompositeSerializer.get.asInstanceOf[Serializer[Composite]] 28 | val ls = LongSerializer.get.asInstanceOf[Serializer[Long]] 29 | 30 | val logger = Logger.getLogger("JawsCassandraParquetTables") 31 | 32 | override def addParquetTable(pTable: ParquetTable) { 33 | Utils.TryWithRetry { 34 | logger.debug(s"Adding the parquet table ${pTable.name} for the filepath ${pTable.filePath}") 35 | val mutator = HFactory.createMutator(keyspace, ss) 36 | 37 | val valueTouple = (pTable.namenode, pTable.filePath).toJson.prettyPrint 38 | mutator.addInsertion(ROW_ID, CF_PARQUET_TABLES, HFactory.createColumn(pTable.name, valueTouple, ss, ss)) 39 | mutator.execute() 40 | } 41 | } 42 | 43 | override def deleteParquetTable(name: String) { 44 | Utils.TryWithRetry { 45 | logger.debug(s"Deleting parquet table $name") 46 | 47 | val mutator = HFactory.createMutator(keyspace, ss) 48 | 49 | mutator.addDeletion(ROW_ID, CF_PARQUET_TABLES, name, ss) 50 | mutator.execute 51 | } 52 | } 53 | override def listParquetTables(): Array[ParquetTable] = { 54 | Utils.TryWithRetry { 55 | var result = Array[ParquetTable]() 56 | logger.debug("listing all parquet tables") 57 | val sliceQuery = HFactory.createSliceQuery(keyspace, ss, ss, ss).setColumnFamily(CF_PARQUET_TABLES).setKey(ROW_ID).setRange(null, null, false, Int.MaxValue) 58 | val queryResult = sliceQuery.execute 59 | Option(queryResult) match { 60 | case None => result 61 | case _ => { 62 | val columnSlice = queryResult.get 63 | Option(columnSlice) match { 64 | case None => result 65 | case _ => { 66 | val columns = columnSlice.getColumns 67 | Option(columns) match { 68 | case None => result 69 | case _ => { 70 | columns.size match { 71 | case 0 => result 72 | case size: Int => { 73 | for (index <- 0 until size) { 74 | val column = columns.get(index) 75 | val (namenode, filepath) = column.getValue.parseJson.fromJson[Tuple2[String, String]] 76 | result = result :+ new ParquetTable(column.getName, filepath, namenode) 77 | } 78 | result 79 | } 80 | } 81 | } 82 | } 83 | } 84 | } 85 | } 86 | } 87 | } 88 | } 89 | 90 | override def tableExists(name: String): Boolean = { 91 | Utils.TryWithRetry { 92 | logger.debug(s"Reading the parquet table ${name}") 93 | val columnQuery = HFactory.createColumnQuery(keyspace, ss, ss, ss) 94 | columnQuery.setColumnFamily(CF_PARQUET_TABLES).setKey(ROW_ID).setName(name) 95 | 96 | val queryResult = columnQuery.execute 97 | Option(queryResult) match { 98 | case None => false 99 | case _ => { 100 | val column = queryResult.get 101 | Option(column) match { 102 
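The column value written by addParquetTable above is nothing more than a spray-json rendering of a (namenode, filePath) pair, parsed back on read. A standalone round-trip sketch of that encoding (the host and path are sample values, not taken from the repository):

import spray.json._
import spray.json.DefaultJsonProtocol._

object ParquetTableValueSketch extends App {
  // Mirrors the encoding used by addParquetTable: a (namenode, filePath) tuple
  // rendered as pretty-printed JSON and stored as the column value.
  val namenode = "hdfs://devbox.local:8020"          // sample value
  val filePath = "/user/jaws/tables/myTable.parquet" // sample value

  val stored: String = (namenode, filePath).toJson.prettyPrint
  println(stored) // a two-element JSON array holding the namenode and the file path

  // Reading it back; convertTo is spray-json's standard equivalent of the
  // fromJson call used in the DAL code above.
  val (nn, path) = stored.parseJson.convertTo[(String, String)]
  assert(nn == namenode && path == filePath)
}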
| case None => false 103 | case _ => true 104 | } 105 | } 106 | } 107 | } 108 | } 109 | 110 | override def readParquetTable(name: String): ParquetTable = { 111 | Utils.TryWithRetry { 112 | logger.debug(s"Reading the parquet table ${name}") 113 | val columnQuery = HFactory.createColumnQuery(keyspace, ss, ss, ss) 114 | columnQuery.setColumnFamily(CF_PARQUET_TABLES).setKey(ROW_ID).setName(name) 115 | 116 | val queryResult = columnQuery.execute 117 | Option(queryResult) match { 118 | case None => new ParquetTable 119 | case _ => { 120 | val column = queryResult.get 121 | Option(column) match { 122 | case None => new ParquetTable 123 | case _ => 124 | { 125 | val (namenode, filepath) = column.getValue.parseJson.fromJson[Tuple2[String, String]] 126 | new ParquetTable(column.getName, filepath, namenode) 127 | } 128 | 129 | } 130 | } 131 | } 132 | } 133 | } 134 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/JawsHdfsParquetTables.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables 4 | import spray.json._ 5 | import spray.json.DefaultJsonProtocol._ 6 | import com.xpatterns.jaws.data.DTO.ParquetTable 7 | import org.apache.hadoop.conf.Configuration 8 | import com.xpatterns.jaws.data.utils.Utils 9 | import org.apache.log4j.Logger 10 | import java.util.Comparator 11 | 12 | class JawsHdfsParquetTables(configuration: Configuration) extends TJawsParquetTables { 13 | 14 | val logger = Logger.getLogger("JawsHdfsParquetTables") 15 | 16 | val forcedMode = configuration.getBoolean(Utils.FORCED_MODE, false) 17 | Utils.createFolderIfDoesntExist(configuration, configuration.get(Utils.PARQUET_TABLES_FOLDER), forcedMode) 18 | 19 | override def addParquetTable(pTable: ParquetTable) { 20 | logger.debug(s"Writing parquet table ${pTable.name} with path ${pTable.filePath} ") 21 | val valueTouple = (pTable.namenode, pTable.filePath).toJson.prettyPrint 22 | Utils.rewriteFile(valueTouple, configuration, getParquetTableFilePath(pTable.name)) 23 | } 24 | 25 | override def deleteParquetTable(name: String) { 26 | logger.debug(s"Deleting parquet table called $name") 27 | var filePath = getParquetTableFilePath(name) 28 | Utils.deleteFile(configuration, filePath) 29 | } 30 | 31 | override def listParquetTables(): Array[ParquetTable] = { 32 | 33 | logger.debug("Listing parquet tables: ") 34 | var tables = Array[ParquetTable]() 35 | 36 | var files = Utils.listFiles(configuration, Utils.PARQUET_TABLES_FOLDER, new Comparator[String]() { 37 | 38 | override def compare(o1: String, o2: String): Int = { 39 | return o1.compareTo(o2) 40 | } 41 | 42 | }) 43 | 44 | val iterator = files.iterator() 45 | 46 | while (iterator.hasNext()) { 47 | val tableName = iterator.next() 48 | 49 | val (namenode, filepath) = Utils.readFile(configuration, Utils.PARQUET_TABLES_FOLDER + "/" + tableName).parseJson.fromJson[Tuple2[String, String]] 50 | tables = tables :+ new ParquetTable(tableName, filepath, namenode) 51 | } 52 | 53 | tables 54 | } 55 | 56 | override def tableExists(name: String): Boolean = { 57 | logger.debug(s"Checking table existence for $name") 58 | val filename = getParquetTableFilePath(name) 59 | 60 | Utils.checkFileExistence(filename, configuration) 61 | } 62 | 63 | override def readParquetTable(name: String): ParquetTable = { 64 | logger.debug(s"Reading table $name") 65 | val filename = 
getParquetTableFilePath(name) 66 | 67 | if (Utils.checkFileExistence(filename, configuration)){ 68 | val (namenode, filepath) = Utils.readFile(configuration, filename).parseJson.fromJson[Tuple2[String, String]] 69 | new ParquetTable(name, filepath, namenode) 70 | } 71 | 72 | else new ParquetTable 73 | 74 | } 75 | 76 | def getParquetTableFilePath(name: String): String = { 77 | configuration.get(Utils.PARQUET_TABLES_FOLDER) + "/" + name 78 | } 79 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/impl/JawsHdfsResults.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import com.xpatterns.jaws.data.contracts.TJawsResults 4 | import org.apache.log4j.Logger 5 | import org.apache.hadoop.conf.Configuration 6 | import net.liftweb.json._ 7 | import spray.json._ 8 | import org.apache.hadoop.fs.Path 9 | import org.apache.hadoop.io.IOUtils 10 | import org.apache.hadoop.fs.FileSystem 11 | import java.io.InputStream 12 | import org.apache.commons.io.output.ByteArrayOutputStream 13 | import com.xpatterns.jaws.data.utils.Utils 14 | import com.xpatterns.jaws.data.utils.ResultsConverter 15 | import com.xpatterns.jaws.data.DTO.AvroResult 16 | import com.xpatterns.jaws.data.DTO.CustomResult 17 | import org.apache.avro.Schema 18 | import com.google.gson.GsonBuilder 19 | 20 | class JawsHdfsResults(configuration: Configuration) extends TJawsResults { 21 | 22 | val logger = Logger.getLogger("JawsHdfsResults") 23 | val forcedMode = configuration.getBoolean(Utils.FORCED_MODE, false) 24 | Utils.createFolderIfDoesntExist(configuration, configuration.get(Utils.RESULTS_FOLDER), forcedMode) 25 | Utils.createFolderIfDoesntExist(configuration, s"${configuration.get(Utils.RESULTS_FOLDER)}/avro", forcedMode) 26 | Utils.createFolderIfDoesntExist(configuration, s"${configuration.get(Utils.RESULTS_FOLDER)}/custom", forcedMode) 27 | 28 | implicit val formats = DefaultFormats 29 | def setAvroResults(uuid: String, avroResults: AvroResult) { 30 | logger.debug("Writing avro results to query " + uuid) 31 | 32 | val (schemaFile, resultsFile) = getAvroResultsFilePaths(uuid) 33 | Utils.rewriteFile(avroResults.schema.toString(), configuration, schemaFile) 34 | val bytesR = avroResults.serializeResult() 35 | Utils.rewriteFile(bytesR, configuration, resultsFile) 36 | } 37 | 38 | def getAvroResults(uuid: String): AvroResult = { 39 | logger.debug("Reading results for query: " + uuid) 40 | val (schemaFile, resultsFile) = getAvroResultsFilePaths(uuid) 41 | if (Utils.checkFileExistence(schemaFile, configuration) && Utils.checkFileExistence(resultsFile, configuration)) { 42 | val schemaParser = new Schema.Parser() 43 | val schema = schemaParser.parse(Utils.readFile(configuration, schemaFile)) 44 | val results = Utils.readBytes(configuration, resultsFile) 45 | new AvroResult(schema, AvroResult.deserializeResult(results, schema)) 46 | } else new AvroResult() 47 | } 48 | 49 | def setCustomResults(uuid: String, results: CustomResult) { 50 | logger.debug("Writing custom results to query " + uuid) 51 | val customFile = getCustomResultsFilePaths(uuid) 52 | val gson = new GsonBuilder().create() 53 | Utils.rewriteFile(gson.toJson(results), configuration, customFile) 54 | } 55 | def getCustomResults(uuid: String): CustomResult = { 56 | logger.debug("Reading custom results for query: " + uuid) 57 | val customFile = getCustomResultsFilePaths(uuid) 58 | if 
(Utils.checkFileExistence(customFile, configuration)) { 59 | val gson = new GsonBuilder().create() 60 | gson.fromJson(Utils.readFile(configuration, customFile), classOf[CustomResult]) 61 | } else new CustomResult() 62 | } 63 | 64 | def deleteResults(uuid: String) { 65 | logger.debug(s"Deleting results for query $uuid") 66 | val (schemaFile, resultsFile) = getAvroResultsFilePaths(uuid) 67 | val customFile = getCustomResultsFilePaths(uuid) 68 | Utils.deleteFile(configuration, schemaFile) 69 | Utils.deleteFile(configuration, resultsFile) 70 | Utils.deleteFile(configuration, customFile) 71 | 72 | } 73 | 74 | def getResultsFilePath(queryId: String): String = { 75 | s"${configuration.get(Utils.RESULTS_FOLDER)}/$queryId" 76 | } 77 | 78 | def getAvroResultsFilePaths(queryId: String): Tuple2[String, String] = { 79 | val route = s"${configuration.get(Utils.RESULTS_FOLDER)}/avro/${queryId}_" 80 | (s"${route}schema", s"${route}results") 81 | } 82 | 83 | def getCustomResultsFilePaths(queryId: String) = { 84 | s"${configuration.get(Utils.RESULTS_FOLDER)}/custom/${queryId}" 85 | } 86 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/CustomConverter.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.utils 2 | 3 | import org.apache.spark.sql.catalyst.expressions.Row 4 | import spray.json.DefaultJsonProtocol._ 5 | import com.xpatterns.jaws.data.DTO.CustomResult 6 | import com.xpatterns.jaws.data.DTO.Column 7 | import com.google.gson.GsonBuilder 8 | import java.sql.Timestamp 9 | import collection.JavaConversions._ 10 | import org.apache.spark.sql.types._ 11 | import org.apache.spark.sql.catalyst.expressions.GenericRow 12 | 13 | object CustomConverter { 14 | 15 | def getCustomSchema(schema: StructType): Array[Column] = { 16 | schema.fields map (field => getCustomSchema(field.dataType, field.name)) toArray 17 | } 18 | 19 | private def getCustomSchema(fieldType: DataType, fieldName: String): Column = { 20 | fieldType match { 21 | case ArrayType(elementType, _) => new Column(fieldName, "ArrayType", "", Array(getCustomSchema(elementType, "items"))) 22 | case MapType(StringType, valueType, _) => new Column(fieldName, "MapType", "", Array(getCustomSchema(valueType, "values"))) 23 | case structType: StructType => new Column(fieldName, "StructType", "", structType.fields map (field => getCustomSchema(field.dataType, field.name)) toArray) 24 | case _ => new Column(fieldName, fieldType.toString(), "", Array.empty) 25 | } 26 | } 27 | 28 | def getCustomResult(result: Array[Row], schema: StructType) = { 29 | val converter = createConverter(schema) 30 | result map (row => converter(row).asInstanceOf[Array[Any]]) 31 | } 32 | 33 | private def createConverter( 34 | dataType: DataType): (Any) => Any = { 35 | dataType match { 36 | case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | StringType | 37 | BinaryType | BooleanType => 38 | (item: Any) => item 39 | 40 | case DecimalType() => 41 | (item: Any) => if (item == null) null else item.toString 42 | 43 | case TimestampType => 44 | (item: Any) => { 45 | if (item == null) null else item.asInstanceOf[Timestamp].getTime 46 | } 47 | 48 | case ArrayType(elementType, _) => 49 | val elementConverter = createConverter(elementType) 50 | (item: Any) => { 51 | if (item == null) { 52 | null 53 | } else { 54 | 55 | 56 | 57 | val sourceArray = if (item.isInstanceOf[Seq[Any]]) 
item.asInstanceOf[Seq[Any]] else item.asInstanceOf[GenericRow].toSeq 58 | val destination = sourceArray map { element => elementConverter(element) } 59 | destination.toArray 60 | } 61 | } 62 | 63 | case MapType(StringType, valueType, _) => 64 | val valueConverter = createConverter(valueType) 65 | 66 | (item: Any) => { 67 | if (item == null) { 68 | null 69 | } else { 70 | val smap = item.asInstanceOf[Map[String, Any]] map { 71 | case (key, value) => 72 | (key -> valueConverter(value)) 73 | } 74 | mapAsJavaMap(smap) 75 | } 76 | } 77 | 78 | case structType: StructType => 79 | val fieldConverters = structType.fields.map(field => 80 | createConverter(field.dataType)) 81 | 82 | (item: Any) => { 83 | if (item == null) { 84 | null 85 | } else { 86 | 87 | val row = item.asInstanceOf[Row].toSeq 88 | val valueConverter = row zip fieldConverters 89 | valueConverter map (value => value match { 90 | case (field, converter) => converter(field) 91 | }) toArray 92 | } 93 | } 94 | } 95 | } 96 | 97 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/GsonHelper.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.utils 2 | 3 | import java.lang.reflect.Type 4 | import com.google.gson.Gson 5 | import com.google.gson.GsonBuilder 6 | import com.google.gson.JsonDeserializationContext 7 | import com.google.gson.JsonDeserializer 8 | import com.google.gson.JsonElement 9 | import com.google.gson.JsonParseException 10 | import com.google.gson.JsonPrimitive 11 | import com.google.gson.JsonSerializationContext 12 | import com.google.gson.JsonSerializer 13 | import javax.xml.bind.DatatypeConverter 14 | import org.apache.avro.util.Utf8 15 | 16 | object GsonHelper { 17 | 18 | val customGson = new GsonBuilder().registerTypeHierarchyAdapter(classOf[Array[Byte]], 19 | new ByteArrayToBase64TypeAdapter()) 20 | .registerTypeHierarchyAdapter(classOf[Utf8], 21 | new Utf8toStrAdapter()).create(); 22 | 23 | class ByteArrayToBase64TypeAdapter extends JsonSerializer[Array[Byte]] with JsonDeserializer[Array[Byte]] { 24 | def deserialize(json: JsonElement, typeOfT: Type, context: JsonDeserializationContext) = { 25 | DatatypeConverter.parseBase64Binary(json.getAsString()) 26 | } 27 | 28 | def serialize(src: Array[Byte], typeOfSrc: Type, context: JsonSerializationContext): JsonElement = { 29 | new JsonPrimitive(DatatypeConverter.printBase64Binary(src)); 30 | } 31 | } 32 | 33 | class Utf8toStrAdapter extends JsonSerializer[Utf8] with JsonDeserializer[Utf8] { 34 | def deserialize(json: JsonElement, typeOfT: Type, context: JsonDeserializationContext) = { 35 | new Utf8(json.getAsString) 36 | } 37 | 38 | def serialize(src: Utf8, typeOfSrc: Type, context: JsonSerializationContext): JsonElement = { 39 | new JsonPrimitive(src.toString()); 40 | } 41 | } 42 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/QueryState.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.utils 2 | 3 | object QueryState extends Enumeration { 4 | type QueryState = Value 5 | val DONE, IN_PROGRESS, FAILED, NOT_FOUND = Value 6 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/Randomizer.scala: 
-------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.utils 2 | 3 | import java.util.ArrayList 4 | import org.apache.commons.lang.RandomStringUtils 5 | import org.apache.commons.lang.math.RandomUtils 6 | import com.xpatterns.jaws.data.DTO.Column 7 | import com.xpatterns.jaws.data.DTO.Log 8 | import com.xpatterns.jaws.data.DTO.QueryMetaInfo 9 | import com.xpatterns.jaws.data.DTO.ParquetTable 10 | import com.xpatterns.jaws.data.DTO.AvroResult 11 | import org.apache.spark.sql.catalyst.expressions.Row 12 | import org.apache.spark.sql.types._ 13 | 14 | 15 | 16 | object Randomizer { 17 | 18 | def getRandomString(nr : Int) : String = { 19 | return RandomStringUtils.randomAlphabetic(nr) 20 | } 21 | 22 | def getRandomLong : Long = { 23 | return RandomUtils.nextLong() 24 | } 25 | 26 | 27 | def getParquetTable : ParquetTable ={ 28 | new ParquetTable(Randomizer.getRandomString(5), Randomizer.getRandomString(5), Randomizer.getRandomString(5)) 29 | } 30 | 31 | def getParquetTables (size : Int): Array[ParquetTable] = { 32 | val result : Array[ParquetTable] = new Array(size) 33 | for (i <- 0 until size){ 34 | result(i) = getParquetTable 35 | } 36 | result 37 | } 38 | 39 | def getResultsConverter : ResultsConverter = { 40 | 41 | val intField = new StructField("int", IntegerType, false) 42 | val strField = new StructField("str", StringType, true) 43 | val structType = new StructType(Array(intField, strField)) 44 | 45 | val structTypeRow = Array(Row.fromSeq(Seq(1, "a")), Row.fromSeq(Seq(2, "b"))) 46 | new ResultsConverter(structType, structTypeRow) 47 | 48 | } 49 | 50 | def getLogDTO: Log = { 51 | return new Log(Randomizer.getRandomString(5000), Randomizer.getRandomString(10), Randomizer.getRandomLong) 52 | } 53 | 54 | def createQueryMetainfo : QueryMetaInfo = { 55 | return new QueryMetaInfo(RandomUtils.nextLong(), RandomUtils.nextLong(), RandomUtils.nextInt(3), RandomUtils.nextBoolean()) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/main/scala/com/xpatterns/jaws/data/utils/ResultsConverter.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.utils 2 | 3 | import org.apache.spark.sql.catalyst.expressions.Row 4 | import org.apache.avro.generic.GenericDatumWriter 5 | import org.apache.avro.generic.GenericRecord 6 | import java.io.ByteArrayOutputStream 7 | import org.apache.avro.io.EncoderFactory 8 | import spray.json.DefaultJsonProtocol._ 9 | import com.xpatterns.jaws.data.DTO.AvroResult 10 | import com.xpatterns.jaws.data.DTO.CustomResult 11 | import com.xpatterns.jaws.data.DTO.Column 12 | import spray.json._ 13 | import com.google.gson.GsonBuilder 14 | import java.sql.Timestamp 15 | import collection.JavaConversions._ 16 | import com.xpatterns.jaws.data.DTO.AvroBinaryResult 17 | import org.apache.spark.sql.types.StructType 18 | 19 | class ResultsConverter(val schema: StructType, val result: Array[Row]) { 20 | 21 | def toAvroResults(): AvroResult = { 22 | val avroSchema = AvroConverter.getAvroSchema(schema) 23 | val avroResults = AvroConverter.getAvroResult(result, schema) 24 | new AvroResult(avroSchema, avroResults) 25 | } 26 | 27 | def toCustomResults(): CustomResult = { 28 | val gson = new GsonBuilder().create() 29 | val customSchema = CustomConverter.getCustomSchema(schema) 30 | 31 | new CustomResult(customSchema, CustomConverter.getCustomResult(result, schema)) 32 | } 33 | 34 | def 
toAvroBinaryResults(): AvroBinaryResult = { 35 | new AvroBinaryResult(toAvroResults()) 36 | } 37 | 38 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | 2 | ########## hadoop configuration - skip this if you are using cassandra logging ######## 3 | hadoopConf { 4 | namenode="hdfs://devbox.local:8020" 5 | replicationFactor=1 6 | # set on true if you want to start fresh (all the existing folders will be recreated) 7 | forcedMode=true 8 | # folder where to write the logs 9 | loggingFolder=jawsLogs 10 | # folder where to write the jobs states 11 | stateFolder=jawsStates 12 | # folder where to write the jobs details 13 | detailsFolder=jawsDetails 14 | # folder where to write the jobs results 15 | resultsFolder=jawsResultsFolder 16 | # folder where to write the jobs meta information 17 | metaInfoFolder=jawsMetainfoFolder 18 | # folder where to write the name of query information 19 | queryNameFolder=jawsQueryNameFolder 20 | # folder where to write the published queries 21 | queryPublishedFolder=jawsQueryPublishedFolder 22 | # folder where to write the unpublished queries 23 | queryUnpublishedFolder=jawsQueryUnpublishedFolder 24 | # folder where to write the parquet tables information 25 | parquetTablesFolder=parquetTablesFolder 26 | } 27 | 28 | ########## cassandra configuration - skip this if you are using hdfs logging ########## 29 | cassandraConf { 30 | cassandra.host="devbox.local:9160" 31 | cassandra.keyspace=xpatterns_jaws 32 | cassandra.cluster.name=Jaws 33 | } 34 | -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsCassandraParquetTablesTest.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables 4 | import org.scalatest.FunSuite 5 | import org.scalatest.BeforeAndAfter 6 | import com.typesafe.config.ConfigFactory 7 | import me.prettyprint.cassandra.service.CassandraHostConfigurator 8 | import me.prettyprint.cassandra.service.ThriftCluster 9 | import me.prettyprint.hector.api.factory.HFactory 10 | import me.prettyprint.cassandra.model.AllOneConsistencyLevelPolicy 11 | import com.xpatterns.jaws.data.DTO.ParquetTable 12 | import org.junit.runner.RunWith 13 | import org.scalatest.junit.JUnitRunner 14 | import com.xpatterns.jaws.data.utils.Randomizer 15 | 16 | @RunWith(classOf[JUnitRunner]) 17 | class JawsCassandraParquetTablesTest extends FunSuite with BeforeAndAfter { 18 | 19 | var pTablesDal: TJawsParquetTables = _ 20 | 21 | before { 22 | if (pTablesDal == null) { 23 | 24 | val conf = ConfigFactory.load 25 | 26 | val cassandraConf = conf.getConfig("cassandraConf").withFallback(conf) 27 | 28 | // cassandra configuration 29 | val cassandraHost = cassandraConf.getString("cassandra.host") 30 | val cassandraKeyspace = cassandraConf.getString("cassandra.keyspace") 31 | val cassandraClusterName = cassandraConf.getString("cassandra.cluster.name") 32 | 33 | val cassandraHostConfigurator = new CassandraHostConfigurator(cassandraHost) 34 | val cluster = new ThriftCluster(cassandraClusterName, cassandraHostConfigurator) 35 | val keyspace = HFactory.createKeyspace(cassandraKeyspace, cluster, new AllOneConsistencyLevelPolicy) 36 | 37 | //!!!!!!! ATTENTION !!!! 
truncating CF 38 | cluster.truncate(keyspace.getKeyspaceName(), "parquet_tables") 39 | 40 | pTablesDal = new JawsCassandraParquetTables(keyspace) 41 | } 42 | 43 | pTablesDal 44 | } 45 | 46 | test("testAddReadTable") { 47 | val table = Randomizer.getParquetTable 48 | 49 | pTablesDal.addParquetTable(table) 50 | val resultTable = pTablesDal.readParquetTable(table.name) 51 | assert(table === resultTable) 52 | pTablesDal.deleteParquetTable(table.name) 53 | 54 | } 55 | 56 | test("testDeleteTable") { 57 | val table = Randomizer.getParquetTable 58 | 59 | pTablesDal.addParquetTable(table) 60 | val tableBeforeDeletion = pTablesDal.readParquetTable(table.name) 61 | pTablesDal.deleteParquetTable(table.name) 62 | val tableAfterDeletion = pTablesDal.readParquetTable(table.name) 63 | 64 | assert(table === tableBeforeDeletion) 65 | assert(new ParquetTable === tableAfterDeletion) 66 | 67 | } 68 | 69 | test("testDeleteUnexistingTable") { 70 | val tName = Randomizer.getRandomString(5) 71 | pTablesDal.deleteParquetTable(tName) 72 | val tableAfterDeletion = pTablesDal.readParquetTable(tName) 73 | 74 | assert(new ParquetTable === tableAfterDeletion) 75 | 76 | } 77 | 78 | test("testTableDoesntExist") { 79 | val tName = Randomizer.getRandomString(5) 80 | assert(false === pTablesDal.tableExists(tName)) 81 | } 82 | 83 | test("testTableExists") { 84 | val table = Randomizer.getParquetTable 85 | pTablesDal.addParquetTable(table) 86 | assert(true === pTablesDal.tableExists(table.name)) 87 | pTablesDal.deleteParquetTable(table.name) 88 | } 89 | 90 | test("testGetTables Empty") { 91 | val result = pTablesDal.listParquetTables 92 | assert(false === (result == null)) 93 | assert(0 === result.size) 94 | } 95 | 96 | test("testGetTables") { 97 | val tables = Randomizer.getParquetTables(5) 98 | tables.foreach(table => pTablesDal.addParquetTable(table)) 99 | val result = pTablesDal.listParquetTables 100 | tables.foreach(table => pTablesDal.deleteParquetTable(table.name)) 101 | 102 | assert(false === (result == null)) 103 | assert(5 === result.size) 104 | tables.foreach(table => assert(true === result.contains(table))) 105 | } 106 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsHdfsParquetTablesTest.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.junit.JUnitRunner 5 | import org.scalatest.FunSuite 6 | import org.scalatest.BeforeAndAfter 7 | import com.xpatterns.jaws.data.contracts.TJawsParquetTables 8 | import com.typesafe.config.ConfigFactory 9 | import com.xpatterns.jaws.data.utils.Utils 10 | import com.xpatterns.jaws.data.utils.Randomizer 11 | import com.xpatterns.jaws.data.DTO.ParquetTable 12 | 13 | @RunWith(classOf[JUnitRunner]) 14 | class JawsHdfsParquetTablesTest extends FunSuite with BeforeAndAfter { 15 | 16 | 17 | var pTablesDal: TJawsParquetTables = _ 18 | 19 | before { 20 | if (pTablesDal == null) { val conf = ConfigFactory.load 21 | 22 | val hadoopConf = conf.getConfig("hadoopConf").withFallback(conf) 23 | 24 | //hadoop conf 25 | val replicationFactor = Option(hadoopConf.getString("replicationFactor")) 26 | val forcedMode = Option(hadoopConf.getString("forcedMode")) 27 | val loggingFolder = Option(hadoopConf.getString("loggingFolder")) 28 | val stateFolder = Option(hadoopConf.getString("stateFolder")) 29 | val detailsFolder = 
Option(hadoopConf.getString("detailsFolder")) 30 | val resultsFolder = Option(hadoopConf.getString("resultsFolder")) 31 | val metaInfoFolder = Option(hadoopConf.getString("metaInfoFolder")) 32 | val queryNameFolder = Option(hadoopConf.getString("queryNameFolder")) 33 | val parquetTablesFolder = Option(hadoopConf.getString("parquetTablesFolder")) 34 | val namenode = Option(hadoopConf.getString("namenode")) 35 | 36 | val configuration = new org.apache.hadoop.conf.Configuration() 37 | configuration.setBoolean(Utils.FORCED_MODE, forcedMode.getOrElse("false").toBoolean) 38 | 39 | // set hadoop name node and job tracker 40 | namenode match { 41 | case None => { 42 | throw new RuntimeException("You need to set the namenode! ") 43 | } 44 | case _ => configuration.set("fs.defaultFS", namenode.get) 45 | 46 | } 47 | 48 | configuration.set("dfs.replication", replicationFactor.getOrElse("1")) 49 | configuration.set(Utils.PARQUET_TABLES_FOLDER, parquetTablesFolder.getOrElse("parquetTablesFolder")) 50 | 51 | pTablesDal = new JawsHdfsParquetTables(configuration) 52 | } 53 | 54 | pTablesDal 55 | } 56 | 57 | test("testAddReadTable") { 58 | val table = Randomizer.getParquetTable 59 | 60 | pTablesDal.addParquetTable(table) 61 | val resultTable = pTablesDal.readParquetTable(table.name) 62 | assert(table === resultTable) 63 | pTablesDal.deleteParquetTable(table.name) 64 | 65 | } 66 | 67 | test("testDeleteTable") { 68 | val table = Randomizer.getParquetTable 69 | 70 | pTablesDal.addParquetTable(table) 71 | val tableBeforeDeletion = pTablesDal.readParquetTable(table.name) 72 | pTablesDal.deleteParquetTable(table.name) 73 | val tableAfterDeletion = pTablesDal.readParquetTable(table.name) 74 | 75 | assert(table === tableBeforeDeletion) 76 | assert(new ParquetTable === tableAfterDeletion) 77 | 78 | } 79 | 80 | test("testDeleteUnexistingTable") { 81 | val tName = Randomizer.getRandomString(5) 82 | pTablesDal.deleteParquetTable(tName) 83 | val tableAfterDeletion = pTablesDal.readParquetTable(tName) 84 | 85 | assert(new ParquetTable === tableAfterDeletion) 86 | 87 | } 88 | 89 | test("testTableDoesntExist") { 90 | val tName = Randomizer.getRandomString(5) 91 | assert(false === pTablesDal.tableExists(tName)) 92 | } 93 | 94 | test("testTableExists") { 95 | val table = Randomizer.getParquetTable 96 | pTablesDal.addParquetTable(table) 97 | assert(true === pTablesDal.tableExists(table.name)) 98 | pTablesDal.deleteParquetTable(table.name) 99 | } 100 | 101 | test("testGetTables Empty") { 102 | val result = pTablesDal.listParquetTables 103 | assert(false === (result == null)) 104 | assert(0 === result.size) 105 | } 106 | 107 | test("testGetTables") { 108 | val tables = Randomizer.getParquetTables(5) 109 | tables.foreach(table => pTablesDal.addParquetTable(table)) 110 | val result = pTablesDal.listParquetTables 111 | tables.foreach(table => pTablesDal.deleteParquetTable(table.name)) 112 | 113 | assert(false === (result == null)) 114 | assert(5 === result.size) 115 | tables.foreach(table => assert(true === result.contains(table))) 116 | } 117 | 118 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsResultsOnHdfsTest.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import org.scalatest.{ BeforeAndAfter, FunSuite } 4 | import com.typesafe.config.ConfigFactory 5 | import com.xpatterns.jaws.data.utils.{ Randomizer, Utils } 6 | import 
com.xpatterns.jaws.data.contracts.TJawsResults 7 | import org.junit.runner.RunWith 8 | import org.scalatest.junit.JUnitRunner 9 | import com.xpatterns.jaws.data.DTO.AvroResult 10 | import com.xpatterns.jaws.data.DTO.CustomResult 11 | 12 | /** 13 | * Created by emaorhian on 7/28/14. 14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class JawsResultsOnHdfsTest extends FunSuite with BeforeAndAfter { 17 | 18 | var resultsDal: TJawsResults = _ 19 | 20 | before { 21 | if (resultsDal == null) { 22 | 23 | val conf = ConfigFactory.load 24 | 25 | val hadoopConf = conf.getConfig("hadoopConf").withFallback(conf) 26 | 27 | //hadoop conf 28 | val replicationFactor = Option(hadoopConf.getString("replicationFactor")) 29 | val forcedMode = Option(hadoopConf.getString("forcedMode")) 30 | val loggingFolder = Option(hadoopConf.getString("loggingFolder")) 31 | val stateFolder = Option(hadoopConf.getString("stateFolder")) 32 | val detailsFolder = Option(hadoopConf.getString("detailsFolder")) 33 | val resultsFolder = Option(hadoopConf.getString("resultsFolder")) 34 | val metaInfoFolder = Option(hadoopConf.getString("metaInfoFolder")) 35 | val queryNameFolder = Option(hadoopConf.getString("queryNameFolder")) 36 | val queryPublishedFolder = Option(hadoopConf.getString("queryPublishedFolder")) 37 | val queryUnpublishedFolder = Option(hadoopConf.getString("queryUnpublishedFolder")) 38 | val namenode = Option(hadoopConf.getString("namenode")) 39 | 40 | val configuration = new org.apache.hadoop.conf.Configuration() 41 | configuration.setBoolean(Utils.FORCED_MODE, forcedMode.getOrElse("false").toBoolean) 42 | 43 | // set hadoop name node and job tracker 44 | namenode match { 45 | case None => { 46 | throw new RuntimeException("You need to set the namenode! ") 47 | } 48 | case _ => configuration.set("fs.defaultFS", namenode.get) 49 | 50 | } 51 | 52 | configuration.set("dfs.replication", replicationFactor.getOrElse("1")) 53 | 54 | configuration.set(Utils.LOGGING_FOLDER, loggingFolder.getOrElse("jawsLogs")) 55 | configuration.set(Utils.STATUS_FOLDER, stateFolder.getOrElse("jawsStates")) 56 | configuration.set(Utils.DETAILS_FOLDER, detailsFolder.getOrElse("jawsDetails")) 57 | configuration.set(Utils.METAINFO_FOLDER, metaInfoFolder.getOrElse("jawsMetainfoFolder")) 58 | configuration.set(Utils.QUERY_NAME_FOLDER, queryNameFolder.getOrElse("jawsQueryNameFolder")) 59 | configuration.set(Utils.QUERY_PUBLISHED_FOLDER, queryPublishedFolder.getOrElse("jawsQueryPublishedFolder")) 60 | configuration.set(Utils.QUERY_UNPUBLISHED_FOLDER, queryUnpublishedFolder.getOrElse("jawsQueryUnpublishedFolder")) 61 | configuration.set(Utils.RESULTS_FOLDER, resultsFolder.getOrElse("jawsResultsFolder")) 62 | resultsDal = new JawsHdfsResults(configuration) 63 | } 64 | 65 | resultsDal 66 | } 67 | 68 | test("testWriteReadResults") { 69 | val uuid = Randomizer.getRandomString(10) 70 | val resultsConverter = Randomizer.getResultsConverter 71 | resultsDal.setResults(uuid, resultsConverter) 72 | 73 | val avroResults = resultsDal.getAvroResults(uuid) 74 | val customResults = resultsDal.getCustomResults(uuid) 75 | 76 | assert(resultsConverter.toAvroResults() === avroResults) 77 | assert(resultsConverter.toCustomResults() === customResults) 78 | 79 | } 80 | 81 | test("testDeleteResults") { 82 | val uuid = Randomizer.getRandomString(10) 83 | val resultsConverter = Randomizer.getResultsConverter 84 | resultsDal.setResults(uuid, resultsConverter) 85 | 86 | val avroResults = resultsDal.getAvroResults(uuid) 87 | val customResults = resultsDal.getCustomResults(uuid) 
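The deleteResults call that follows removes exactly three HDFS files per query, matching the path helpers of JawsHdfsResults shown earlier. A small illustration of that layout; the query id is a made-up sample and the folder name is the default from this test application.conf:

object HdfsResultsLayoutSketch extends App {
  // Mirrors getAvroResultsFilePaths / getCustomResultsFilePaths from JawsHdfsResults.
  val resultsFolder = "jawsResultsFolder" // default used in the test application.conf
  val queryId = "1a2b3c4d5e"              // sample uuid, for illustration only

  val avroSchemaFile  = s"$resultsFolder/avro/${queryId}_schema"
  val avroResultsFile = s"$resultsFolder/avro/${queryId}_results"
  val customFile      = s"$resultsFolder/custom/$queryId"

  // deleteResults(uuid) deletes these three files and nothing else.
  Seq(avroSchemaFile, avroResultsFile, customFile) foreach println
}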
88 | 89 | resultsDal.deleteResults(uuid) 90 | 91 | val avroResultsDeleted = resultsDal.getAvroResults(uuid) 92 | val customResultsDeleted = resultsDal.getCustomResults(uuid) 93 | 94 | assert(resultsConverter.toAvroResults() === avroResults) 95 | assert(resultsConverter.toCustomResults() === customResults) 96 | assert(new AvroResult() === avroResultsDeleted) 97 | assert(new CustomResult() === customResultsDeleted) 98 | 99 | } 100 | 101 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/impl/JawsResultsTest.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.impl 2 | 3 | import org.scalatest.FunSuite 4 | import com.xpatterns.jaws.data.DTO.Column 5 | import org.apache.commons.lang.RandomStringUtils 6 | import com.xpatterns.jaws.data.utils.Randomizer 7 | import com.xpatterns.jaws.data.contracts.TJawsResults 8 | import org.scalatest.BeforeAndAfter 9 | import me.prettyprint.cassandra.service.CassandraHostConfigurator 10 | import org.junit.Assert 11 | import com.typesafe.config.ConfigFactory 12 | import me.prettyprint.cassandra.service.ThriftCluster 13 | import me.prettyprint.hector.api.factory.HFactory 14 | import me.prettyprint.cassandra.model.AllOneConsistencyLevelPolicy 15 | import org.junit.runner.RunWith 16 | import org.scalatest.junit.JUnitRunner 17 | import com.xpatterns.jaws.data.DTO.AvroResult 18 | import com.xpatterns.jaws.data.DTO.CustomResult 19 | import scala.collection.mutable.ArrayBuffer 20 | import org.apache.spark.sql.catalyst.expressions.Row 21 | 22 | @RunWith(classOf[JUnitRunner]) 23 | class JawsResultsTest extends FunSuite with BeforeAndAfter { 24 | 25 | var resultsDal: TJawsResults = _ 26 | 27 | before { 28 | if (resultsDal == null) { 29 | 30 | val conf = ConfigFactory.load 31 | 32 | val cassandraConf = conf.getConfig("cassandraConf").withFallback(conf) 33 | 34 | // cassandra configuration 35 | val cassandraHost = cassandraConf.getString("cassandra.host") 36 | val cassandraKeyspace = cassandraConf.getString("cassandra.keyspace") 37 | val cassandraClusterName = cassandraConf.getString("cassandra.cluster.name") 38 | 39 | val cassandraHostConfigurator = new CassandraHostConfigurator(cassandraHost) 40 | val cluster = new ThriftCluster(cassandraClusterName, cassandraHostConfigurator) 41 | val keyspace = HFactory.createKeyspace(cassandraKeyspace, cluster, new AllOneConsistencyLevelPolicy) 42 | 43 | resultsDal = new JawsCassandraResults(keyspace) 44 | } 45 | 46 | resultsDal 47 | } 48 | 49 | test("testWriteReadResults") { 50 | val uuid = Randomizer.getRandomString(10) 51 | val resultsConverter = Randomizer.getResultsConverter 52 | resultsDal.setResults(uuid, resultsConverter) 53 | 54 | val avroResults = resultsDal.getAvroResults(uuid) 55 | val customResults = resultsDal.getCustomResults(uuid) 56 | 57 | assert(resultsConverter.toAvroResults() === avroResults) 58 | assert(resultsConverter.toCustomResults() === customResults) 59 | 60 | } 61 | 62 | test("testDeleteResults") { 63 | val uuid = Randomizer.getRandomString(10) 64 | val resultsConverter = Randomizer.getResultsConverter 65 | resultsDal.setResults(uuid, resultsConverter) 66 | 67 | val avroResults = resultsDal.getAvroResults(uuid) 68 | val customResults = resultsDal.getCustomResults(uuid) 69 | 70 | resultsDal.deleteResults(uuid) 71 | 72 | val avroResultsDeleted = resultsDal.getAvroResults(uuid) 73 | val customResultsDeleted = resultsDal.getCustomResults(uuid) 74 | 
75 | assert(resultsConverter.toAvroResults() === avroResults) 76 | assert(resultsConverter.toCustomResults() === customResults) 77 | assert(new AvroResult() === avroResultsDeleted) 78 | assert(new CustomResult() === customResultsDeleted) 79 | 80 | } 81 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/utils/AvroConverterComplexTest.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.utils 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.junit.JUnitRunner 5 | import org.scalatest.FunSuite 6 | import org.apache.spark.SparkConf 7 | import org.apache.spark.SparkContext 8 | import org.apache.spark.sql.SQLContext 9 | import org.apache.spark.sql.types.StructType 10 | import org.apache.spark.sql.types.DataType 11 | import com.xpatterns.jaws.data.DTO.AvroResult 12 | 13 | case class Obj(myString: String, myInteger: Int) 14 | case class ObjString(s1: String, s2: String) 15 | case class ComplexObj(s: String, obj: Obj) 16 | 17 | case class CompleteObj( 18 | myByte: Byte, 19 | myShort: Short, 20 | myLong: Long, 21 | myFloat: Float, 22 | myDouble: Double, 23 | myBoolean: Boolean, 24 | myObj: Obj, 25 | myMap: Map[String, Int], 26 | mySequence: Seq[Int], 27 | myObjSequence: Seq[Obj], 28 | myByteArray: Array[Byte]) 29 | 30 | case class CompleteObj2( 31 | myByte: Byte, 32 | myShort: Short, 33 | myLong: Long, 34 | myFloat: Float, 35 | myDouble: Double, 36 | myObj: Obj, 37 | myMap: Map[String, Int], 38 | mySequence: Seq[Int], 39 | myObjSequence: Seq[Obj], 40 | myByteArray: Array[Byte]) 41 | case class ComplObject( 42 | myString: String, 43 | myInt: Int, 44 | myByte: Byte, 45 | myShort: Short, 46 | myLong: Long, 47 | myFloat: Float, 48 | myDouble: Double, 49 | myBoolean: Boolean, 50 | myObj: Obj, 51 | myMap1: Map[String, Obj], 52 | myMap2: Map[String, ObjString], 53 | myMap3: Map[String, ComplexObj], 54 | mySequence: Seq[Seq[Array[Seq[Array[Array[Seq[CompleteObj]]]]]]], 55 | myArray: Array[ComplexObj]) 56 | 57 | case class Obj1(array: Array[Array[Array[Obj]]]) 58 | case class Obj4(array: Seq[Seq[Seq[Obj]]]) 59 | case class Obj2(map: Map[String, Map[String, Map[String, Obj]]]) 60 | case class Obj3(map: Map[String, Map[String, Obj]]) 61 | 62 | @RunWith(classOf[JUnitRunner]) 63 | class AvroConverterComplexTest extends FunSuite { 64 | 65 | def newObj(i: Int) = new Obj("s1" + i, i) 66 | def newObjString(i: Int) = new ObjString("s1_" + i, "s2_" + i) 67 | def newComplexObj(i: Int) = new ComplexObj("s_" + i, newObj(i)) 68 | def newCompleteObj(i: Int) = new CompleteObj( 69 | Byte.MaxValue, 70 | Short.MaxValue, 71 | i, 72 | 0.3f, 73 | 0.6d, 74 | false, 75 | newObj(i), 76 | Map(("key1", i), ("key2", i + 1)), 77 | List(i, i + 1, i + 2), 78 | List(newObj(i + 1), newObj(i + 2)), 79 | Array((65 + i).toByte, (66 + i).toByte)) 80 | 81 | def newComplObj(i: Int) = new ComplObject( 82 | "newComplObj " + i, 83 | Int.MaxValue, 84 | Byte.MinValue, 85 | Short.MinValue, 86 | Long.MaxValue, 87 | Float.PositiveInfinity, 88 | Double.MinPositiveValue, 89 | i % 2 == 0, 90 | newObj(i + 100), 91 | Map(("str11", newObj(i + 10)), ("str12", newObj(i + 11)), ("str13", newObj(i + 12)), ("str14", newObj(i + 13)), ("str15", newObj(i + 14)), ("str16", newObj(i + 15)), ("str17", newObj(i + 16)), ("str18", newObj(i + 17))), 92 | Map(("str21", newObjString(i + 20)), ("str22", newObjString(i + 21)), ("str23", newObjString(i + 22)), ("str24", newObjString(i + 23)), ("str25", 
newObjString(i + 24)), ("str26", newObjString(i + 25))), 93 | Map(("str31", newComplexObj(i + 30)), ("str32", newComplexObj(i + 31)), ("str33", newComplexObj(i + 32)), ("str34", newComplexObj(i + 33)), ("str35", newComplexObj(i + 34))), 94 | Seq(Seq(Array(Seq(Array(Array(Seq(newCompleteObj(i), newCompleteObj(i + 1), newCompleteObj(i + 2), newCompleteObj(i + 3), newCompleteObj(i + 4), newCompleteObj(i + 5)), 95 | Seq(newCompleteObj(i + 7), newCompleteObj(i + 8), newCompleteObj(i + 9))), 96 | Array(Seq(newCompleteObj(i + 10), newCompleteObj(11)), 97 | Seq(newCompleteObj(i + 12), newCompleteObj(i + 13)))), 98 | Array(Array(Seq(newCompleteObj(i), newCompleteObj(i + 1), newCompleteObj(i + 2), newCompleteObj(i + 3), newCompleteObj(i + 4), newCompleteObj(i + 5)), 99 | Seq(newCompleteObj(i + 7), newCompleteObj(i + 8), newCompleteObj(i + 9))), 100 | Array(Seq(newCompleteObj(i + 10), newCompleteObj(11)), 101 | Seq(newCompleteObj(i + 12), newCompleteObj(i + 13)))))))), 102 | Array(newComplexObj(i), newComplexObj(i / 2), newComplexObj(i / 3))) 103 | 104 | test("complex") { 105 | val listInt = (1 to 10).toList 106 | 107 | val conf = new SparkConf().setAppName("Simple Application").setMaster("local") 108 | val sc = new SparkContext(conf) 109 | 110 | val sqlContext = new SQLContext(sc) 111 | import sqlContext.implicits._ 112 | 113 | 114 | val df = sc.parallelize(listInt.map(newComplObj(_))).toDF() 115 | 116 | val result = AvroConverter.getAvroResult(df.collect, df.schema) 117 | val schema = AvroConverter.getAvroSchema(df.schema) 118 | val avroResult = new AvroResult(schema, result) 119 | val serialized = avroResult.serializeResult 120 | val deserialized = AvroResult.deserializeResult(serialized, schema) 121 | sc.stop() 122 | } 123 | 124 | test("complex custom") { 125 | val listInt = (1 to 10).toList 126 | 127 | val conf = new SparkConf().setAppName("Simple Application").setMaster("local") 128 | val sc = new SparkContext(conf) 129 | 130 | val sqlContext = new SQLContext(sc) 131 | import sqlContext.implicits._ 132 | 133 | 134 | val df = sc.parallelize(listInt.map(newComplObj(_))).toDF() 135 | 136 | val result = CustomConverter.getCustomResult(df.collect, df.schema) 137 | val schema = CustomConverter.getCustomSchema(df.schema) 138 | sc.stop() 139 | } 140 | } -------------------------------------------------------------------------------- /jaws-spark-sql-data/src/test/scala/com/xpatterns/jaws/data/utils/AvroConverterCustomTest.scala: -------------------------------------------------------------------------------- 1 | package com.xpatterns.jaws.data.utils 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.SparkContext 5 | import org.apache.spark.sql.SQLContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest.junit.JUnitRunner 8 | import org.scalatest.FunSuite 9 | import com.xpatterns.jaws.data.utils.ResultsConverter 10 | import com.xpatterns.jaws.data.DTO.AvroResult 11 | //import org.apache.spark.sql.parquet.SparkParquetUtility._ 12 | 13 | case class Positions( 14 | start: Int, 15 | end: Int) 16 | 17 | case class Terms( 18 | name: String, 19 | score: Double, 20 | positions: Seq[Positions]) 21 | 22 | case class AnnotatedTerms( 23 | name: String, 24 | category: String, 25 | score: Double, 26 | positions: Seq[Positions]) 27 | 28 | case class Categories( 29 | name: String, 30 | score: Double) 31 | 32 | case class DocMetainfo( 33 | categories: Seq[Categories], 34 | annotated_terms: Seq[AnnotatedTerms], 35 | terms: Seq[Terms]) 36 | 37 | case class NewPubmed( 38 | authors: Seq[String], 
39 | body: String, 40 | category: String, 41 | documentId: String, 42 | doc_metainfo: DocMetainfo, 43 | publicationDate: String, 44 | publicationYear: Int, 45 | title: String) 46 | 47 | @RunWith(classOf[JUnitRunner]) 48 | class AvroConverterCustomTest extends FunSuite { 49 | 50 | test("result with map of strings") { 51 | val conf = new SparkConf().setAppName("Simple Application").setMaster("local[2]") 52 | val sc = new SparkContext(conf) 53 | 54 | val sqlContext = new SQLContext(sc) 55 | import sqlContext.implicits._ 56 | 57 | val pbList = List(1) 58 | val df = sc.parallelize(pbList).map(_ => new NewPubmed( 59 | List("ana", "ion"), 60 | "body", 61 | "category", 62 | "documentId", 63 | new DocMetainfo( 64 | List(new Categories("name", 1.1)), 65 | List(new AnnotatedTerms("", "category", 1.3, Seq(new Positions(1, 1)))), 66 | List(new Terms("name", 1.5, List(new Positions(1, 2))))), 67 | "publicationDate", 68 | 2015, 69 | "title")).toDF 70 | 71 | val values = df.collect 72 | val result = AvroConverter.getAvroResult(values, df.schema) 73 | val schema = AvroConverter.getAvroSchema(df.schema) 74 | val ar = new AvroResult(schema, result) 75 | val serialized = ar.serializeResult() 76 | val deserialized = AvroResult.deserializeResult(serialized, schema) 77 | 78 | sc.stop() 79 | print("done") 80 | } 81 | 82 | } -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
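The Avro converter tests above all reduce to the same flow: derive an Avro schema from a DataFrame's StructType, convert the collected rows, and round-trip the result through bytes. A pared-down sketch of that flow with a trivial case class; the case class, app name, and sample rows are illustrative only, while the converter calls mirror the tests.

import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.sql.SQLContext
import com.xpatterns.jaws.data.utils.AvroConverter
import com.xpatterns.jaws.data.DTO.AvroResult

case class Person(name: String, age: Int) // sample case class, not from the repository

object AvroRoundTripSketch extends App {
  val sc = new SparkContext(new SparkConf().setAppName("avro-round-trip").setMaster("local"))
  val sqlContext = new SQLContext(sc)
  import sqlContext.implicits._

  val df = sc.parallelize(Seq(Person("Ana", 5), Person("George", 10))).toDF()

  // StructType -> Avro schema, Rows -> Avro records, then a byte-level round trip.
  val schema  = AvroConverter.getAvroSchema(df.schema)
  val records = AvroConverter.getAvroResult(df.collect, df.schema)
  val bytes   = new AvroResult(schema, records).serializeResult()
  val back    = AvroResult.deserializeResult(bytes, schema) // yields the records again

  sc.stop()
}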
-------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | jaws-spark-sql-rest-integration-tests 7 | 1.1.0 8 | jar 9 | 10 | jaws-spark-sql-rest-integration-tests 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | false 19 | 20 | 21 | true 22 | 23 | 24 | 25 | cloudera-repo-releases 26 | https://repository.cloudera.com/artifactory/repo/ 27 | 28 | 29 | Akka repository 30 | http://repo.akka.io/releases 31 | 32 | 33 | 34 | 35 | UTF-8 36 | 2.10 37 | 1.2.3 38 | 2.10.3 39 | 2.0.5 40 | 1.2.3 41 | 1.2.1 42 | 43 | 44 | 45 | 46 | org.scalatest 47 | scalatest_2.10 48 | 2.2.4 49 | 50 | 51 | com.typesafe 52 | config 53 | 1.2.1 54 | 55 | 56 | junit 57 | junit 58 | 4.4 59 | 60 | 61 | io.spray 62 | spray-client 63 | 1.3.1 64 | 65 | 66 | com.typesafe.akka 67 | akka-actor_2.10 68 | 2.3.0 69 | 70 | 71 | 72 | com.xpatterns 73 | jaws-spark-sql-data 74 | 1.1.0-spark1.1.0 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | src/test/resources 83 | 84 | 85 | ${project.artifactId} 86 | 87 | 88 | org.apache.maven.plugins 89 | maven-compiler-plugin 90 | 91 | 92 | 1.6 93 | 1.6 94 | 95 | 96 | 97 | org.scala-tools 98 | maven-scala-plugin 99 | 100 | 101 | scala-compile-first 102 | process-resources 103 | 104 | add-source 105 | compile 106 | 107 | 108 | 109 | scala-test-compile 110 | process-test-resources 111 | 112 | testCompile 113 | 114 | 115 | 116 | 117 | 118 | org.apache.maven.plugins 119 | maven-shade-plugin 120 | 2.2 121 | 122 | 123 | 124 | package 125 | 126 | shade 127 | 128 | 129 | 130 | 131 | 132 | *:* 133 | 134 | META-INF/*.SF 135 | META-INF/*.DSA 136 | META-INF/*.RSA 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | org.apache.maven.plugins 147 | maven-surefire-plugin 148 | 2.7 149 | 150 | true 151 | 152 | 153 | 154 | 155 | org.scalatest 156 | scalatest-maven-plugin 157 | 1.0 158 | 159 | ${project.build.directory}/surefire-reports 160 | . 
161 | WDF TestSuite.txt 162 | 163 | 164 | 165 | test 166 | 167 | test 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | ######### application configuration ################### 2 | appConf{ 3 | jawsUrl="http://devbox.local:9080/jaws/" 4 | jawsHiveUrl="http://devbox.local:7080/jaws/hive/" 5 | namenodeIp="devbox.local" 6 | hdfsInputFolder="jawsTestFolder" 7 | database="testJawsDatabase" 8 | table="testPersons" 9 | runTachyon=true 10 | parquetFolder=jawsTest.parquet 11 | parquetTable=jawsTestParquet 12 | 13 | } 14 | 15 | 16 | -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/_metadata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/jaws-spark-sql-rest/e5b2d422f135d9307c54857d558b9022610a293c/jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/_metadata -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/part-r-1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/jaws-spark-sql-rest/e5b2d422f135d9307c54857d558b9022610a293c/jaws-spark-sql-integration-tests/src/test/resources/jawsTest.parquet/part-r-1.parquet -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/resources/people.txt: -------------------------------------------------------------------------------- 1 | Ana,5,f 2 | George,10,m 3 | Alina,20,f 4 | Paul,12,m 5 | Pavel,16,m 6 | Ioana,30,f -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/scala/api/GetDatabasesApiTest.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import com.google.gson.Gson 4 | import com.xpatterns.jaws.data.DTO.{Tables, Databases} 5 | import org.junit.runner.RunWith 6 | import org.scalatest.junit.JUnitRunner 7 | import spray.client.pipelining._ 8 | import scala.concurrent.Future 9 | import scala.concurrent.Await 10 | import scala.concurrent.duration.Duration._ 11 | import scala.util.Success 12 | import scala.util.Failure 13 | import spray.http._ 14 | import spray.httpx.SprayJsonSupport._ 15 | import foundation.TestBase 16 | import scala.concurrent._ 17 | import ExecutionContext.Implicits.global 18 | 19 | @RunWith(classOf[JUnitRunner]) 20 | class GetDatabasesApiTest extends TestBase { 21 | 22 | test(" get databases ") { 23 | val url = s"${jawsUrl}hive/databases" 24 | 25 | val pipeline: HttpRequest => Future[Databases] = ( 26 | addHeader("X-My-Special-Header", "fancy-value") 27 | ~> sendReceive 28 | ~> unmarshal[Databases]) 29 | 30 | val response: Future[Databases] = pipeline(Get(url)) 31 | Await.ready(response, Inf).value.get match { 32 | case Success(r: Databases) => 33 | assert(r != null) 34 | assert(r.databases.contains("default")) 35 | 36 | case Failure(e) => 37 | println(e.getMessage) 38 | fail() 39 | } 40 | } 41 | 42 | test("tables api") { 43 | val response = get(s"${jawsUrl}hive/tables") 44 | 45 | Await.ready(response, 
Inf).value.get match { 46 | case Success(r: HttpResponse) => 47 | assert(r.status.isSuccess) 48 | val responseText = r.entity.data.asString 49 | val gson = new Gson() 50 | val tables = gson.fromJson(responseText, classOf[Array[Tables]]) 51 | assert(tables.nonEmpty, "There is no table") 52 | 53 | case Failure(e) => 54 | println(e.getMessage) 55 | fail() 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/scala/api/JawsIsUpTest.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.junit.JUnitRunner 5 | import foundation.TestBase 6 | import com.xpatterns.jaws.data.utils.Utils 7 | import org.apache.hadoop.fs.FileUtil 8 | import org.apache.hadoop.fs.FileSystem 9 | import java.io.File 10 | import org.apache.hadoop.fs.Path 11 | import akka.io.IO 12 | import akka.pattern.ask 13 | import spray.can.Http 14 | import spray.http._ 15 | import spray.client.pipelining._ 16 | import akka.actor.ActorSystem 17 | import scala.concurrent.Future 18 | import scala.concurrent.Await 19 | import scala.concurrent.duration._ 20 | import scala.concurrent.duration.Duration._ 21 | import scala.util.Success 22 | import scala.util.Failure 23 | 24 | @RunWith(classOf[JUnitRunner]) 25 | class JawsIsUpTest extends TestBase { 26 | 27 | test(" Jaws is up and running ") { 28 | implicit val system = ActorSystem() 29 | import system.dispatcher // execution context for futures 30 | 31 | val pipeline: HttpRequest => Future[HttpResponse] = sendReceive 32 | val response: Future[HttpResponse] = pipeline(Get(s"${jawsUrl}index")) 33 | 34 | Await.ready(response, Inf).value.get match { 35 | case Success(r : HttpResponse) => { 36 | assert(r.status.isSuccess) 37 | assert(r.entity.data.asString === "Jaws is up and running!", "Jaws is not Up!") 38 | } 39 | case Failure(e) => {println(e.getMessage) 40 | fail() 41 | } 42 | } 43 | } 44 | 45 | } -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/scala/api/ParquetManagementApiTest.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.junit.JUnitRunner 5 | import foundation.TestBase 6 | import com.xpatterns.jaws.data.utils.Utils 7 | import org.apache.hadoop.fs.FileUtil 8 | import org.apache.hadoop.fs.FileSystem 9 | import java.io.File 10 | import org.apache.hadoop.fs.Path 11 | import spray.http._ 12 | import scala.concurrent.Await 13 | import scala.concurrent.duration.Duration._ 14 | import scala.util.Success 15 | import scala.util.Failure 16 | @RunWith(classOf[JUnitRunner]) 17 | class ParquetManagementApiTest extends TestBase { 18 | 19 | override def beforeAll() { 20 | println("creating parquet folder on hdfs") 21 | Utils.createFolderIfDoesntExist(hadoopConf, parquetFolder, true) 22 | val fs = FileSystem.newInstance(hadoopConf) 23 | val metadataFile = new File(getClass().getResource("/jawsTest.parquet/_metadata").getPath()) 24 | val dataFile = new File(getClass().getResource("/jawsTest.parquet/part-r-1.parquet").getPath()) 25 | FileUtil.copy(metadataFile, fs, new Path(parquetFolder), false, hadoopConf) 26 | FileUtil.copy(dataFile, fs, new Path(parquetFolder), false, hadoopConf) 27 | } 28 | 29 | test(" register test table ") { 30 | 31 | val username = System.getProperties().get("user.name") 32 | val url = 
s"${jawsUrl}parquet/tables?path=/user/$username/$parquetFolder/&pathType=hdfs&name=$parquetTable&overwrite=true" 33 | 34 | val postResult = post(url, "") 35 | 36 | Await.ready(postResult, Inf).value.get match { 37 | case Success(r: HttpResponse) => { 38 | assert(r.status.isSuccess) 39 | assert(r.entity.data.asString.equals(s"Table $parquetTable was registered")) 40 | } 41 | case Failure(e) => { 42 | println(e.getMessage) 43 | fail() 44 | } 45 | 46 | } 47 | } 48 | 49 | test(" register test table overwrite false ") { 50 | 51 | val username = System.getProperties().get("user.name") 52 | val url = s"${jawsUrl}parquet/tables?path=/user/$username/$parquetFolder/&pathType=hdfs&name=$parquetTable&overwrite=false" 53 | 54 | val postResult = post(url, "") 55 | 56 | Await.ready(postResult, Inf).value.get match { 57 | case Success(r: HttpResponse) => { 58 | assert(r.status.isFailure) 59 | assert(r.entity.data.asString.equals(s"The table already exists!")) 60 | } 61 | case Failure(e) => { 62 | println(e.getMessage) 63 | fail() 64 | } 65 | 66 | } 67 | } 68 | 69 | test(" select * from parquet table ") { 70 | 71 | val url = s"${jawsUrl}run?limited=true" 72 | val body = s"select * from $parquetTable" 73 | 74 | val queryId = postRun(url, body) 75 | val queryStatus = waitforCompletion(queryId, 100) 76 | assert(queryStatus === "DONE", "Query is not DONE!") 77 | validataAllResultsFromParquetTable(queryId) 78 | 79 | } 80 | 81 | test(" unregister test table ") { 82 | 83 | val username = System.getProperties().get("user.name") 84 | val url = s"${jawsUrl}parquet/tables/$parquetTable" 85 | 86 | val deleteResult = delete(url) 87 | 88 | Await.ready(deleteResult, Inf).value.get match { 89 | case Success(r: HttpResponse) => { 90 | assert(r.status.isSuccess) 91 | assert(r.entity.data.asString.equals(s"Table $parquetTable was unregistered")) 92 | } 93 | case Failure(e) => { 94 | println(e.getMessage) 95 | fail() 96 | } 97 | 98 | } 99 | } 100 | } -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/scala/api/RunHiveApiTest.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.junit.JUnitRunner 5 | import foundation.TestBase 6 | import com.xpatterns.jaws.data.utils.Utils 7 | import org.apache.hadoop.fs.FileUtil 8 | import org.apache.hadoop.fs.FileSystem 9 | import java.io.File 10 | import org.apache.hadoop.fs.Path 11 | import akka.io.IO 12 | import akka.pattern.ask 13 | import spray.can.Http 14 | import spray.http._ 15 | import spray.client.pipelining._ 16 | import akka.actor.ActorSystem 17 | import scala.concurrent.Future 18 | import scala.concurrent.Await 19 | import scala.concurrent.duration._ 20 | import scala.concurrent.duration.Duration._ 21 | import scala.util.Success 22 | import scala.util.Failure 23 | import scala.collection.GenSeq 24 | 25 | @RunWith(classOf[JUnitRunner]) 26 | class RunHiveApiTest extends TestBase { 27 | 28 | test(" select count ") { 29 | 30 | val url = s"${jawsHiveUrl}run?limit=10" 31 | val body = s"use $database;\nselect count(*) from $table" 32 | 33 | val queryId = postRun(url, body) 34 | val queryStatus = waitforCompletion(queryId, 100) 35 | assert(queryStatus === "DONE", "Query is not DONE!") 36 | val results = getResults(queryId, 0, 200) 37 | assert(1 === results.result.length, "Different number of rows") 38 | assert(1 === results.result(0).length, "Different number of rows2") 39 | assert("6" === 
results.result(0)(0), "Different count") 40 | } 41 | 42 | test(" select * limited") { 43 | 44 | val url = s"${jawsHiveUrl}run?" 45 | val queryID = selectAllFromTable(url, table) 46 | validataAllResultsFromNormalTable(queryID, true) 47 | } 48 | 49 | test(" select * unlimited") { 50 | 51 | val url = s"${jawsHiveUrl}run" 52 | val queryID = selectAllFromTable(url, table) 53 | validataAllResultsFromNormalTable(queryID, true) 54 | } 55 | } -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/scala/api/TestSuite.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.junit.JUnitRunner 5 | import org.scalatest.Suites 6 | 7 | @RunWith(classOf[JUnitRunner]) 8 | class TestSuite extends Suites(new JawsIsUpTest, new RunApiTest, new GetDatabasesApiTest, new ParquetManagementApiTest, 9 | new RunHiveApiTest) { 10 | } -------------------------------------------------------------------------------- /jaws-spark-sql-integration-tests/src/test/scala/foundation/UtilOperations.scala: -------------------------------------------------------------------------------- 1 | package foundation 2 | 3 | class UtilOperations{ 4 | 5 | } 6 | object UtilOperations { 7 | 8 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
-------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/assembly/archive.xml: -------------------------------------------------------------------------------- 1 | 5 | archive 6 | 7 | tar.gz 8 | 9 | 10 | 11 | ${basedir}/src/main/webapp 12 | resources/webapp 13 | 14 | 15 | ${project.build.directory}/temp_build 16 | / 17 | 18 | 19 | ${basedir}/conf 20 | /conf 21 | 22 | 23 | ${project.build.directory} 24 | /target/ 25 | 26 | jaws-spark-sql-rest.jar 27 | 28 | 29 | 30 | 31 | 32 | ${basedir}/src/main/scripts/start-jaws.sh 33 | /bin/ 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | spray.can.server { 2 | # uncomment the next line for making this an HTTPS example 3 | # ssl-encryption = on 4 | idle-timeout = 301 s 5 | request-timeout = 300 s 6 | } 7 | 8 | remote{ 9 | akka { 10 | //loglevel = "DEBUG" 11 | actor { 12 | provider = "akka.remote.RemoteActorRefProvider" 13 | } 14 | remote { 15 | enabled-transports = ["akka.remote.netty.tcp"] 16 | log-sent-messages = on 17 | log-received-messages = on 18 | netty.tcp { 19 | transport-class = "akka.remote.transport.netty.NettyTransport" 20 | hostname = "devbox.local" 21 | port = 4042 22 | } 23 | } 24 | } 25 | } 26 | 27 | ############ spark configuration - see spark documentation #################### 28 | sparkConfiguration { 29 | spark-executor-memory=2g 30 | spark-mesos-coarse=false 31 | spark-scheduler-mode=FAIR 32 | spark-cores-max=2 33 | spark-master="spark://devbox.local:7077" 34 | spark-path="/home/ubuntu/latest-mssh/spark-1.1.0" 35 | spark-mesos-executor-home="/home/ubuntu/latest-mssh/spark-1.1.0" 36 | spark-default-parallelism=384 37 | spark-storage-memoryFraction=0.3 38 | spark-shuffle-memoryFraction=0.6 39 | spark-shuffle-compress=true 40 | spark-shuffle-spill-compress=true 41 | spark-reducer-maxMbInFlight=48 42 | spark-akka-frameSize=10000 43 | spark-akka-threads=4 44 | spark-akka-timeout=100 45 | spark-task-maxFailures=4 46 | spark-shuffle-consolidateFiles=true 47 | spark-deploy-spreadOut=true 48 | spark-shuffle-spill=false 49 | #Serialization settings commented until more tests are performed 50 | #spark-serializer="org.apache.spark.serializer.KryoSerializer" 51 | #spark-kryoserializer-buffer-mb=10 52 | #spark-kryoserializer-buffer-max-mb=64 53 | spark-kryo-referenceTracking=false 54 | 55 | 56 | } 57 | 58 | ######### application configuration ################### 59 | appConf{ 60 | # the interface on which to start the spray server : localhost/ip/hostname 61 | server.interface=localhost 62 | # the cors filter allowed hosts 63 | cors-filter-allowed-hosts="*" 64 | # the default number of results retrieved on queries 65 | nr.of.results=100 66 | # the ip of the destination namenode - it is used when querying with unlimited number of results. 67 | rdd.destination.ip="devbox.local" 68 | # where to store the results in the case of an unlimited query. Possible results : hdfs/tachyon. 
Default hdfs 69 | rdd.destination.location="hdfs" 70 | # the remote domain actor address 71 | remote.domain.actor="" 72 | #remote.domain.actor="devbox.local:port,devbox2.local:port" 73 | # application name 74 | application.name="Jaws" 75 | # the port on which to deploy the apis 76 | web.services.port=9080 77 | # the port on which to deploy the web sockets api (logs) 78 | web.sockets.port=8182 79 | # the number of threads used to execute shark commands 80 | nr.of.threads=10 81 | # implicit akka timeout 82 | timeout=1000000 83 | # where to log: app.logging.type = cassandra/hdfs 84 | app.logging.type=cassandra 85 | # folder where to write the results schema 86 | schemaFolder=jawsSchemaFolder 87 | # the path to the xpatterns-jaws jar in the target folder 88 | jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/jaws-spark-sql-rest.jar 89 | # the path to the hdfs namenode 90 | hdfs-namenode-path="hdfs://devbox.local:8020" 91 | # the path to the tachyon namenode 92 | tachyon-namenode-path="tachyon://devbox.local:19998" 93 | #jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/test-app.jar 94 | } 95 | 96 | ########## hadoop configuration - skip this if you are using cassandra logging ######## 97 | hadoopConf { 98 | namenode="hdfs://devbox.local:8020" 99 | replicationFactor=1 100 | # set to true if you want to start fresh (all the existing folders will be recreated) 101 | forcedMode=false 102 | # folder where to write the logs 103 | loggingFolder=jawsLogs 104 | # folder where to write the jobs states 105 | stateFolder=jawsStates 106 | # folder where to write the jobs details 107 | detailsFolder=jawsDetails 108 | # folder where to write the jobs results 109 | resultsFolder=jawsResultsFolder 110 | # folder where to write the jobs meta information 111 | metaInfoFolder=jawsMetainfoFolder 112 | # folder where to write the query name information 113 | queryNameFolder=jawsQueryNameFolder 114 | # folder where to write the published queries 115 | queryPublishedFolder=jawsQueryPublishedFolder 116 | # folder where to write the unpublished queries 117 | queryUnpublishedFolder=jawsQueryUnpublishedFolder 118 | # folder where to write the parquet tables information 119 | parquetTablesFolder=parquetTablesFolder 120 | } 121 | 122 | ########## cassandra configuration - skip this if you are using hdfs logging ########## 123 | cassandraConf { 124 | cassandra.host="devbox.local:9160" 125 | cassandra.keyspace=xpatterns_jaws 126 | cassandra.cluster.name=Jaws 127 | } 128 | 129 | 130 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/resources/cassandra-schema.txt: -------------------------------------------------------------------------------- 1 | create keyspace xpatterns_jaws 2 | with strategy_options={replication_factor:1} 3 | and placement_strategy = 'NetworkTopologyStrategy' 4 | and strategy_options = {DC1:2,DC2:2}; 5 | 6 | use xpatterns_jaws; 7 | 8 | create column family logs 9 | with comparator = 'CompositeType(Int32Type,UTF8Type,LongType)' 10 | AND key_validation_class = 'Int32Type' 11 | AND default_validation_class = 'BytesType'; 12 | 13 | 14 | create column family results 15 | with comparator = 'CompositeType(UTF8Type,UTF8Type,Int32Type)' 16 | AND key_validation_class = 'Int32Type' 17 | AND default_validation_class = 'BytesType'; 18 | 19 | 20 | create column family parquet_tables 21 | with comparator = 'UTF8Type' 22 | AND key_validation_class = 'UTF8Type' 23 | AND default_validation_class = 'BytesType'; 24 | 
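-- Usage sketch (not part of the original schema file): assuming the legacy Cassandra CLI is used to apply
-- this schema, it can be loaded against the host/port configured under cassandraConf in application.conf, e.g.
--   cassandra-cli -h devbox.local -p 9160 -f cassandra-schema.txt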
-------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/resources/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://devbox.local:8020 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 | hadoop.proxyuser.mapred.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.mapred.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.flume.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.flume.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.HTTP.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.HTTP.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hive.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hive.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.hue.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.hue.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.httpfs.hosts 83 | * 84 | 85 | 86 | hadoop.proxyuser.httpfs.groups 87 | * 88 | 89 | 90 | hadoop.proxyuser.hdfs.groups 91 | * 92 | 93 | 94 | hadoop.proxyuser.hdfs.hosts 95 | * 96 | 97 | 98 | hadoop.security.group.mapping 99 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 100 | 101 | 102 | hadoop.security.instrumentation.requires.admin 103 | false 104 | 105 | 106 | io.file.buffer.size 107 | 65536 108 | 109 | 110 | hadoop.ssl.enabled 111 | false 112 | 113 | 114 | hadoop.ssl.require.client.cert 115 | false 116 | true 117 | 118 | 119 | hadoop.ssl.keystores.factory.class 120 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 121 | true 122 | 123 | 124 | hadoop.ssl.server.conf 125 | ssl-server.xml 126 | true 127 | 128 | 129 | hadoop.ssl.client.conf 130 | ssl-client.xml 131 | true 132 | 133 | 134 | fs.tachyon.impl 135 | tachyon.hadoop.TFS 136 | 137 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/resources/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | hive.metastore.local 7 | false 8 | 9 | 10 | hive.metastore.uris 11 | thrift://flaviusi-VirtualBox.local:9083 12 | 13 | 14 | hive.metastore.client.socket.timeout 15 | 300 16 | 17 | 18 | hive.metastore.warehouse.dir 19 | /user/hive/warehouse 20 | 21 | 22 | hive.warehouse.subdir.inherit.perms 23 | true 24 | 25 | 26 | mapred.reduce.tasks 27 | -1 28 | 29 | 30 | hive.exec.reducers.bytes.per.reducer 31 | 1073741824 32 | 33 | 34 | hive.exec.reducers.max 35 | 999 36 | 37 | 38 | hive.metastore.execute.setugi 39 | true 40 | 41 | 42 | hive.support.concurrency 43 | false 44 | 45 | 46 | hive.zookeeper.quorum 47 | flaviusi-VirtualBox.local 48 | 49 | 50 | hive.zookeeper.client.port 51 | 2181 52 | 53 | 54 | hbase.zookeeper.quorum 55 | flaviusi-VirtualBox.local 56 | 57 | 58 | hbase.zookeeper.property.clientPort 59 | 2181 60 | 61 | 62 | hive.zookeeper.namespace 63 | hive_zookeeper_namespace_hive 64 | 65 | 66 | hive.server2.enable.doAs 67 | true 68 | 69 | 70 | 
fs.hdfs.impl.disable.cache 71 | true 72 | 73 | 74 | hive.server2.use.SSL 75 | false 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/resources/jaws-env.sh: -------------------------------------------------------------------------------- 1 | export TACHYON_WAREHOUSE_PATH=/sharktables 2 | export TACHYON_MASTER=tachyon://devbox.local:19998 3 | export MESOS_NATIVE_LIBRARY=/home/user/mesos-0.19.0/lib/libmesos.so 4 | export LOGGING_OPTS="-Dlog4j.configuration=log4j.properties -DJAWS_LOG_FOLDER=$logsFolder" 5 | export JAVA_OPTS="$LOGGING_OPTS -XX:PermSize=1g -XX:MaxPermSize=1g -Djava.library.path=/home/user/mesos-0.19.0/lib/libmesos.so:/home/user/hadoopNativeLibs" 6 | 7 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # 3 | # The following properties set the logging levels and log appender. The 4 | # log4j.rootCategory variable defines the default log level and one or more 5 | # appenders. For the console, use 'S'. For the daily rolling file, use 'R'. 6 | # For an HTML formatted log, use 'H'. 7 | # 8 | # To override the default (rootCategory) log level, define a property of the 9 | # form (see below for available values): 10 | # 11 | # log4j.logger. = 12 | # 13 | # Available logger names: 14 | # TODO 15 | # 16 | # Possible Log Levels: 17 | # FATAL, ERROR, WARN, INFO, DEBUG 18 | # 19 | #------------------------------------------------------------------------------ 20 | 21 | log4j.rootCategory = INFO, defaultConsole, defaultFile 22 | 23 | #------------------------------------------------------------------------------ 24 | # 25 | # The following properties configure the console (stdout) appender. 26 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 27 | # 28 | #------------------------------------------------------------------------------ 29 | log4j.appender.defaultConsole = org.apache.log4j.ConsoleAppender 30 | log4j.appender.defaultConsole.layout = org.apache.log4j.PatternLayout 31 | log4j.appender.defaultConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 32 | 33 | #------------------------------------------------------------------------------ 34 | # 35 | # The following properties configure the Daily Rolling File appender. 36 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 
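# Note: ${JAWS_LOG_FOLDER} used by this appender is not defined in this file; it is supplied at start-up as a
# system property (see jaws-env.sh, which adds -DJAWS_LOG_FOLDER to LOGGING_OPTS), so the rolling log file
# lands under that folder.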
37 | # 38 | #------------------------------------------------------------------------------ 39 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender 40 | log4j.appender.defaultFile.File = ${JAWS_LOG_FOLDER}/jaws-spark-sql-rest.log 41 | log4j.appender.defaultFile.Append = true 42 | log4j.appender.defaultFile.DatePattern = '.'yyy-MM-dd 43 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout 44 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 45 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/resources/sharkSettings.txt: -------------------------------------------------------------------------------- 1 | set spark.sql.shuffle.partitions=12 2 | set hive.column.compress=true -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/ActorUtils.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import akka.actor.ActorRef 4 | import messages.ErrorMessage 5 | 6 | import scala.util.{Failure, Success, Try} 7 | 8 | /** 9 | * Created by emaorhian 10 | */ 11 | 12 | object ActorsPaths { 13 | 14 | def REMOTE_ACTOR_SYSTEM_PREFIX_PATH = "akka.tcp://remoteSystem@" 15 | 16 | def LOCAL_SUPERVISOR_ACTOR_NAME = "LocalSupervisor" 17 | def LOCAL_SUPERVISOR_ACTOR_PATH = s"/user/$LOCAL_SUPERVISOR_ACTOR_NAME" 18 | 19 | def REMOTE_SUPERVISOR_ACTOR_NAME = "RemoteSupervisor" 20 | def REMOTE_SUPERVISOR_ACTOR_PATH = s"/user/$REMOTE_SUPERVISOR_ACTOR_NAME" 21 | 22 | def GET_QUERIES_ACTOR_NAME = "GetQueries" 23 | def GET_QUERIES_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_QUERIES_ACTOR_NAME" 24 | 25 | def GET_TABLES_ACTOR_NAME = "GetTables" 26 | def GET_TABLES_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_TABLES_ACTOR_NAME" 27 | 28 | def RUN_SCRIPT_ACTOR_NAME = "RunScript" 29 | def RUN_SCRIPT_ACTOR_PATH = s"$REMOTE_SUPERVISOR_ACTOR_PATH/$RUN_SCRIPT_ACTOR_NAME" 30 | 31 | def GET_LOGS_ACTOR_NAME = "GetLogs" 32 | def GET_LOGS_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_LOGS_ACTOR_NAME" 33 | 34 | def LOGS_WEBSOCKETS_ACTOR_NAME = "LogsWebsockets" 35 | def LOGS_WEBSOCKETS_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$LOGS_WEBSOCKETS_ACTOR_NAME" 36 | 37 | def GET_RESULTS_ACTOR_NAME = "GetResults" 38 | def GET_RESULTS_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_RESULTS_ACTOR_NAME" 39 | 40 | def GET_DATABASES_ACTOR_NAME = "GetDatabases" 41 | def GET_DATABASES_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_DATABASES_ACTOR_NAME" 42 | 43 | def GET_DATASOURCE_SCHEMA_ACTOR_NAME = "GetDatasourceSchemaActor" 44 | def GET_DATASOURCE_SCHEMA_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_DATASOURCE_SCHEMA_ACTOR_NAME" 45 | 46 | def BALANCER_ACTOR_NAME = "Balancer" 47 | def BALANCER_ACTOR_PATH = s"$REMOTE_SUPERVISOR_ACTOR_NAME/$BALANCER_ACTOR_NAME" 48 | 49 | def DELETE_QUERY_ACTOR_NAME = "DeleteQuery" 50 | def DELETE_QUERY_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$DELETE_QUERY_ACTOR_NAME" 51 | 52 | def QUERY_NAME_ACTOR_NAME = "QueryName" 53 | def QUERY_NAME_ACTOR_PATH = s"$LOCAL_SUPERVISOR_ACTOR_PATH/$QUERY_NAME_ACTOR_NAME" 54 | 55 | def REGISTER_PARQUET_TABLE_ACTOR_NAME = "RegisterParquetTable" 56 | def REGISTER_PARQUET_TABLE_ACTOR_PATH = s"$REMOTE_SUPERVISOR_ACTOR_PATH/$REGISTER_PARQUET_TABLE_ACTOR_NAME" 57 | 58 | def GET_PARQUET_TABLES_ACTOR_NAME = "GetParquetTables" 59 | def GET_PARQUET_TABLES_ACTOR_PATH = 
s"$LOCAL_SUPERVISOR_ACTOR_PATH/$GET_PARQUET_TABLES_ACTOR_NAME" 60 | } 61 | 62 | object ActorOperations { 63 | def returnResult (tryResult : Try[Any], results : Any, errorMessage : String, senderActor: ActorRef){ 64 | tryResult match { 65 | case Success(v) => senderActor ! results 66 | case Failure(e) => senderActor ! ErrorMessage(s"$errorMessage ${e.getMessage}") 67 | } 68 | } 69 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/BalancerActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import akka.actor.ActorRef 4 | import akka.actor.Actor 5 | import akka.actor.ActorSelection 6 | import server.Configuration 7 | import messages.CancelMessage 8 | import server.JawsController 9 | import apiactors.ActorsPaths._ 10 | import messages.RegisterTableMessage 11 | import akka.pattern._ 12 | import akka.util.Timeout 13 | import messages.UnregisterTableMessage 14 | 15 | class BalancerActor extends Actor { 16 | var runActors: Array[ActorSelection] = null 17 | var registerParquetTableActors: Array[ActorSelection] = null 18 | implicit val timeout = Timeout(Configuration.timeout.toInt) 19 | 20 | if (!Configuration.remoteDomainActor.getOrElse("").isEmpty) { 21 | Configuration.log4j.info(s"There are remote actors at: ${Configuration.remoteDomainActor}") 22 | runActors = for (actorIp <- Configuration.remoteDomainActor.get.split(",")) yield context.actorSelection(s"$REMOTE_ACTOR_SYSTEM_PREFIX_PATH$actorIp$RUN_SCRIPT_ACTOR_PATH") 23 | registerParquetTableActors = for (actorIp <- Configuration.remoteDomainActor.get.split(",")) yield context.actorSelection(s"$REMOTE_ACTOR_SYSTEM_PREFIX_PATH$actorIp$REGISTER_PARQUET_TABLE_ACTOR_PATH") 24 | } 25 | 26 | def receive = { 27 | case message: CancelMessage => 28 | JawsController.runScriptActor ! message 29 | Option(runActors) match { 30 | case None => Configuration.log4j.info("[BalancerActor] There aren't any remote run actors to send the cancel message to!") 31 | case _ => runActors.foreach { dom => dom ! message } 32 | } 33 | 34 | case message @ (_: RegisterTableMessage | _: UnregisterTableMessage) => { 35 | Option(registerParquetTableActors) match { 36 | case None => Configuration.log4j.info("[BalancerActor] There aren't any remote register parquet actors to send the register table message to!") 37 | case _ => registerParquetTableActors.foreach { dom => 38 | { 39 | Configuration.log4j.info(s"Sending message to the registering actor at ${dom}") 40 | dom ! message 41 | } 42 | } 43 | } 44 | 45 | sender ! JawsController.registerParquetTableActor ? 
message 46 | } 47 | 48 | } 49 | 50 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/DeleteQueryApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import com.xpatterns.jaws.data.contracts.DAL 4 | import akka.actor.Actor 5 | import messages.DeleteQueryMessage 6 | import server.Configuration 7 | import com.xpatterns.jaws.data.utils.QueryState 8 | import scala.concurrent._ 9 | import ExecutionContext.Implicits.global 10 | import scala.util.{ Success, Failure } 11 | import messages.ErrorMessage 12 | 13 | class DeleteQueryApiActor(dals: DAL) extends Actor { 14 | override def receive = { 15 | 16 | case message: DeleteQueryMessage => { 17 | 18 | Configuration.log4j.info(s"[DeleteQueryApiActor]: deleting query with id ${message.queryID}") 19 | 20 | val currentSender = sender 21 | 22 | val deleteQueryFuture = future { 23 | dals.loggingDal.getState(message.queryID) match { 24 | case QueryState.IN_PROGRESS => throw new Exception(s"The query ${message.queryID} is IN_PROGRESS. Please wait for its completion or cancel it") 25 | case QueryState.NOT_FOUND => throw new Exception(s"The query ${message.queryID} was not found. Please provide a valid query id") 26 | case _ => { 27 | dals.loggingDal.deleteQuery(message.queryID) 28 | dals.resultsDal.deleteResults(message.queryID) 29 | s"Query ${message.queryID} was deleted" 30 | } 31 | } 32 | } 33 | 34 | deleteQueryFuture onComplete { 35 | case Success(successfulMessage) => currentSender ! successfulMessage 36 | case Failure(e) => currentSender ! ErrorMessage(s"DELETE query failed with the following message: ${e.getMessage}") 37 | } 38 | } 39 | } 40 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/GetDatabasesApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import akka.actor.Actor 4 | import akka.actor.actorRef2Scala 5 | import apiactors.ActorOperations._ 6 | import com.google.common.base.Preconditions 7 | import server.LogsActor 8 | import akka.actor.ActorLogging 9 | import com.xpatterns.jaws.data.contracts.DAL 10 | import messages.GetDatabasesMessage 11 | import java.util.UUID 12 | import server.Configuration 13 | import org.apache.spark.sql.hive.HiveContext 14 | import org.apache.spark.scheduler.HiveUtils 15 | import implementation.HiveContextWrapper 16 | import scala.concurrent._ 17 | import ExecutionContext.Implicits.global 18 | import scala.util.{ Success, Failure } 19 | import messages.ErrorMessage 20 | import scala.util.Try 21 | import com.xpatterns.jaws.data.DTO.Column 22 | import com.xpatterns.jaws.data.DTO.Databases 23 | 24 | /** 25 | * Created by emaorhian 26 | */ 27 | class GetDatabasesApiActor(hiveContext: HiveContextWrapper, dals: DAL) extends Actor { 28 | 29 | override def receive = { 30 | 31 | case message: GetDatabasesMessage => { 32 | Configuration.log4j.info("[GetDatabasesApiActor]: showing databases") 33 | val currentSender = sender 34 | 35 | val getDatabasesFuture = future { 36 | val uuid = System.currentTimeMillis() + UUID.randomUUID().toString() 37 | val metadataQueryResult = HiveUtils.runMetadataCmd(hiveContext, "show databases").flatten 38 | new Databases(metadataQueryResult) 39 | 40 | } 41 | 42 | getDatabasesFuture onComplete { 43 | case Success(result) => currentSender ! result 44 | case Failure(e) => currentSender ! 
ErrorMessage(s"GET databases failed with the following message: ${e.getMessage}") 45 | } 46 | } 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/GetDatasourceSchemaActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import akka.actor.Actor 4 | import implementation.SchemaSettingsFactory.{ Hdfs, Hive, Parquet, Tachyon } 5 | import implementation.HiveContextWrapper 6 | import messages.GetDatasourceSchemaMessage 7 | import org.apache.spark.scheduler.HiveUtils 8 | import org.apache.spark.sql.catalyst.analysis.NoSuchTableException 9 | import org.apache.spark.sql.parquet.SparkParquetUtility._ 10 | import server.Configuration 11 | import scala.concurrent._ 12 | import ExecutionContext.Implicits.global 13 | import scala.util.{ Success, Failure } 14 | import messages.ErrorMessage 15 | import com.xpatterns.jaws.data.utils.{Utils, AvroConverter} 16 | import org.apache.spark.sql.types.StructType 17 | import com.xpatterns.jaws.data.utils.Utils._ 18 | /** 19 | * Handles the operations used for getting the schema 20 | */ 21 | class GetDatasourceSchemaActor(hiveContext: HiveContextWrapper) extends Actor { 22 | 23 | def receive = { 24 | case request: GetDatasourceSchemaMessage => 25 | val hostname: String = Configuration.rddDestinationIp.get 26 | val path: String = s"${request.path}" 27 | Configuration.log4j.info(s"Getting the data source schema for path $path, sourceType ${request.sourceType}, storageType ${request.storageType}") 28 | val currentSender = sender() 29 | 30 | val getDatasourceSchemaFuture = future { 31 | var result: StructType = null 32 | request.sourceType match { 33 | case Hive() => 34 | 35 | try { 36 | val table = hiveContext.table(path) 37 | result = table.schema 38 | } catch { 39 | // When the table doesn't exists, throw a new exception with a better message. 40 | case _:NoSuchTableException => throw new Exception("Table does not exist") 41 | } 42 | case Parquet() => 43 | request.storageType match { 44 | case Hdfs() => 45 | val hdfsURL = HiveUtils.getHdfsPath(hostname) 46 | 47 | // Make sure that file exists 48 | checkFileExistence(request.hdfsConf, hdfsURL, path) 49 | 50 | result = hiveContext.readXPatternsParquet(hdfsURL, path).schema 51 | case Tachyon() => 52 | val tachyonURL = HiveUtils.getTachyonPath(hostname) 53 | 54 | // Make sure that file exists 55 | checkFileExistence(request.hdfsConf, tachyonURL, path) 56 | 57 | result = hiveContext.readXPatternsParquet(tachyonURL, path).schema 58 | } 59 | } 60 | 61 | Configuration.log4j.info("Reading the avro schema from result df") 62 | 63 | val avroSchema = AvroConverter.getAvroSchema(result).toString(true) 64 | Configuration.log4j.debug(avroSchema) 65 | avroSchema 66 | } 67 | 68 | getDatasourceSchemaFuture onComplete { 69 | case Success(result) => currentSender ! result 70 | case Failure(e) => currentSender ! ErrorMessage(s"GET data source schema failed with the following message: ${getCompleteStackTrace(e)}") 71 | } 72 | 73 | case request: Any => Configuration.log4j.error(request.toString) 74 | } 75 | 76 | /** 77 | * Checks the file existence on the sent file system. If the file is not found an exception is thrown 78 | * @param hdfsConfiguration the hdfs configuration 79 | * @param defaultFSUrl the file system default path. It is different for hdfs and for tachyon. 
80 | * @param filePath the path for the file for which the existence is checked 81 | */ 82 | private def checkFileExistence(hdfsConfiguration: org.apache.hadoop.conf.Configuration, defaultFSUrl:String, filePath:String) = { 83 | val newConf = new org.apache.hadoop.conf.Configuration(hdfsConfiguration) 84 | newConf.set("fs.defaultFS", defaultFSUrl) 85 | if (!Utils.checkFileExistence(defaultFSUrl + filePath, newConf)) { 86 | throw new Exception("File path does not exist") 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/GetLogsApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import akka.actor.Actor 4 | import akka.actor.actorRef2Scala 5 | import apiactors.ActorOperations._ 6 | import com.google.common.base.Preconditions 7 | import server.LogsActor 8 | import akka.actor.ActorLogging 9 | import com.xpatterns.jaws.data.contracts.DAL 10 | import messages.GetLogsMessage 11 | import org.joda.time.DateTime 12 | import java.util.Collection 13 | import server.Configuration 14 | import com.xpatterns.jaws.data.DTO.Logs 15 | import com.xpatterns.jaws.data.DTO.Log 16 | import scala.concurrent._ 17 | import ExecutionContext.Implicits.global 18 | import scala.util.{ Success, Failure } 19 | import messages.ErrorMessage 20 | 21 | /** 22 | * Created by emaorhian 23 | */ 24 | class GetLogsApiActor(dals: DAL) extends Actor { 25 | 26 | override def receive = { 27 | 28 | case message: GetLogsMessage => { 29 | Configuration.log4j.info("[GetLogsApiActor]: retrieving logs for: " + message.queryID) 30 | val currentSender = sender 31 | 32 | val getLogsFuture = future { 33 | val limit = Option(message.limit) getOrElse(100) 34 | val startDate = Option(message.startDate) getOrElse(new DateTime(1977, 1, 1, 1, 1, 1, 1).getMillis()) 35 | 36 | dals.loggingDal.getLogs(message.queryID, startDate, limit) 37 | } 38 | getLogsFuture onComplete { 39 | case Success(result) => currentSender ! result 40 | case Failure(e) => currentSender ! 
ErrorMessage(s"GET logs failed with the following message: ${e.getMessage}") 41 | } 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/GetParquetTablesApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import messages._ 4 | import scala.concurrent._ 5 | import ExecutionContext.Implicits.global 6 | import scala.util.{ Success, Failure } 7 | import messages.ErrorMessage 8 | import spray.http.StatusCodes 9 | import scala.concurrent.Await 10 | import com.xpatterns.jaws.data.contracts.DAL 11 | import java.util.UUID 12 | import akka.util.Timeout 13 | import server.Configuration 14 | import akka.pattern.ask 15 | import org.apache.spark.scheduler.HiveUtils 16 | import implementation.HiveContextWrapper 17 | import akka.actor.Actor 18 | import com.xpatterns.jaws.data.DTO.Tables 19 | import scala.util.{ Try, Success, Failure } 20 | import apiactors.ActorOperations._ 21 | import com.xpatterns.jaws.data.DTO.Column 22 | import com.xpatterns.jaws.data.DTO.Table 23 | import com.xpatterns.jaws.data.utils.CustomConverter 24 | /** 25 | * Created by emaorhian 26 | */ 27 | 28 | class GetParquetTablesApiActor(hiveContext: HiveContextWrapper, dals: DAL) extends Actor { 29 | 30 | override def receive = { 31 | 32 | case message: GetParquetTablesMessage => { 33 | val currentSender = sender 34 | 35 | val getTablesFuture = future { 36 | if (message.tables.isEmpty) { 37 | val tables = dals.parquetTableDal.listParquetTables 38 | message.describe match { 39 | case true => Array(Tables("None", tables map (pTable => getFields(pTable.name)))) 40 | case false => Array(Tables("None", tables map (pTable => Table(pTable.name, Array.empty, Array.empty)))) 41 | } 42 | 43 | } else { 44 | var tablesMap = message.tables.map(table => { 45 | if (dals.parquetTableDal.tableExists(table) == false) 46 | throw new Exception(s" Table $table does not exist") 47 | getFields(table) 48 | }) 49 | Array(Tables("None", tablesMap)) 50 | } 51 | } 52 | 53 | getTablesFuture onComplete { 54 | case Success(result) => currentSender ! result 55 | case Failure(e) => currentSender ! 
ErrorMessage(s"GET tables failed with the following message: ${e.getMessage}") 56 | } 57 | } 58 | } 59 | 60 | def getFields(tableName: String): Table = { 61 | val tableSchemaRDD = hiveContext.table(tableName) 62 | val schema = CustomConverter.getCustomSchema(tableSchemaRDD.schema) 63 | 64 | Table(tableName, schema, Array.empty) 65 | } 66 | 67 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/GetQueriesApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import akka.actor.Actor 4 | import com.xpatterns.jaws.data.contracts.DAL 5 | import server.Configuration 6 | import scala.concurrent._ 7 | import ExecutionContext.Implicits.global 8 | import scala.util.{ Success, Failure } 9 | import messages._ 10 | /** 11 | * Created by emaorhian 12 | */ 13 | class GetQueriesApiActor(dals: DAL) extends Actor { 14 | 15 | override def receive = { 16 | 17 | case message: GetPaginatedQueriesMessage => 18 | 19 | Configuration.log4j.info("[GetQueriesApiActor]: retrieving " + message.limit + " number of queries starting with " + message.startQueryID) 20 | val currentSender = sender() 21 | val getQueriesFuture = future { 22 | dals.loggingDal.getQueries(message.startQueryID, message.limit) 23 | } 24 | 25 | getQueriesFuture onComplete { 26 | case Success(result) => currentSender ! result 27 | case Failure(e) => currentSender ! ErrorMessage(s"GET queries failed with the following message: ${e.getMessage}") 28 | } 29 | 30 | case message: GetQueriesMessage => 31 | Configuration.log4j.info("[GetQueryInfoApiActor]: retrieving the query information for " + message.queryIDs) 32 | 33 | val currentSender = sender() 34 | 35 | val getQueryInfoFuture = future { 36 | dals.loggingDal.getQueries(message.queryIDs) 37 | } 38 | 39 | getQueryInfoFuture onComplete { 40 | case Success(result) => currentSender ! result 41 | case Failure(e) => currentSender ! ErrorMessage(s"GET query info failed with the following message: ${e.getMessage}") 42 | } 43 | 44 | case message: GetQueriesByName => 45 | Configuration.log4j.info("[GetQueryInfoApiActor]: retrieving the queries for " + message.name) 46 | 47 | val currentSender = sender() 48 | 49 | val getQueryInfoFuture = future { 50 | dals.loggingDal.getQueriesByName(message.name) 51 | } 52 | 53 | getQueryInfoFuture onComplete { 54 | case Success(result) => currentSender ! result 55 | case Failure(e) => currentSender ! ErrorMessage(s"GET query info failed with the following message: ${e.getMessage}") 56 | } 57 | 58 | case _: GetPublishedQueries => 59 | Configuration.log4j.info("[GetQueryInfoApiActor]: retrieving the published queries ") 60 | 61 | val currentSender = sender() 62 | 63 | val getQueryInfoFuture = future { 64 | dals.loggingDal.getPublishedQueries() 65 | } 66 | 67 | getQueryInfoFuture onComplete { 68 | case Success(result) => currentSender ! result 69 | case Failure(e) => currentSender ! 
ErrorMessage(s"GET published queries failed with the following message: ${e.getMessage}") 70 | } 71 | } 72 | 73 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/GetResultsApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import apiactors.ActorOperations._ 4 | import scala.concurrent._ 5 | import org.apache.spark.rdd.RDD 6 | import org.apache.spark.sql.hive.HiveContext 7 | import com.google.common.base.Preconditions 8 | import com.xpatterns.jaws.data.DTO.Column 9 | import com.xpatterns.jaws.data.utils.Utils 10 | import server.Configuration 11 | import akka.actor.Actor 12 | import akka.actor.actorRef2Scala 13 | import messages.GetResultsMessage 14 | import net.liftweb.json._ 15 | import net.liftweb.json.DefaultFormats 16 | import com.xpatterns.jaws.data.contracts.DAL 17 | import org.apache.spark.scheduler.HiveUtils 18 | import ExecutionContext.Implicits.global 19 | import scala.util.Try 20 | import scala.util.Success 21 | import scala.util.Failure 22 | import messages.ErrorMessage 23 | import messages.ResultFormat._ 24 | import com.xpatterns.jaws.data.DTO.AvroResult 25 | import com.xpatterns.jaws.data.DTO.CustomResult 26 | import com.xpatterns.jaws.data.utils.ResultsConverter 27 | import org.apache.spark.sql.catalyst.expressions.Row 28 | import com.xpatterns.jaws.data.DTO.AvroBinaryResult 29 | 30 | /** 31 | * Created by emaorhian 32 | */ 33 | class GetResultsApiActor(hdfsConf: org.apache.hadoop.conf.Configuration, hiveContext: HiveContext, dals: DAL) extends Actor { 34 | implicit val formats = DefaultFormats 35 | override def receive = { 36 | 37 | case message: GetResultsMessage => 38 | { 39 | Configuration.log4j.info(s"[GetResultsMessage]: retrieving results for: ${message.queryID} in the ${message.format}") 40 | val currentSender = sender 41 | val getResultsFuture = future { 42 | 43 | val (offset, limit) = getOffsetAndLimit(message) 44 | val metaInfo = dals.loggingDal.getMetaInfo(message.queryID) 45 | 46 | metaInfo.resultsDestination match { 47 | // cassandra 48 | case 0 => { 49 | var endIndex = offset + limit 50 | message.format match { 51 | case AVRO_BINARY_FORMAT => new AvroBinaryResult(getDBAvroResults(message.queryID, offset, endIndex)) 52 | case AVRO_JSON_FORMAT => getDBAvroResults(message.queryID, offset, endIndex).result 53 | case _ => getCustomResults(message.queryID, offset, endIndex) 54 | } 55 | 56 | } 57 | //hdfs 58 | case 1 => { 59 | val destinationPath = HiveUtils.getHdfsPath(Configuration.rddDestinationIp.get) 60 | getFormattedResult(message.format, getResults(offset, limit, destinationPath)) 61 | 62 | } 63 | //tachyon 64 | case 2 => { 65 | val destinationPath = HiveUtils.getTachyonPath(Configuration.rddDestinationIp.get) 66 | getFormattedResult(message.format, getResults(offset, limit, destinationPath)) 67 | 68 | } 69 | case _ => { 70 | Configuration.log4j.info("[GetResultsMessage]: Unidentified results path : " + metaInfo.resultsDestination) 71 | null 72 | } 73 | } 74 | } 75 | 76 | getResultsFuture onComplete { 77 | case Success(results) => currentSender ! results 78 | case Failure(e) => currentSender ! 
ErrorMessage(s"GET results failed with the following message: ${e.getMessage}") 79 | } 80 | 81 | } 82 | 83 | def getResults(offset: Int, limit: Int, destinationPath: String): ResultsConverter = { 84 | val schemaBytes = Utils.readBytes(hdfsConf, Configuration.schemaFolder.getOrElse("jawsSchemaFolder") + "/" + message.queryID) 85 | val schema = HiveUtils.deserializaSchema(schemaBytes) 86 | 87 | val resultsRDD: RDD[Tuple2[Object, Array[Object]]] = hiveContext.sparkContext.objectFile(HiveUtils.getRddDestinationPath(message.queryID, destinationPath)) 88 | 89 | val filteredResults = resultsRDD.filter(tuple => tuple._1.asInstanceOf[Long] >= offset && tuple._1.asInstanceOf[Long] < offset + limit).collect() 90 | 91 | val resultRows = filteredResults map { case (index, row) => Row.fromSeq(row) } 92 | 93 | new ResultsConverter(schema, resultRows) 94 | 95 | } 96 | } 97 | 98 | def getOffsetAndLimit(message: GetResultsMessage): Tuple2[Int, Int] = { 99 | var offset = message.offset 100 | var limit = message.limit 101 | 102 | Option(offset) match { 103 | case None => { 104 | Configuration.log4j.info("[GetResultsMessage]: offset null... setting it on 0") 105 | offset = 0 106 | } 107 | case _ => { 108 | Configuration.log4j.info("[GetResultsMessage]: offset = " + offset) 109 | } 110 | } 111 | 112 | Option(limit) match { 113 | case None => { 114 | Configuration.log4j.info("[GetResultsMessage]: limit null... setting it on 100") 115 | limit = 100 116 | } 117 | case _ => { 118 | Configuration.log4j.info("[GetResultsMessage]: limit = " + limit) 119 | } 120 | } 121 | (offset, limit) 122 | } 123 | 124 | private def getDBAvroResults(queryID: String, offset: Int, limit: Int) = { 125 | val result = dals.resultsDal.getAvroResults(queryID) 126 | val lastResultIndex = if (limit > result.result.length) result.result.length else limit 127 | new AvroResult(result.schema, result.result.slice(offset, lastResultIndex)) 128 | } 129 | 130 | private def getCustomResults(queryID: String, offset: Int, limit: Int) = { 131 | val result = dals.resultsDal.getCustomResults(queryID) 132 | val lastResultIndex = if (limit > result.result.length) result.result.length else limit 133 | new CustomResult(result.schema, result.result.slice(offset, lastResultIndex)) 134 | } 135 | 136 | private def getFormattedResult(format: String, resultsConverter: ResultsConverter) = { 137 | format match { 138 | case AVRO_BINARY_FORMAT => resultsConverter.toAvroBinaryResults() 139 | case AVRO_JSON_FORMAT => resultsConverter.toAvroResults().result 140 | case _ => resultsConverter.toCustomResults() 141 | } 142 | } 143 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/QueryPropertiesApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import akka.actor.Actor 4 | import com.xpatterns.jaws.data.contracts.DAL 5 | import com.xpatterns.jaws.data.utils.QueryState 6 | import messages.{UpdateQueryPropertiesMessage, ErrorMessage} 7 | import server.Configuration 8 | import scala.concurrent._ 9 | import ExecutionContext.Implicits.global 10 | 11 | import scala.concurrent._ 12 | import scala.util.{Failure, Success} 13 | 14 | /** 15 | * Handles the properties operation on a query 16 | */ 17 | class QueryPropertiesApiActor (dals: DAL) extends Actor { 18 | override def receive = { 19 | case message: UpdateQueryPropertiesMessage => 20 | 21 | Configuration.log4j.info(s"[QueryPropertiesApiActor]: updating query id 
${message.queryID} with name ${message.name}") 22 | 23 | val currentSender = sender() 24 | 25 | val updateQueryFuture = future { 26 | dals.loggingDal.getState(message.queryID) match { 27 | case QueryState.NOT_FOUND => throw new Exception(s"The query ${message.queryID} was not found. Please provide a valid query id") 28 | case _ => 29 | dals.loggingDal.setQueryProperties(message.queryID, message.name, message.description, message.published, message.overwrite) 30 | s"Query information for ${message.queryID} has been updated" 31 | } 32 | } 33 | 34 | updateQueryFuture onComplete { 35 | case Success(successfulMessage) => currentSender ! successfulMessage 36 | case Failure(e) => currentSender ! ErrorMessage(s"Updating query failed with the following message: ${e.getMessage}") 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/apiactors/RegisterParquetTableApiActor.scala: -------------------------------------------------------------------------------- 1 | package apiactors 2 | 3 | import implementation.HiveContextWrapper 4 | import com.xpatterns.jaws.data.contracts.DAL 5 | import akka.actor.Actor 6 | import messages.RegisterTableMessage 7 | import server.Configuration 8 | import org.apache.spark.scheduler.HiveUtils 9 | import messages.UnregisterTableMessage 10 | import scala.concurrent._ 11 | import ExecutionContext.Implicits.global 12 | import scala.util.{ Success, Failure } 13 | import messages.ErrorMessage 14 | 15 | class RegisterParquetTableApiActor(hiveContext: HiveContextWrapper, dals: DAL) extends Actor { 16 | override def receive = { 17 | 18 | case message: RegisterTableMessage => { 19 | Configuration.log4j.info(s"[RegisterParquetTableApiActor]: registering table ${message.name} at ${message.path}") 20 | val currentSender = sender 21 | 22 | val registerTableFuture = future { 23 | val (namenode, folderPath) = if (message.namenode.isEmpty) HiveUtils.splitPath(message.path) else (message.namenode, message.path) 24 | HiveUtils.registerParquetTable(hiveContext, message.name, namenode, folderPath, dals) 25 | } 26 | 27 | registerTableFuture onComplete { 28 | case Success(_) => currentSender ! s"Table ${message.name} was registered" 29 | case Failure(e) => currentSender ! ErrorMessage(s"RegisterTable failed with the following message: ${e.getMessage}") 30 | } 31 | } 32 | 33 | case message: UnregisterTableMessage => { 34 | Configuration.log4j.info(s"[RegisterParquetTableApiActor]: Unregistering table ${message.name}") 35 | val currentSender = sender 36 | 37 | val unregisterTableFuture = future { 38 | // unregister table 39 | hiveContext.getCatalog.unregisterTable(Seq(message.name)) 40 | dals.parquetTableDal.deleteParquetTable(message.name) 41 | } 42 | 43 | unregisterTableFuture onComplete { 44 | case Success(result) => currentSender ! s"Table ${message.name} was unregistered" 45 | case Failure(e) => currentSender ! 
ErrorMessage(s"UnregisterTable failed with the following message: ${e.getMessage}") 46 | } 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/customs/CORSDirectives.scala: -------------------------------------------------------------------------------- 1 | package customs 2 | 3 | import spray.http._ 4 | import spray.routing._ 5 | import spray.http.HttpHeaders._ 6 | import spray.http.HttpMethod 7 | /** 8 | * Created by emaorhian 9 | */ 10 | trait CORSDirectives { this: HttpService => 11 | private def respondWithCORSHeaders(origin: String, rh: Seq[HttpHeader]) = { 12 | var headers: List[HttpHeader] = List( 13 | HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(List(origin))), 14 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 15 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid") 16 | ) ++ rh.toList 17 | 18 | respondWithHeaders(headers) 19 | } 20 | private def respondWithCORSHeadersAllOrigins(rh: Seq[HttpHeader]) = { 21 | var headers: List[HttpHeader] = List( 22 | HttpHeaders.`Access-Control-Allow-Origin`(AllOrigins), 23 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 24 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid") 25 | ) ++ rh.toList 26 | 27 | respondWithHeaders(headers) 28 | } 29 | 30 | def corsFilter(origins: List[String], rh: HttpHeader*)(route: Route) = 31 | if (origins.contains("*")) 32 | respondWithCORSHeadersAllOrigins(rh)(route) 33 | else 34 | optionalHeaderValueByName("Origin") { 35 | case None => 36 | route 37 | case Some(clientOrigin) => { 38 | if (origins.contains(clientOrigin)) 39 | respondWithCORSHeaders(clientOrigin, rh)(route) 40 | else { 41 | // Maybe, a Rejection will fit better 42 | complete(StatusCodes.Forbidden, "Invalid origin") 43 | } 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/customs/CustomDirectives.scala: -------------------------------------------------------------------------------- 1 | package customs 2 | 3 | import spray.routing._ 4 | import Directives._ 5 | import spray.http.StatusCodes.ClientError 6 | 7 | object CustomDirectives { 8 | 9 | def validateCondition(condition: Boolean, message: String, rejectStatusCode: ClientError): Directive0 = { 10 | if (condition == false) { 11 | complete(rejectStatusCode, message) 12 | } else 13 | pass 14 | } 15 | 16 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/customs/CustomIndexer.scala: -------------------------------------------------------------------------------- 1 | package customs 2 | import scala.Array.canBuildFrom 3 | import scala.Iterator 4 | import org.apache.spark.rdd.RDD 5 | import server.Configuration 6 | 7 | /** 8 | * Created by emaorhian 9 | */ 10 | class CustomIndexer { 11 | 12 | def indexRdd(rdd: RDD[Array[Any]]): RDD[Tuple2[Long, Array[Any]]] = { 13 | val partitionCount = rdd.mapPartitionsWithIndex { (pid, iter) => Iterator((pid, iter.size)) }.collect 14 | 15 | var indexes = Array[Int](0) 16 | Configuration.log4j.debug("NumberOfPartitions is: " + partitionCount.size) 17 | 18 | val resultsNumber = partitionCount.foldLeft(0)((sizeSum, partInfo) => { 19 | indexes = indexes :+ (sizeSum + partInfo._2) 20 | sizeSum + partInfo._2 21 | }) 22 | 23 | Configuration.log4j.debug("Number of 
results is: " + resultsNumber) 24 | 25 | val broadcastedIndexes = rdd.sparkContext.broadcast(indexes) 26 | 27 | //index each row 28 | val indexedRdd = rdd.mapPartitionsWithIndex { (index, iterator) => 29 | var z = Array[Tuple2[Long, Array[Any]]]() 30 | var startIndex: Long = broadcastedIndexes.value(index) 31 | for (element <- iterator) { 32 | z = z ++ Array((startIndex, element)) 33 | startIndex = startIndex + 1 34 | } 35 | z.iterator 36 | } 37 | 38 | return indexedRdd 39 | } 40 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/implementation/HiveContextWrapper.scala: -------------------------------------------------------------------------------- 1 | package implementation 2 | 3 | import org.apache.spark.sql.hive.HiveContext 4 | import org.apache.spark.SparkContext 5 | 6 | class HiveContextWrapper(sc: SparkContext) extends HiveContext(sc: SparkContext){ 7 | 8 | def runMetadataSql(sql: String): Seq[String] = { 9 | runSqlHive(sql) 10 | } 11 | 12 | def getSparkContext() : SparkContext = { 13 | sc 14 | } 15 | 16 | def getCatalog = { 17 | catalog 18 | } 19 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/implementation/SchemaSettingsFactory.scala: -------------------------------------------------------------------------------- 1 | package implementation 2 | 3 | import server.Configuration 4 | 5 | /** 6 | * Created by lucianm on 12.02.2015. 7 | */ 8 | object SchemaSettingsFactory { 9 | 10 | trait SourceType 11 | 12 | case class Parquet() extends SourceType 13 | 14 | case class Hive() extends SourceType 15 | 16 | trait StorageType 17 | 18 | case class Hdfs() extends StorageType 19 | 20 | case class Tachyon() extends StorageType 21 | 22 | val HIVE: String = "hive" 23 | 24 | val PARQUET: String = "parquet" 25 | 26 | def getSourceType(sourceType: String): SourceType = { 27 | if (sourceType.equalsIgnoreCase(HIVE)) new Hive 28 | else if (sourceType.equalsIgnoreCase(PARQUET)) new Parquet 29 | else throw new Exception(Configuration.UNSUPPORTED_SOURCE_TYPE) 30 | } 31 | 32 | val HDFS: String = "hdfs" 33 | 34 | val TACHYON: String = "tachyon" 35 | 36 | def getStorageType(storageType: String): StorageType = { 37 | if (storageType.equalsIgnoreCase(HDFS)) new Hdfs 38 | else if (storageType.equalsIgnoreCase(TACHYON)) new Tachyon 39 | else throw new Exception(Configuration.UNSUPPORTED_STORAGE_TYPE) 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/messages/Messages.scala: -------------------------------------------------------------------------------- 1 | package messages 2 | 3 | import implementation.SchemaSettingsFactory.{ StorageType, SourceType } 4 | import org.apache.hadoop.conf.Configuration 5 | 6 | /** 7 | * Created by emaorhian 8 | */ 9 | case class CancelMessage(queryID: String) extends Serializable 10 | case class GetDatabasesMessage() 11 | case class GetQueriesMessage(queryIDs: Seq[String]) 12 | case class GetQueriesByName(name: String) 13 | case class GetPublishedQueries() 14 | case class GetPaginatedQueriesMessage(startQueryID: String, limit: Int) 15 | case class GetLogsMessage(queryID: String, startDate: Long, limit: Int) 16 | case class GetResultsMessage(queryID: String, offset: Int, limit: Int, format : String) 17 | case class GetTablesMessage(database: String, describe: Boolean, tables: Array[String]) 18 | case class GetExtendedTablesMessage(database: String, 
tables: Array[String]) 19 | case class GetFormattedTablesMessage(database: String, tables: Array[String]) 20 | case class RunQueryMessage(name: String) 21 | case class RunScriptMessage(script: String, limited: Boolean, maxNumberOfResults: Long, rddDestination: String) 22 | case class RunParquetMessage(script: String, tablePath: String, namenode:String, table: String, limited: Boolean, maxNumberOfResults: Long, rddDestination: String) 23 | case class GetDatasourceSchemaMessage(path: String, sourceType: SourceType, storageType: StorageType, hdfsConf:Configuration) 24 | case class ErrorMessage(message: String) 25 | case class DeleteQueryMessage(queryID: String) 26 | case class RegisterTableMessage(name: String, path: String, namenode: String) 27 | case class UnregisterTableMessage(name: String) 28 | case class GetParquetTablesMessage(tables: Array[String], describe: Boolean) 29 | case class UpdateQueryPropertiesMessage(queryID:String, name:Option[String], description:Option[String], published:Option[Boolean], overwrite:Boolean) 30 | 31 | 32 | object ResultFormat { 33 | val AVRO_BINARY_FORMAT = "avrobinary" 34 | val AVRO_JSON_FORMAT = "avrojson" 35 | val DEFAULT_FORMAT = "default" 36 | } 37 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/Configuration.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.log4j.Logger 5 | 6 | /** 7 | * Holds the configuration properties for Jaws 8 | */ 9 | object Configuration { 10 | 11 | import com.typesafe.config.ConfigFactory 12 | 13 | val log4j = Logger.getLogger(Configuration.getClass) 14 | 15 | private val conf = ConfigFactory.load 16 | conf.checkValid(ConfigFactory.defaultReference) 17 | 18 | val remote = conf.getConfig("remote") 19 | val sparkConf = conf.getConfig("sparkConfiguration") 20 | val appConf = conf.getConfig("appConf") 21 | val hadoopConf = conf.getConfig("hadoopConf") 22 | val cassandraConf = conf.getConfig("cassandraConf") 23 | 24 | // cassandra configuration 25 | val cassandraHost = getStringConfiguration(cassandraConf, "cassandra.host") 26 | val cassandraKeyspace = getStringConfiguration(cassandraConf, "cassandra.keyspace") 27 | val cassandraClusterName = getStringConfiguration(cassandraConf, "cassandra.cluster.name") 28 | 29 | //hadoop conf 30 | val replicationFactor = getStringConfiguration(hadoopConf, "replicationFactor") 31 | val forcedMode = getStringConfiguration(hadoopConf, "forcedMode") 32 | val loggingFolder = getStringConfiguration(hadoopConf, "loggingFolder") 33 | val stateFolder = getStringConfiguration(hadoopConf, "stateFolder") 34 | val detailsFolder = getStringConfiguration(hadoopConf, "detailsFolder") 35 | val resultsFolder = getStringConfiguration(hadoopConf, "resultsFolder") 36 | val metaInfoFolder = getStringConfiguration(hadoopConf, "metaInfoFolder") 37 | val queryNameFolder = getStringConfiguration(hadoopConf, "queryNameFolder") 38 | val queryPublishedFolder = getStringConfiguration(hadoopConf, "queryPublishedFolder") 39 | val queryUnpublishedFolder = getStringConfiguration(hadoopConf, "queryUnpublishedFolder") 40 | val namenode = getStringConfiguration(hadoopConf, "namenode") 41 | val parquetTablesFolder = getStringConfiguration(hadoopConf, "parquetTablesFolder") 42 | 43 | //app configuration 44 | val serverInterface = getStringConfiguration(appConf, "server.interface") 45 | val loggingType = getStringConfiguration(appConf, 
"app.logging.type") 46 | val rddDestinationIp = getStringConfiguration(appConf, "rdd.destination.ip") 47 | val rddDestinationLocation = getStringConfiguration(appConf, "rdd.destination.location") 48 | val remoteDomainActor = getStringConfiguration(appConf, "remote.domain.actor") 49 | val applicationName = getStringConfiguration(appConf, "application.name") 50 | val webServicesPort = getStringConfiguration(appConf, "web.services.port") 51 | val webSocketsPort = getStringConfiguration(appConf, "web.sockets.port") 52 | val nrOfThreads = getStringConfiguration(appConf, "nr.of.threads") 53 | val timeout = getStringConfiguration(appConf, "timeout").getOrElse("10000").toInt 54 | val schemaFolder = getStringConfiguration(appConf, "schemaFolder") 55 | val numberOfResults = getStringConfiguration(appConf, "nr.of.results") 56 | val corsFilterAllowedHosts = getStringConfiguration(appConf, "cors-filter-allowed-hosts") 57 | val jarPath = getStringConfiguration(appConf, "jar-path") 58 | val hdfsNamenodePath = getStringConfiguration(appConf, "hdfs-namenode-path").getOrElse("") 59 | val tachyonNamenodePath = getStringConfiguration(appConf, "tachyon-namenode-path").getOrElse("") 60 | 61 | val LIMIT_EXCEPTION_MESSAGE = "The limit is null!" 62 | val SCRIPT_EXCEPTION_MESSAGE = "The script is empty or null!" 63 | val UUID_EXCEPTION_MESSAGE = "The uuid is empty or null!" 64 | val META_INFO_EXCEPTION_MESSAGE = "The metainfo is null!" 65 | val LIMITED_EXCEPTION_MESSAGE = "The limited flag is null!" 66 | val RESULTS_NUMBER_EXCEPTION_MESSAGE = "The results number is null!" 67 | val FILE_EXCEPTION_MESSAGE = "The file is null or empty!" 68 | val QUERY_NAME_MESSAGE = "The query name is null or empty!" 69 | val FILE_PATH_TYPE_EXCEPTION_MESSAGE = "The file path must be hdfs or tachyon" 70 | val DATABASE_EXCEPTION_MESSAGE = "The database is null or empty!" 71 | val TABLE_EXCEPTION_MESSAGE = "The table name is null or empty!" 72 | val PATH_IS_EMPTY = "Request parameter \'path\' must not be empty!" 73 | val TABLE_ALREADY_EXISTS_EXCEPTION_MESSAGE = "The table already exists!" 74 | val UNSUPPORTED_SOURCE_TYPE = "Unsupported value for parameter \'sourceType\' !" 75 | val UNSUPPORTED_STORAGE_TYPE = "Unsupported value for parameter \'storageType\' !" 
76 | 77 | def getStringConfiguration(configuration: Config, configurationPath: String): Option[String] = { 78 | if (configuration.hasPath(configurationPath)) Option(configuration.getString(configurationPath).trim) else Option(null) 79 | } 80 | 81 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/JawsController.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import java.net.InetAddress 4 | import server.api._ 5 | import scala.collection.JavaConverters._ 6 | import com.typesafe.config.Config 7 | import com.xpatterns.jaws.data.utils.Utils 8 | import akka.actor.ActorSystem 9 | import customs.CORSDirectives 10 | import com.xpatterns.jaws.data.impl.CassandraDal 11 | import com.xpatterns.jaws.data.impl.HdfsDal 12 | import spray.routing.Directive.pimpApply 13 | import spray.routing.SimpleRoutingApp 14 | import com.xpatterns.jaws.data.contracts.DAL 15 | import org.apache.spark.scheduler.HiveUtils 16 | import implementation.HiveContextWrapper 17 | import org.apache.spark.SparkContext 18 | import org.apache.spark.scheduler.LoggingListener 19 | import org.apache.spark.SparkConf 20 | 21 | /** 22 | * Created by emaorhian 23 | */ 24 | object JawsController extends App with UIApi with IndexApi with ParquetApi with MetadataApi with QueryManagementApi 25 | with SimpleRoutingApp with CORSDirectives { 26 | initialize() 27 | 28 | // initialize parquet tables 29 | initializeParquetTables() 30 | 31 | implicit val spraySystem: ActorSystem = ActorSystem("spraySystem") 32 | 33 | startServer(interface = Configuration.serverInterface.getOrElse(InetAddress.getLocalHost.getHostName), 34 | port = Configuration.webServicesPort.getOrElse("8080").toInt) { 35 | pathPrefix("jaws") { 36 | uiRoute ~ indexRoute ~ runLogsResultsQueriesCancelRoute ~ parquetRoute ~ hiveSchemaRoute 37 | } 38 | } 39 | 40 | private val reactiveServer = new ReactiveServer(Configuration.webSocketsPort.getOrElse("8081").toInt, MainActors.logsActor) 41 | reactiveServer.start() 42 | 43 | def initialize() = { 44 | Configuration.log4j.info("Initializing...") 45 | 46 | hdfsConf = getHadoopConf 47 | Utils.createFolderIfDoesntExist(hdfsConf, Configuration.schemaFolder.getOrElse("jawsSchemaFolder"), forcedMode = false) 48 | 49 | Configuration.loggingType.getOrElse("cassandra") match { 50 | case "cassandra" => dals = new CassandraDal(Configuration.cassandraHost.get, Configuration.cassandraClusterName.get, Configuration.cassandraKeyspace.get) 51 | case _ => dals = new HdfsDal(hdfsConf) 52 | } 53 | 54 | hiveContext = createHiveContext(dals) 55 | } 56 | 57 | def createHiveContext(dal: DAL): HiveContextWrapper = { 58 | val jars = Array(Configuration.jarPath.get) 59 | 60 | def configToSparkConf(config: Config, contextName: String, jars: Array[String]): SparkConf = { 61 | val sparkConf = new SparkConf().setAppName(contextName).setJars(jars) 62 | for ( 63 | property <- config.entrySet().asScala if property.getKey.startsWith("spark") && property.getValue != null 64 | ) { 65 | val key = property.getKey.replaceAll("-", ".") 66 | println(key + " | " + property.getValue.unwrapped()) 67 | sparkConf.set(key, property.getValue.unwrapped().toString) 68 | } 69 | sparkConf 70 | } 71 | 72 | val hContext: HiveContextWrapper = { 73 | val sparkConf = configToSparkConf(Configuration.sparkConf, Configuration.applicationName.getOrElse("Jaws"), jars) 74 | val sContext = new SparkContext(sparkConf) 75 | 76 | val hContext = new 
HiveContextWrapper(sContext) 77 | hContext.sparkContext.addSparkListener(new LoggingListener(dal)) 78 | 79 | HiveUtils.setSharkProperties(hContext, this.getClass.getClassLoader.getResourceAsStream("sharkSettings.txt")) 80 | //make sure that lazy variable hiveConf gets initialized 81 | hContext.runMetadataSql("use default") 82 | hContext 83 | } 84 | hContext 85 | } 86 | 87 | def getHadoopConf: org.apache.hadoop.conf.Configuration = { 88 | val configuration = new org.apache.hadoop.conf.Configuration() 89 | configuration.setBoolean(Utils.FORCED_MODE, Configuration.forcedMode.getOrElse("false").toBoolean) 90 | 91 | // set hadoop name node and job tracker 92 | Configuration.namenode match { 93 | case None => 94 | val message = "You need to set the namenode! " 95 | Configuration.log4j.error(message) 96 | throw new RuntimeException(message) 97 | 98 | case _ => configuration.set("fs.defaultFS", Configuration.namenode.get) 99 | 100 | } 101 | 102 | configuration.set("dfs.replication", Configuration.replicationFactor.getOrElse("1")) 103 | 104 | configuration.set(Utils.LOGGING_FOLDER, Configuration.loggingFolder.getOrElse("jawsLogs")) 105 | configuration.set(Utils.STATUS_FOLDER, Configuration.stateFolder.getOrElse("jawsStates")) 106 | configuration.set(Utils.DETAILS_FOLDER, Configuration.detailsFolder.getOrElse("jawsDetails")) 107 | configuration.set(Utils.METAINFO_FOLDER, Configuration.metaInfoFolder.getOrElse("jawsMetainfoFolder")) 108 | configuration.set(Utils.QUERY_NAME_FOLDER, Configuration.queryNameFolder.getOrElse("jawsQueryNameFolder")) 109 | configuration.set(Utils.QUERY_PUBLISHED_FOLDER, Configuration.queryPublishedFolder.getOrElse("jawsQueryPublishedFolder")) 110 | configuration.set(Utils.QUERY_UNPUBLISHED_FOLDER, Configuration.queryUnpublishedFolder.getOrElse("jawsQueryUnpublishedFolder")) 111 | configuration.set(Utils.RESULTS_FOLDER, Configuration.resultsFolder.getOrElse("jawsResultsFolder")) 112 | configuration.set(Utils.PARQUET_TABLES_FOLDER, Configuration.parquetTablesFolder.getOrElse("parquetTablesFolder")) 113 | 114 | configuration 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/LogsActor.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ Actor, ActorLogging } 4 | import scala.collection._ 5 | import org.java_websocket.WebSocket 6 | import server.ReactiveServer.Close 7 | import server.ReactiveServer.Error 8 | import server.ReactiveServer.Open 9 | import akka.actor.actorRef2Scala 10 | 11 | /** 12 | * Created by emaorhian 13 | */ 14 | object LogsActor { 15 | sealed trait LogsMessage 16 | 17 | case class Unregister(ws: Option[WebSocket]) extends LogsMessage 18 | case class PushLogs(uuid: String, msg: String) extends LogsMessage 19 | 20 | } 21 | 22 | class LogsActor extends Actor with ActorLogging { 23 | import LogsActor._ 24 | import server.ReactiveServer._ 25 | 26 | val uuidToClients = mutable.Map[String, mutable.ListBuffer[WebSocket]]() 27 | 28 | override def receive = { 29 | case Open(uuid, ws, hs) => { 30 | var webSockets = mutable.ListBuffer[WebSocket]() 31 | uuidToClients.get(uuid) match { 32 | case None => webSockets = mutable.ListBuffer[WebSocket]() 33 | case Some(wss) => webSockets = wss 34 | } 35 | uuidToClients += ((uuid, (webSockets :+ ws))) 36 | log.info("registered monitor for {}", ws.getLocalSocketAddress()) 37 | } 38 | 39 | case Close(ws, code, reason, ext) => self ! 
Unregister(ws) 40 | 41 | case Error(ws, ex) => self ! Unregister(ws) 42 | 43 | case PushLogs(uuid, msg) => 44 | log.debug("received msg '{}'", msg) 45 | val webSockets = uuidToClients.get(uuid) 46 | webSockets match { 47 | case None => log.debug("There is no such uuid") 48 | case Some(wss) => wss.foreach(ws => ws.send(msg)) 49 | } 50 | 51 | case Unregister(ws) => { 52 | ws match { 53 | case None => log.info("There is nothing to unregister") 54 | case Some(wss) => 55 | log.info("unregister monitor") 56 | uuidToClients.foreach(tuple => { 57 | val clients = tuple._2 58 | clients -= wss 59 | uuidToClients.put(tuple._1, clients) 60 | }) 61 | } 62 | 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/MainActors.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ActorSystem, Props, ActorRef} 4 | import apiactors.ActorsPaths._ 5 | import apiactors.ActorsPaths 6 | import scala.concurrent.Await 7 | import akka.util.Timeout 8 | import akka.pattern.ask 9 | 10 | /** 11 | * Created by emaorhian 12 | */ 13 | object MainActors { 14 | // self: Systems => 15 | val localSystem: ActorSystem = ActorSystem("localSystem") 16 | val remoteSystem: ActorSystem = ActorSystem("remoteSystem", Configuration.remote) 17 | val localSupervisor = localSystem.actorOf(Props(classOf[Supervisor]), ActorsPaths.LOCAL_SUPERVISOR_ACTOR_NAME) 18 | val remoteSupervisor = remoteSystem.actorOf(Props(classOf[Supervisor]), ActorsPaths.REMOTE_SUPERVISOR_ACTOR_NAME) 19 | val logsActor = createActor(Props(new LogsActor), LOGS_WEBSOCKETS_ACTOR_NAME, localSupervisor) 20 | 21 | 22 | def createActor(props: Props, name: String, supervisor: ActorRef): ActorRef = { 23 | implicit val timeout = Timeout(Configuration.timeout) 24 | val future = ask(supervisor, (props, name)) 25 | val actor = Await.result(future, timeout.duration).asInstanceOf[ActorRef] 26 | actor 27 | } 28 | } 29 | 30 | 31 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/ReactiveServer.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.ActorRef 4 | import java.net.InetSocketAddress 5 | import org.java_websocket.WebSocket 6 | import org.java_websocket.server.WebSocketServer 7 | import org.java_websocket.handshake.ClientHandshake 8 | import akka.actor.actorRef2Scala 9 | 10 | /** 11 | * Created by emaorhian 12 | */ 13 | object ReactiveServer { 14 | sealed trait ReactiveServerMessage 15 | case class Message(ws: WebSocket, msg: String) 16 | extends ReactiveServerMessage 17 | case class Open(uuid: String, ws: WebSocket, hs: ClientHandshake) 18 | extends ReactiveServerMessage 19 | case class Close(ws: Option[WebSocket], code: Int, reason: String, external: Boolean) 20 | extends ReactiveServerMessage 21 | case class Error(ws: Option[WebSocket], ex: Exception) 22 | extends ReactiveServerMessage 23 | 24 | } 25 | class ReactiveServer(val port: Int, val reactor: ActorRef) 26 | extends WebSocketServer(new InetSocketAddress(port)) { 27 | 28 | val urlPattern = """^\/jaws\/logs\?.*(?<=&|\?)uuid=([^&]+)""".r 29 | 30 | final override def onMessage(ws: WebSocket, msg: String) { 31 | } 32 | 33 | final override def onOpen(ws: WebSocket, hs: ClientHandshake) { 34 | Option(ws) match { 35 | case None => Configuration.log4j.debug("[ReactiveServer] the ws is null") 36 | case _ 
=> { 37 | var description = hs.getResourceDescriptor() 38 | val urlPattern(uuid) = description 39 | reactor ! ReactiveServer.Open(uuid, ws, hs) 40 | } 41 | } 42 | } 43 | final override def onClose(ws: WebSocket, code: Int, reason: String, external: Boolean) { 44 | Option(ws) match { 45 | case None => Configuration.log4j.debug("[ReactiveServer] the ws is null") 46 | case _ => { 47 | reactor ! ReactiveServer.Close(Option(ws), code, reason, external) 48 | 49 | } 50 | } 51 | } 52 | final override def onError(ws: WebSocket, ex: Exception) { 53 | reactor ! ReactiveServer.Error(Option(ws), ex) 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/Supervisor.scala: -------------------------------------------------------------------------------- 1 | package server 2 | import akka.actor.Actor 3 | import akka.actor.OneForOneStrategy 4 | import akka.actor.SupervisorStrategy._ 5 | import scala.concurrent.duration._ 6 | import akka.actor.Props 7 | import akka.actor.actorRef2Scala 8 | import akka.actor.ActorSystem 9 | 10 | /** 11 | * Created by emaorhian 12 | */ 13 | class Supervisor extends Actor { 14 | 15 | override val supervisorStrategy = 16 | OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 1 minute) { 17 | 18 | case ex : Throwable => { 19 | Resume 20 | } 21 | } 22 | 23 | def receive = { 24 | case (p: Props, name: String) => sender ! context.actorOf(p, name) 25 | } 26 | 27 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/api/BaseApi.scala: -------------------------------------------------------------------------------- 1 | package server.api 2 | 3 | import akka.actor.{Props, ActorRef} 4 | import akka.util.Timeout 5 | import apiactors.{BalancerActor, RunScriptApiActor} 6 | import com.xpatterns.jaws.data.contracts.DAL 7 | import implementation.HiveContextWrapper 8 | import server.Configuration 9 | import spray.routing.HttpService 10 | import server.MainActors._ 11 | import apiactors.ActorsPaths._ 12 | 13 | /** 14 | * The base trait api. It contains the common data used by the other api classes. 15 | */ 16 | trait BaseApi extends HttpService { 17 | // The default timeout for the futures 18 | implicit val timeout = Timeout(Configuration.timeout.toInt) 19 | 20 | // The hdfs configuration that is initialized when the server starts 21 | var hdfsConf: org.apache.hadoop.conf.Configuration = _ 22 | 23 | // The hive context that is initialized when the server starts 24 | var hiveContext: HiveContextWrapper = _ 25 | 26 | // Holds the DAL. It is initialized when the server starts 27 | var dals: DAL = _ 28 | 29 | // The actor that is handling the scripts that are run on Hive or Spark SQL. This field is lazy because the hdfs 30 | // configuration and the hive context are not initialized at the moment of creating the object. 
31 | lazy val runScriptActor = createActor(Props(new RunScriptApiActor(hdfsConf, hiveContext, dals)), RUN_SCRIPT_ACTOR_NAME, remoteSupervisor) 32 | 33 | // The actor that is handling the parquet tables 34 | lazy val balancerActor = createActor(Props(classOf[BalancerActor]), BALANCER_ACTOR_NAME, remoteSupervisor) 35 | 36 | /** 37 | * @param pathType the path type of the requested name node 38 | * @return the proper namenode path 39 | */ 40 | protected def getNamenodeFromPathType(pathType:String):String = { 41 | if ("hdfs".equals(pathType)) { 42 | Configuration.hdfsNamenodePath 43 | } else if ("tachyon".equals(pathType)) { 44 | Configuration.tachyonNamenodePath 45 | } else { 46 | "" 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/api/IndexApi.scala: -------------------------------------------------------------------------------- 1 | package server.api 2 | 3 | import customs.CORSDirectives 4 | import server.Configuration 5 | import spray.http.{HttpMethods, HttpHeaders} 6 | import spray.routing.{HttpService, Route} 7 | 8 | /** 9 | * Handles the calls to the index page 10 | */ 11 | trait IndexApi extends HttpService with CORSDirectives { 12 | /** 13 | * Handles the /jaws/index call. If the server starts successfully, this call returns a proper message. 14 | */ 15 | def indexRoute: Route = path("index") { 16 | get { 17 | corsFilter(List(Configuration.corsFilterAllowedHosts.getOrElse("*"))) { 18 | complete { 19 | "Jaws is up and running!" 20 | } 21 | } 22 | 23 | } ~ 24 | options { 25 | corsFilter(List(Configuration.corsFilterAllowedHosts.getOrElse("*")), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 26 | complete { 27 | "OK" 28 | } 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scala/server/api/UIApi.scala: -------------------------------------------------------------------------------- 1 | package server.api 2 | 3 | 4 | import customs.CORSDirectives 5 | import spray.http.{StatusCodes, HttpMethods, HttpHeaders} 6 | import spray.routing._ 7 | 8 | /** 9 | * Handles the calls for getting the ui stored in webapp. 10 | */ 11 | trait UIApi extends HttpService with CORSDirectives { 12 | /** 13 | * Handles the call /jaws/ui/ for getting the ui. 14 | */ 15 | def uiRoute: Route = pathPrefix("ui") { 16 | // Handles the call made to /ui/ by returning the index page stored in webapp folder. 17 | pathSingleSlash { 18 | get { 19 | getFromResource("webapp/index.html") 20 | } ~ options { 21 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 22 | complete { 23 | "OK" 24 | } 25 | } 26 | } 27 | } ~ 28 | // When a request is made to another resource from the ui, the call is redirected to the default path 29 | pathEnd { 30 | redirect("ui/", StatusCodes.PermanentRedirect) 31 | } ~ 32 | get { 33 | getFromResourceDirectory("webapp") 34 | } ~ 35 | options { 36 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 37 | complete { 38 | "OK" 39 | } 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/scripts/start-jaws.sh: -------------------------------------------------------------------------------- 1 | get_abs_script_path() { 2 | pushd .
>/dev/null 3 | cd $(dirname $0) 4 | dir=$(pwd) 5 | popd >/dev/null 6 | } 7 | 8 | get_abs_script_path 9 | parentdir="$(dirname "$dir")" 10 | logsFolder=$parentdir/logs 11 | if [ ! -d "$logsFolder" ]; then 12 | echo "Creating logs folder"$logsFolder 13 | mkdir $logsFolder 14 | fi 15 | 16 | 17 | export CLASSPATH_PREFIX=$parentdir"/resources" 18 | 19 | 20 | echo "Exporting system variables..." 21 | . $parentdir/conf/jaws-env.sh 22 | 23 | 24 | export CLASSPATH_PREFIX=$parentdir"/resources" 25 | 26 | 27 | echo $TACHYON_WAREHOUSE_PATH 28 | echo $TACHYON_MASTER 29 | echo $MESOS_NATIVE_LIBRARY 30 | echo $JAVA_OPTS 31 | echo $CLASSPATH_PREFIX 32 | 33 | echo "Deploying jaws..." 34 | $dir/main-jaws.sh 35 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/main/webapp/hello.txt: -------------------------------------------------------------------------------- 1 | Hello Jaws! 2 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | spray.can.server { 2 | # uncomment the next line for making this an HTTPS example 3 | # ssl-encryption = on 4 | idle-timeout = 301 s 5 | request-timeout = 300 s 6 | } 7 | 8 | remote{ 9 | akka { 10 | //loglevel = "DEBUG" 11 | actor { 12 | provider = "akka.remote.RemoteActorRefProvider" 13 | } 14 | remote { 15 | enabled-transports = ["akka.remote.netty.tcp"] 16 | log-sent-messages = on 17 | log-received-messages = on 18 | netty.tcp { 19 | transport-class = "akka.remote.transport.netty.NettyTransport" 20 | hostname = "devbox.local" 21 | port = 4042 22 | } 23 | } 24 | } 25 | } 26 | 27 | ############ spark configuration - see spark documentation #################### 28 | sparkConfiguration { 29 | spark-executor-memory=2g 30 | spark-mesos-coarse=false 31 | spark-scheduler-mode=FAIR 32 | spark-cores-max=2 33 | spark-master="spark://devbox.local:7077" 34 | spark-path="/home/ubuntu/latest-mssh/spark-1.0.1" 35 | spark-default-parallelism=384 36 | spark-storage-memoryFraction=0.3 37 | spark-shuffle-memoryFraction=0.6 38 | spark-shuffle-compress=true 39 | spark-shuffle-spill-compress=true 40 | spark-reducer-maxMbInFlight=48 41 | spark-akka-frameSize=10000 42 | spark-akka-threads=4 43 | spark-akka-timeout=100 44 | spark-task-maxFailures=4 45 | spark-shuffle-consolidateFiles=true 46 | spark-deploy-spreadOut=true 47 | spark-shuffle-spill=false 48 | #Serialization settings commented until more tests are performed 49 | #spark-serializer="org.apache.spark.serializer.KryoSerializer" 50 | #spark-kryoserializer-buffer-mb=10 51 | #spark-kryoserializer-buffer-max-mb=64 52 | spark-kryo-referenceTracking=false 53 | 54 | 55 | } 56 | 57 | ######### application configuration ################### 58 | appConf{ 59 | # the interface on which to start the spray server : localhost/ip/hostname 60 | server.interface=localhost 61 | # the cors filter allowed hosts 62 | cors-filter-allowed-hosts="*" 63 | # the default number of results retrieved on queries 64 | nr.of.results=100 65 | # the ip of the destination namenode - it is used when querying with unlimited number of results. 66 | rdd.destination.ip="devbox.local" 67 | # where to store the results in the case of an unlimited query. Possible results : hdfs/tachyon. 
Default hdfs 68 | rdd.destination.location="hdfs" 69 | # the remote domain actor address 70 | remote.domain.actor="" 71 | #remote.domain.actor="devbox.local:port,devbox2.local:port" 72 | # application name 73 | application.name="Jaws" 74 | # the port on which to deploy the apis 75 | web.services.port=9080 76 | # the port on which to deploy the web sockets api (logs) 77 | web.sockets.port=8182 78 | # the number of threads used to execute shark commands 79 | nr.of.threads=10 80 | # implicit akka timeout 81 | timeout=1000000 82 | # where to log: app.logging.type = cassandra/hdfs 83 | app.logging.type=cassandra 84 | # folder where to write the results schema 85 | schemaFolder=jawsSchemaFolder 86 | # the path to the xpatterns-jaws in target folder 87 | jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/jaws-spark-sql-rest.jar 88 | #jar-path=/home/user/http-spark-sql-server/jaws-spark-sql-rest/target/test-app.jar 89 | # the path to the hdfs namenode 90 | hdfs-namenode-path="hdfs://devbox.local:8020" 91 | # the path to the tachyon namenode 92 | tachyon-namenode-path="tachyon://devbox.local:19998" 93 | } 94 | 95 | ########## hadoop configuration - skip this if you are using cassandra logging ######## 96 | hadoopConf { 97 | namenode="hdfs://devbox.local:8020" 98 | replicationFactor=1 99 | # set to true if you want to start fresh (all the existing folders will be recreated) 100 | forcedMode=false 101 | # folder where to write the logs 102 | loggingFolder=jawsLogs 103 | # folder where to write the jobs states 104 | stateFolder=jawsStates 105 | # folder where to write the jobs details 106 | detailsFolder=jawsDetails 107 | # folder where to write the jobs results 108 | resultsFolder=jawsResultsFolder 109 | # folder where to write the jobs meta information 110 | metaInfoFolder=jawsMetainfoFolder 111 | # folder where to write the query name information 112 | queryNameFolder=jawsQueryNameFolder 113 | # folder where to write the published queries 114 | queryPublishedFolder=jawsQueryPublishedFolder 115 | # folder where to write the unpublished queries 116 | queryUnpublishedFolder=jawsQueryUnpublishedFolder 117 | # folder where to write the parquet tables information 118 | parquetTablesFolder=parquetTablesFolder 119 | } 120 | 121 | ########## cassandra configuration - skip this if you are using hdfs logging ########## 122 | cassandraConf { 123 | cassandra.host="devbox.local:9160" 124 | cassandra.keyspace=xpatterns_jaws 125 | cassandra.cluster.name=Jaws 126 | } 127 | 128 | 129 | 130 | 131 | test{ 132 | dataFilePath=/src/test/resources 133 | } 134 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/resources/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://devbox.local:8020 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 |
hadoop.proxyuser.mapred.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.mapred.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.flume.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.flume.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.HTTP.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.HTTP.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hive.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hive.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.hue.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.hue.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.httpfs.hosts 83 | * 84 | 85 | 86 | hadoop.proxyuser.httpfs.groups 87 | * 88 | 89 | 90 | hadoop.proxyuser.hdfs.groups 91 | * 92 | 93 | 94 | hadoop.proxyuser.hdfs.hosts 95 | * 96 | 97 | 98 | hadoop.security.group.mapping 99 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 100 | 101 | 102 | hadoop.security.instrumentation.requires.admin 103 | false 104 | 105 | 106 | io.file.buffer.size 107 | 65536 108 | 109 | 110 | hadoop.ssl.enabled 111 | false 112 | 113 | 114 | hadoop.ssl.require.client.cert 115 | false 116 | true 117 | 118 | 119 | hadoop.ssl.keystores.factory.class 120 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 121 | true 122 | 123 | 124 | hadoop.ssl.server.conf 125 | ssl-server.xml 126 | true 127 | 128 | 129 | hadoop.ssl.client.conf 130 | ssl-client.xml 131 | true 132 | 133 | 134 | fs.tachyon.impl 135 | tachyon.hadoop.TFS 136 | 137 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/resources/data/kv1.txt: -------------------------------------------------------------------------------- 1 | 1,a 2 | 2,b 3 | 3,c 4 | 4,d -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/resources/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | hive.metastore.local 7 | false 8 | 9 | 10 | hive.metastore.uris 11 | thrift://devbox.local:9083 12 | 13 | 14 | hive.metastore.client.socket.timeout 15 | 300 16 | 17 | 18 | hive.metastore.warehouse.dir 19 | /user/hive/warehouse 20 | 21 | 22 | hive.warehouse.subdir.inherit.perms 23 | true 24 | 25 | 26 | mapred.reduce.tasks 27 | -1 28 | 29 | 30 | hive.exec.reducers.bytes.per.reducer 31 | 1073741824 32 | 33 | 34 | hive.exec.reducers.max 35 | 999 36 | 37 | 38 | hive.metastore.execute.setugi 39 | true 40 | 41 | 42 | hive.support.concurrency 43 | false 44 | 45 | 46 | hive.zookeeper.quorum 47 | devbox.local 48 | 49 | 50 | hive.zookeeper.client.port 51 | 2181 52 | 53 | 54 | hbase.zookeeper.quorum 55 | devbox.local 56 | 57 | 58 | hbase.zookeeper.property.clientPort 59 | 2181 60 | 61 | 62 | hive.zookeeper.namespace 63 | hive_zookeeper_namespace_hive 64 | 65 | 66 | hive.server2.enable.doAs 67 | true 68 | 69 | 70 | fs.hdfs.impl.disable.cache 71 | true 72 | 73 | 74 | hive.server2.use.SSL 75 | false 76 | 77 | 78 | -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/resources/jaws-env.sh: -------------------------------------------------------------------------------- 1 | export TACHYON_WAREHOUSE_PATH=/sharktables 2 | export TACHYON_MASTER=tachyon://devbox.local:19998 3 | export MESOS_NATIVE_LIBRARY=/home/user/mesos-0.19.0/lib/libmesos.so 4 | export JAVA_OPTS="$JAVA_OPTS -Djava.library.path=/home/user/mesos-0.19.0/lib/libmesos.so:/home/user/hadoopNativeLibs" 5 | 6 | -------------------------------------------------------------------------------- 
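The test resources above mirror the runtime configuration that server.Configuration reads through Typesafe Config. As a minimal, self-contained sketch of that lookup pattern (not part of the repository; ConfigReadSketch is a made-up name), the snippet below reproduces the Option-based getStringConfiguration idiom against the appConf block shown in application.conf:

    import com.typesafe.config.{Config, ConfigFactory}

    object ConfigReadSketch extends App {
      // Loads application.conf from the classpath, as server.Configuration does
      private val conf: Config = ConfigFactory.load()
      private val appConf: Config = conf.getConfig("appConf")

      // Same Option-based lookup used by Configuration.getStringConfiguration
      def getString(config: Config, path: String): Option[String] =
        if (config.hasPath(path)) Some(config.getString(path).trim) else None

      // Falls back to 8080 when the key is missing, mirroring JawsController's default
      val port = getString(appConf, "web.services.port").getOrElse("8080").toInt
      println(s"Jaws would expose its REST API on port $port")
    }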
/jaws-spark-sql-rest/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # 3 | # The following properties set the logging levels and log appender. The 4 | # log4j.rootCategory variable defines the default log level and one or more 5 | # appenders. For the console, use 'S'. For the daily rolling file, use 'R'. 6 | # For an HTML formatted log, use 'H'. 7 | # 8 | # To override the default (rootCategory) log level, define a property of the 9 | # form (see below for available values): 10 | # 11 | # log4j.logger. = 12 | # 13 | # Available logger names: 14 | # TODO 15 | # 16 | # Possible Log Levels: 17 | # FATAL, ERROR, WARN, INFO, DEBUG 18 | # 19 | #------------------------------------------------------------------------------ 20 | 21 | #log4j.category.me.prettyprint.cassandra = INFO, dataConsole, dataFile 22 | #log4j.additivity.me.prettyprint.cassandra = false 23 | #log4j.category.DATA = INFO, dataConsole, dataFile 24 | #log4j.additivity.DATA = false 25 | 26 | log4j.rootCategory = INFO, defaultConsole, defaultFile 27 | 28 | #log4j.category.com.xpatterns.xrelevance.content.data = INFO, dataConsole, dataFile 29 | #log4j.additivity.com.xpatterns.xrelevance.content.data = false 30 | #log4j.category.com.xpatterns.xrelevance.configuration.data = INFO, dataConsole, dataFile 31 | #log4j.additivity.com.xpatterns.xrelevance.configuration.data = false 32 | #log4j.category.com.xpatterns.xrelevance.data = INFO, dataConsole, dataFile 33 | #log4j.additivity.com.xpatterns.xrelevance.data = false 34 | 35 | 36 | #------------------------------------------------------------------------------ 37 | # 38 | # The following properties configure the console (stdout) appender. 39 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 40 | # 41 | #------------------------------------------------------------------------------ 42 | log4j.appender.defaultConsole = org.apache.log4j.ConsoleAppender 43 | log4j.appender.defaultConsole.layout = org.apache.log4j.PatternLayout 44 | log4j.appender.defaultConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 45 | 46 | #------------------------------------------------------------------------------ 47 | # 48 | # The following properties configure the Daily Rolling File appender. 49 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 
50 | # 51 | #------------------------------------------------------------------------------ 52 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender 53 | log4j.appender.defaultFile.File = jaws-spark-sql-rest.log 54 | log4j.appender.defaultFile.Append = true 55 | log4j.appender.defaultFile.DatePattern = '.'yyy-MM-dd 56 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout 57 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 58 | 59 | #console for data project 60 | #log4j.appender.dataConsole = org.apache.log4j.ConsoleAppender 61 | #log4j.appender.dataConsole.layout = org.apache.log4j.PatternLayout 62 | #log4j.appender.dataConsole.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n 63 | 64 | #file for data project 65 | #log4j.appender.dataFile = org.apache.log4j.DailyRollingFileAppender 66 | #log4j.appender.dataFile.File = ${catalina.home}/logs/xpatterns-api-data-4.0.log 67 | #log4j.appender.dataFile.Append = true 68 | #log4j.appender.dataFile.DatePattern = '.'yyy-MM-dd 69 | #log4j.appender.dataFile.layout = org.apache.log4j.PatternLayout 70 | #log4j.appender.dataFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %t %c{1} [%p] %m%n -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/resources/sharkSettings.txt: -------------------------------------------------------------------------------- 1 | set mapreduce.job.reduces=128 2 | set hive.column.compress=true -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/scala/api/DeleteQueryTest.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import scala.concurrent._ 4 | import org.scalatest.FunSuite 5 | import org.scalamock.scalatest.MockFactory 6 | import org.scalatest.BeforeAndAfter 7 | import org.scalamock.proxy.ProxyMockFactory 8 | import org.scalatest.WordSpecLike 9 | import org.scalatest.concurrent._ 10 | import server.JawsController 11 | import com.xpatterns.jaws.data.contracts.DAL 12 | import akka.actor.ActorRef 13 | import server.Configuration 14 | import com.xpatterns.jaws.data.impl.CassandraDal 15 | import com.xpatterns.jaws.data.impl.HdfsDal 16 | import scala.concurrent.ExecutionContext.Implicits.global 17 | import akka.actor.ActorSystem 18 | import akka.actor.Props 19 | import org.junit.runner.RunWith 20 | import org.scalatest.junit.JUnitRunner 21 | import akka.util.Timeout 22 | import akka.pattern.ask 23 | import com.xpatterns.jaws.data.DTO.Query 24 | import scala.concurrent.duration._ 25 | import akka.testkit.TestActorRef 26 | import akka.actor.Status.Success 27 | import com.xpatterns.jaws.data.contracts.TJawsLogging 28 | import com.xpatterns.jaws.data.utils.QueryState 29 | import java.util.UUID 30 | import apiactors.DeleteQueryApiActor 31 | import messages.DeleteQueryMessage 32 | import messages.ErrorMessage 33 | 34 | @RunWith(classOf[JUnitRunner]) 35 | class DeleteQueryTest extends FunSuite with BeforeAndAfter with ScalaFutures { 36 | 37 | val hdfsConf = JawsController.getHadoopConf 38 | var dals: DAL = _ 39 | 40 | implicit val timeout = Timeout(10000) 41 | implicit val system = ActorSystem("localSystem") 42 | 43 | before { 44 | Configuration.loggingType.getOrElse("cassandra") match { 45 | case "cassandra" => dals = new CassandraDal(Configuration.cassandraHost.get, Configuration.cassandraClusterName.get, Configuration.cassandraKeyspace.get) 46 | case _ 
=> dals = new HdfsDal(hdfsConf) 47 | } 48 | } 49 | 50 | // **************** TESTS ********************* 51 | 52 | test(" not found ") { 53 | 54 | val tAct = TestActorRef(new DeleteQueryApiActor(dals)) 55 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString() 56 | val f = tAct ? DeleteQueryMessage(queryId) 57 | whenReady(f)(s => assert(s === new ErrorMessage(s"DELETE query failed with the following message: The query ${queryId} was not found. Please provide a valid query id"))) 58 | 59 | } 60 | 61 | 62 | test(" in progress ") { 63 | 64 | val tAct = TestActorRef(new DeleteQueryApiActor(dals)) 65 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString() 66 | dals.loggingDal.setState(queryId, QueryState.IN_PROGRESS) 67 | 68 | val f = tAct ? DeleteQueryMessage(queryId) 69 | whenReady(f)(s => assert(s === new ErrorMessage(s"DELETE query failed with the following message: The query ${queryId} is IN_PROGRESS. Please wait for its completion or cancel it"))) 70 | 71 | } 72 | 73 | test(" ok ") { 74 | 75 | val tAct = TestActorRef(new DeleteQueryApiActor(dals)) 76 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString() 77 | dals.loggingDal.setState(queryId, QueryState.DONE) 78 | 79 | val f = tAct ? DeleteQueryMessage(queryId) 80 | whenReady(f)(s => assert(s === s"Query ${queryId} was deleted")) 81 | 82 | } 83 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/scala/api/GetQueryInfoTest.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import scala.concurrent._ 4 | import org.scalatest.FunSuite 5 | import org.scalamock.scalatest.MockFactory 6 | import org.scalatest.BeforeAndAfter 7 | import org.scalamock.proxy.ProxyMockFactory 8 | import org.scalatest.WordSpecLike 9 | import org.scalatest.concurrent._ 10 | import server.JawsController 11 | import com.xpatterns.jaws.data.contracts.DAL 12 | import akka.actor.ActorRef 13 | import server.Configuration 14 | import com.xpatterns.jaws.data.impl.CassandraDal 15 | import com.xpatterns.jaws.data.impl.HdfsDal 16 | import scala.concurrent.ExecutionContext.Implicits.global 17 | import akka.actor.ActorSystem 18 | import akka.actor.Props 19 | import org.junit.runner.RunWith 20 | import org.scalatest.junit.JUnitRunner 21 | import akka.util.Timeout 22 | import akka.pattern.ask 23 | import com.xpatterns.jaws.data.DTO.Query 24 | import scala.concurrent.duration._ 25 | import akka.testkit.TestActorRef 26 | import akka.actor.Status.Success 27 | import com.xpatterns.jaws.data.contracts.TJawsLogging 28 | import com.xpatterns.jaws.data.utils.QueryState 29 | import java.util.UUID 30 | import com.xpatterns.jaws.data.DTO.QueryMetaInfo 31 | import apiactors.GetQueriesApiActor 32 | import messages.GetQueriesMessage 33 | import com.xpatterns.jaws.data.DTO.Queries 34 | 35 | @RunWith(classOf[JUnitRunner]) 36 | class GetQueryInfoTest extends FunSuite with BeforeAndAfter with ScalaFutures { 37 | 38 | val hdfsConf = JawsController.getHadoopConf 39 | var dals: DAL = _ 40 | 41 | implicit val timeout = Timeout(10000) 42 | implicit val system = ActorSystem("localSystem") 43 | 44 | before { 45 | Configuration.loggingType.getOrElse("cassandra") match { 46 | case "cassandra" => dals = new CassandraDal(Configuration.cassandraHost.get, Configuration.cassandraClusterName.get, Configuration.cassandraKeyspace.get) 47 | case _ => dals = new HdfsDal(hdfsConf) 48 | } 49 | } 50 | 51 | // **************** TESTS 
********************* 52 | 53 | test(" not found ") { 54 | 55 | val tAct = TestActorRef(new GetQueriesApiActor(dals)) 56 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString() 57 | val f = tAct ? GetQueriesMessage(Seq(queryId)) 58 | whenReady(f)(s => s match { 59 | case queries: Queries => { 60 | assert(queries.queries.size === 1) 61 | assert(queries.queries(0) === new Query("NOT_FOUND", queryId, "", new QueryMetaInfo)) 62 | } 63 | case _ => fail 64 | }) 65 | } 66 | 67 | test(" found ") { 68 | 69 | val tAct = TestActorRef(new GetQueriesApiActor(dals)) 70 | val queryId = System.currentTimeMillis() + UUID.randomUUID().toString() 71 | val executionTime = 100L 72 | val currentTimestamp = System.currentTimeMillis() 73 | val metaInfo = new QueryMetaInfo(100, 150, 1, true) 74 | dals.loggingDal.setState(queryId, QueryState.IN_PROGRESS) 75 | dals.loggingDal.setScriptDetails(queryId, "test script") 76 | dals.loggingDal.setExecutionTime(queryId, executionTime) 77 | dals.loggingDal.setTimestamp(queryId, currentTimestamp) 78 | dals.loggingDal.setRunMetaInfo(queryId, metaInfo) 79 | metaInfo.timestamp = currentTimestamp 80 | metaInfo.executionTime = executionTime 81 | 82 | val f = tAct ? GetQueriesMessage(Seq(queryId)) 83 | whenReady(f)(s => s match { 84 | case queries: Queries => 85 | assert(queries.queries.length === 1) 86 | assert(queries.queries(0) === new Query("IN_PROGRESS", queryId, "test script", metaInfo)) 87 | case _ => fail() 88 | }) 89 | 90 | dals.loggingDal.deleteQuery(queryId) 91 | 92 | } 93 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/scala/implementation/HiveUtilsTest.scala: -------------------------------------------------------------------------------- 1 | package implementation 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.junit.JUnitRunner 5 | import org.scalatest.FunSuite 6 | import org.apache.spark.scheduler.HiveUtils 7 | import scala.util.Try 8 | 9 | @RunWith(classOf[JUnitRunner]) 10 | class HiveUtilsTest extends FunSuite { 11 | 12 | test("split path: ok hdfs") { 13 | val (namenode, path) = HiveUtils.splitPath("hdfs://devbox:8020/user/ubuntu/testParquet.parquet") 14 | assert(namenode === "hdfs://devbox:8020") 15 | assert(path === "/user/ubuntu/testParquet.parquet") 16 | 17 | } 18 | 19 | test("split path: ok tachyon") { 20 | val (namenode, path) = HiveUtils.splitPath("tachyon://devbox:19998/user/ubuntu/testParquet.parquet") 21 | assert(namenode === "tachyon://devbox:19998") 22 | assert(path === "/user/ubuntu/testParquet.parquet") 23 | 24 | } 25 | 26 | test("split path: empty") { 27 | val trySplit = Try(HiveUtils.splitPath("")) 28 | 29 | assert(trySplit.isFailure === true) 30 | assert("Invalid file path format : " === trySplit.failed.get.getMessage()) 31 | } 32 | } -------------------------------------------------------------------------------- /jaws-spark-sql-rest/src/test/scala/utils/TestSuite.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.Suites 5 | import org.scalatest.junit.JUnitRunner 6 | import api.{QueryPropertiesTest, DeleteQueryTest, GetQueryInfoTest} 7 | import implementation.HiveUtilsTest 8 | 9 | 10 | @RunWith(classOf[JUnitRunner]) 11 | class TestSuite extends Suites(new DeleteQueryTest, new QueryPropertiesTest, new GetQueryInfoTest, new HiveUtilsTest) { 12 | } -------------------------------------------------------------------------------- 
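DeleteQueryTest and GetQueryInfoTest above share one testing pattern: a TestActorRef wraps the API actor, the ask operator sends it a message, and ScalaTest's whenReady checks the reply. The condensed sketch below illustrates just that pattern; it is not part of the repository, and EchoActor/EchoActorTest are made-up stand-ins for the real API actors:

    import akka.actor.{Actor, ActorSystem}
    import akka.pattern.ask
    import akka.testkit.TestActorRef
    import akka.util.Timeout
    import org.scalatest.FunSuite
    import org.scalatest.concurrent.ScalaFutures

    // Made-up stand-in for the real API actors (DeleteQueryApiActor, GetQueriesApiActor, ...)
    class EchoActor extends Actor {
      def receive = { case msg => sender ! msg }
    }

    class EchoActorTest extends FunSuite with ScalaFutures {
      implicit val timeout = Timeout(10000)
      implicit val system = ActorSystem("localSystem")

      test("the actor echoes the message it receives") {
        val actor = TestActorRef(new EchoActor) // runs the actor synchronously on the calling thread
        val future = actor ? "ping"             // ask pattern: returns a Future with the reply
        whenReady(future)(reply => assert(reply === "ping"))
      }
    }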
/jaws-spark-sql-rest/src/test/scala/utils/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import org.apache.log4j.Logger 4 | import server.JawsController 5 | import java.text.SimpleDateFormat 6 | import java.util.Date 7 | 8 | object TestUtils { 9 | import com.typesafe.config.ConfigFactory 10 | 11 | 12 | val timestamp = new SimpleDateFormat("yyyyMMdd-HHmmss") 13 | 14 | def getWarehousePath(prefix: String): String = { 15 | System.getProperty("user.dir") + "/test_warehouses/" + prefix + "-warehouse-" + 16 | timestamp.format(new Date) 17 | } 18 | 19 | def getMetastorePath(prefix: String): String = { 20 | System.getProperty("user.dir") + "/test_warehouses/" + prefix + "-metastore-" + 21 | timestamp.format(new Date) 22 | } 23 | 24 | val log4j = Logger.getLogger(TestUtils.getClass()) 25 | 26 | private val conf = ConfigFactory.load 27 | conf.checkValid(ConfigFactory.defaultReference) 28 | 29 | val test = conf.getConfig("test").withFallback(conf) 30 | val dataFilePath = System.getProperty("user.dir") + Option(test.getString("dataFilePath")).getOrElse("") + "/data" 31 | } -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 3 | <modelVersion>4.0.0</modelVersion> 4 | 5 | <groupId>com.xpatterns</groupId> 6 | <artifactId>jaws-spark-sql</artifactId> 7 | <version>1.1.0-spark1.3.1</version> 8 | <packaging>pom</packaging> 9 | 10 | <name>jaws-spark-sql</name> 11 | <url>http://maven.apache.org</url> 12 | 13 | <modules> 14 | <module>jaws-spark-sql-rest</module> 15 | <module>jaws-spark-sql-data</module> 16 | <module>jaws-hive-sql-rest</module> 17 | </modules> 18 | 19 | </project> --------------------------------------------------------------------------------
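Once the modules above are built and jaws-spark-sql-rest is started, the /jaws/index route wired up by JawsController and IndexApi answers with "Jaws is up and running!". The sketch below is a purely illustrative smoke test (not part of the repository); the host and port are assumptions taken from the sample application.conf (localhost, web.services.port=9080):

    import scala.io.Source

    object JawsSmokeTest extends App {
      // Assumed host/port, taken from the sample application.conf shown earlier
      val url = "http://localhost:9080/jaws/index"

      // IndexApi completes this route with "Jaws is up and running!" once the server is up
      val response = Source.fromURL(url).mkString
      println(s"GET $url -> $response")
    }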