├── .github └── PULL_REQUEST_TEMPLATE ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── build └── mvn ├── examples └── zeepline_notebook │ ├── SAC_ Spark ML.json │ ├── SAC_ Spark SQL.json │ ├── SAC_ Spark Streaming and ML.json │ ├── Spark ETL_Lineage.png │ ├── Spark ML_Lineage.png │ └── Spark_ML_Streaming_Lineage.png ├── patch ├── 1100-spark_model.json ├── Spark_ML_Listener_2.3.diff ├── Spark_ML_Listener_2.3.patch ├── Spark_ML_Listener_2.4.diff └── Spark_ML_Listener_2.4.patch ├── pom.xml ├── scalastyle-config.xml ├── spark-atlas-connector-assembly └── pom.xml └── spark-atlas-connector ├── pom.xml └── src ├── main └── scala │ ├── com │ └── hortonworks │ │ └── spark │ │ └── atlas │ │ ├── AbstractEventProcessor.scala │ │ ├── AtlasClient.scala │ │ ├── AtlasClientConf.scala │ │ ├── AtlasEntityCreationRequestHelper.scala │ │ ├── AtlasEntityReadHelper.scala │ │ ├── AtlasUtils.scala │ │ ├── KafkaAtlasClient.scala │ │ ├── RestAtlasClient.scala │ │ ├── SACAtlasEntity.scala │ │ ├── SparkAtlasEventTracker.scala │ │ ├── SparkAtlasStreamingQueryEventTracker.scala │ │ ├── ml │ │ └── MLPipelineEventProcessor.scala │ │ ├── sql │ │ ├── CommandsHarvester.scala │ │ ├── Harvester.scala │ │ ├── KafkaTopicInformation.scala │ │ ├── SparkCatalogEventProcessor.scala │ │ ├── SparkExecutionPlanProcessor.scala │ │ └── SparkExtension.scala │ │ ├── types │ │ ├── AtlasEntityUtils.scala │ │ ├── external.scala │ │ ├── internal.scala │ │ └── metadata.scala │ │ └── utils │ │ ├── CatalogUtils.scala │ │ ├── JdbcUtils.scala │ │ ├── Logging.scala │ │ ├── ReflectionHelper.scala │ │ └── SparkUtils.scala │ └── org │ └── apache │ └── spark │ └── sql │ └── kafka010 │ └── atlas │ └── ExtractFromDataSource.scala └── test ├── resources ├── atlas-application.properties ├── log4j.properties └── users.parquet └── scala └── com ├── hortonworks └── spark │ └── atlas │ ├── AtlasEntityCreationRequestHelperSuite.scala │ ├── BaseResourceIT.scala │ ├── KafkaClientIT.scala │ ├── TestUtils.scala │ ├── WithHDFSSupport.scala │ ├── WithHiveSupport.scala │ ├── WithRemoteHiveMetastoreServiceSupport.scala │ ├── ml │ ├── MLPipelineTrackerIT.scala │ └── MLPipelineWithSaveIntoSuite.scala │ ├── sql │ ├── CatalogEventToAtlasIT.scala │ ├── CreateDataSourceTableAsSelectHarvesterSuite.scala │ ├── CreateHiveTableAsSelectHarvesterSuite.scala │ ├── CreateViewHarvesterSuite.scala │ ├── InsertIntoHarvesterSuite.scala │ ├── InsertIntoHiveDirHarvesterSuite.scala │ ├── LoadDataHarvesterSuite.scala │ ├── SparkCatalogEventProcessorSuite.scala │ ├── SparkExecutionPlanProcessForRdbmsQuerySuite.scala │ ├── SparkExecutionPlanProcessorForBatchQuerySuite.scala │ ├── SparkExecutionPlanProcessorForComplicatedQuerySuite.scala │ ├── SparkExecutionPlanProcessorForStreamingQuerySuite.scala │ ├── SparkExecutionPlanProcessorForViewSuite.scala │ ├── SparkExecutionPlanProcessorWithRemoteHiveMetastoreServiceSuite.scala │ └── testhelper │ │ ├── AtlasQueryExecutionListener.scala │ │ ├── AtlasStreamingQueryProgressListener.scala │ │ ├── BaseHarvesterSuite.scala │ │ ├── CreateEntitiesTrackingAtlasClient.scala │ │ ├── DirectProcessSparkExecutionPlanProcessor.scala │ │ ├── FsEntityValidator.scala │ │ ├── KafkaTopicEntityValidator.scala │ │ ├── ProcessEntityValidator.scala │ │ └── TableEntityValidator.scala │ ├── types │ ├── AtlasExternalEntityUtilsSuite.scala │ ├── MLAtlasEntityUtilsSuite.scala │ └── SparkAtlasEntityUtilsSuite.scala │ └── utils │ ├── JdbcUtilsTest.scala │ └── SparkUtilsSuite.scala └── hotels └── beeju └── ThriftHiveMetaStoreTestUtil.scala 
/.github/PULL_REQUEST_TEMPLATE: -------------------------------------------------------------------------------- 1 | ## What changes were proposed in this pull request? 2 | 3 | (Please fill in changes proposed in this fix) 4 | 5 | ## How was this patch tested? 6 | 7 | (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .idea 4 | *.iml 5 | target 6 | metastore_db 7 | tmp 8 | dependency-reduced-pom.xml 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | sudo: false 17 | dist: trusty 18 | 19 | language: java 20 | jdk: 21 | - oraclejdk8 22 | 23 | cache: 24 | directories: 25 | - $HOME/.m2 26 | 27 | notifications: 28 | email: false 29 | 30 | install: 31 | - mvn -q clean checkstyle:check scalastyle:check package -DskipTests 32 | -------------------------------------------------------------------------------- /build/mvn: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | # Determine the current working directory 21 | _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 22 | # Preserve the calling directory 23 | _CALLING_DIR="$(pwd)" 24 | # Options used during compilation 25 | _COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m" 26 | 27 | # Installs any application tarball given a URL, the expected tarball name, 28 | # and, optionally, a checkable binary path to determine if the binary has 29 | # already been installed 30 | ## Arg1 - URL 31 | ## Arg2 - Tarball Name 32 | ## Arg3 - Checkable Binary 33 | install_app() { 34 | local remote_tarball="$1/$2" 35 | local local_tarball="${_DIR}/$2" 36 | local binary="${_DIR}/$3" 37 | 38 | # setup `curl` and `wget` silent options if we're running on Jenkins 39 | local curl_opts="-L" 40 | local wget_opts="" 41 | if [ -n "$AMPLAB_JENKINS" ]; then 42 | curl_opts="-s ${curl_opts}" 43 | wget_opts="--quiet ${wget_opts}" 44 | else 45 | curl_opts="--progress-bar ${curl_opts}" 46 | wget_opts="--progress=bar:force ${wget_opts}" 47 | fi 48 | 49 | if [ -z "$3" -o ! -f "$binary" ]; then 50 | # check if we already have the tarball 51 | # check if we have curl installed 52 | # download application 53 | [ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \ 54 | echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \ 55 | curl ${curl_opts} "${remote_tarball}" > "${local_tarball}" 56 | # if the file still doesn't exist, lets try `wget` and cross our fingers 57 | [ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \ 58 | echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \ 59 | wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}" 60 | # if both were unsuccessful, exit 61 | [ ! -f "${local_tarball}" ] && \ 62 | echo -n "ERROR: Cannot download $2 with cURL or wget; " && \ 63 | echo "please install manually and try again." && \ 64 | exit 2 65 | cd "${_DIR}" && tar -xzf "$2" 66 | rm -rf "$local_tarball" 67 | fi 68 | } 69 | 70 | # Determine the Maven version from the root pom.xml file and 71 | # install maven under the build/ folder if needed. 72 | install_mvn() { 73 | local MVN_VERSION=`grep "<maven.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` 74 | MVN_BIN="$(command -v mvn)" 75 | if [ "$MVN_BIN" ]; then 76 | local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')" 77 | fi 78 | # See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers 79 | function version { echo "$@" | awk -F. 
'{ printf("%03d%03d%03d\n", $1,$2,$3); }'; } 80 | if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then 81 | local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='} 82 | 83 | install_app \ 84 | "${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \ 85 | "apache-maven-${MVN_VERSION}-bin.tar.gz" \ 86 | "apache-maven-${MVN_VERSION}/bin/mvn" 87 | 88 | MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn" 89 | fi 90 | } 91 | 92 | # Install the proper version of Scala, Zinc and Maven for the build 93 | install_mvn 94 | 95 | # Reset the current working directory 96 | cd "${_CALLING_DIR}" 97 | 98 | # Set any `mvn` options if not already present 99 | export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"} 100 | 101 | echo "Using \`mvn\` from path: $MVN_BIN" 1>&2 102 | "${MVN_BIN}" "$@" 103 | -------------------------------------------------------------------------------- /examples/zeepline_notebook/SAC_ Spark ML.json: -------------------------------------------------------------------------------- 1 | {"paragraphs":[{"text":"%conf\nspark.app.name Spark-ML\nspark.jars /tmp/atlas/spark-atlas-connector-assembly_2.11-0.1.0-SNAPSHOT.jar\nspark.jars.packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0\nspark.extraListeners com.hortonworks.spark.atlas.SparkAtlasEventTracker\nspark.sql.queryExecutionListeners com.hortonworks.spark.atlas.SparkAtlasEventTracker\nspark.sql.streaming.streamingQueryListeners com.hortonworks.spark.atlas.SparkAtlasStreamingQueryEventTracker\n","user":"admin","dateUpdated":"2018-06-21T22:26:50+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/text"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1529020561038_-89670889","id":"20180614-235601_1829676857","dateCreated":"2018-06-14T23:56:01+0000","dateStarted":"2018-06-21T22:26:50+0000","dateFinished":"2018-06-21T22:26:50+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:754"},{"title":" Step2: Build a Spark ML pipeline and store the trained model into HDFS","text":"%spark2\nimport org.apache.spark.ml.{Pipeline, PipelineModel}\nimport org.apache.spark.ml.feature.{StopWordsRemover, Tokenizer}\nimport org.apache.spark.sql.SparkSession\nimport org.apache.spark.sql.SparkSession\nimport spark.implicits._\n\nval training = spark.sql(\"select * from training_table\")\n\ntraining.show()\n\n// Configure an ML pipeline, which consists of three stages: tokenizer, remover.\nval tokenizer = new Tokenizer().setInputCol(\"text\").setOutputCol(\"words\")\n\nval remover = new StopWordsRemover().setInputCol(\"words\").setOutputCol(\"filtered\")\n\nval pipeline = new Pipeline().setStages(Array(tokenizer, remover))\n\nval model = pipeline.fit(training)\n\nval pipelineDir = \"/tmp/pipeline_streaming_dir\"\n\nval modelDir = \"/tmp/model_streaming_dir\"\n\npipeline.write.overwrite().save(pipelineDir)\n\nmodel.write.overwrite().save(modelDir)\n","user":"admin","dateUpdated":"2018-06-21T22:28:02+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","title":true,"results":{},"enabled":true,"fontSize":9},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"import 
org.apache.spark.ml.{Pipeline, PipelineModel}\nimport org.apache.spark.ml.feature.{StopWordsRemover, Tokenizer}\nimport org.apache.spark.sql.SparkSession\nimport org.apache.spark.sql.SparkSession\nimport spark.implicits._\ntraining: org.apache.spark.sql.DataFrame = [text: string]\n+--------------------+\n| text|\n+--------------------+\n|\"Hortonworks is a...|\n|Temporary views i...|\n|\"Datasets are sim...|\n+--------------------+\n\ntokenizer: org.apache.spark.ml.feature.Tokenizer = tok_01d4ad905c67\nremover: org.apache.spark.ml.feature.StopWordsRemover = stopWords_c83b472c2ee9\npipeline: org.apache.spark.ml.Pipeline = pipeline_b4b3bb3719d6\nmodel: org.apache.spark.ml.PipelineModel = pipeline_b4b3bb3719d6\npipelineDir: String = /tmp/pipeline_streaming_dir\nmodelDir: String = /tmp/model_streaming_dir\n"}]},"runtimeInfos":{"jobUrl":{"propertyName":"jobUrl","label":"SPARK JOB","tooltip":"View in Spark web UI","group":"spark","values":["http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=0","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=1","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=2","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=3","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=4","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=5","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=6","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=7"],"interpreterSettingId":"spark2"}},"apps":[],"jobName":"paragraph_1529009689077_1168589782","id":"20180503-200931_1296064876","dateCreated":"2018-06-14T20:54:49+0000","dateStarted":"2018-06-21T22:28:02+0000","dateFinished":"2018-06-21T22:28:53+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:755"},{"text":"%spark2\n","user":"admin","dateUpdated":"2018-06-14T20:54:49+0000","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"fontSize":9},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1529009689079_-1579730663","id":"20180506-231703_1473196617","dateCreated":"2018-06-14T20:54:49+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:756"}],"name":"SAC: Spark ML","id":"2DF4E4NE9","noteParams":{},"noteForms":{},"angularObjects":{"spark2:shared_process":[]},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}} -------------------------------------------------------------------------------- /examples/zeepline_notebook/SAC_ Spark SQL.json: -------------------------------------------------------------------------------- 1 | {"paragraphs":[{"text":"%conf\nspark.app.name Spark-ETL\nspark.jars /tmp/atlas/spark-atlas-connector-assembly_2.11-0.1.0-SNAPSHOT.jar\nspark.jars.packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0\nspark.extraListeners com.hortonworks.spark.atlas.SparkAtlasEventTracker\nspark.sql.queryExecutionListeners com.hortonworks.spark.atlas.SparkAtlasEventTracker\nspark.sql.streaming.streamingQueryListeners 
com.hortonworks.spark.atlas.SparkAtlasStreamingQueryEventTracker","user":"admin","dateUpdated":"2018-06-21T22:18:16+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/text"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1529009709294_238885199","id":"20180614-205509_1930236875","dateCreated":"2018-06-14T20:55:09+0000","dateStarted":"2018-06-21T22:18:16+0000","dateFinished":"2018-06-21T22:18:16+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:2470"},{"title":"Step1: Build a Hive Table","text":"%spark2\nimport org.apache.spark.sql.SparkSession\nimport spark.implicits._\nimport org.apache.spark.sql.{SaveMode, SparkSession}\n\nspark.sql(\"CREATE TABLE IF NOT EXISTS training_table (text STRING) USING hive\")\n\nval trainData = Seq(\n (\"Hortonworks is a big data software company based in Santa Clara, California.\"),\n (\"Temporary views in Spark SQL are session-scoped and will disappear if the session that creates it terminates.\"),\n (\"Datasets are similar to RDDs, however, instead of using Java serialization or Kryo they use a specialized Encoder.\")\n).toDF(\"text\")\n\ntrainData.write.mode(SaveMode.Overwrite).format(\"csv\").save(\"/tmp/training_table.csv\")\n\nspark.sql(\"LOAD DATA INPATH '/tmp/training_table.csv' INTO TABLE training_table\")\nspark.sql(\"select * from training_table\").show()\n","user":"admin","dateUpdated":"2018-06-21T22:18:24+0000","config":{"tableHide":false,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","editorHide":false,"title":true,"results":{},"enabled":true,"fontSize":9},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"import org.apache.spark.sql.SparkSession\nimport spark.implicits._\nimport org.apache.spark.sql.{SaveMode, SparkSession}\nres4: org.apache.spark.sql.DataFrame = []\ntrainData: org.apache.spark.sql.DataFrame = [text: string]\nres6: org.apache.spark.sql.DataFrame = []\n+--------------------+\n| text|\n+--------------------+\n|\"Hortonworks is a...|\n|Temporary views i...|\n|\"Datasets are sim...|\n+--------------------+\n\n"}]},"runtimeInfos":{"jobUrl":{"propertyName":"jobUrl","label":"SPARK JOB","tooltip":"View in Spark web 
UI","group":"spark","values":["http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=0","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=1","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=2"],"interpreterSettingId":"spark2"}},"apps":[],"jobName":"paragraph_1529009617489_-1522856994","id":"20180503-195630_1514310671","dateCreated":"2018-06-14T20:53:37+0000","dateStarted":"2018-06-21T22:18:24+0000","dateFinished":"2018-06-21T22:19:18+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:2471"},{"text":"","user":"admin","dateUpdated":"2018-06-18T21:57:33+0000","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"fontSize":9},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1529009617505_1441244159","id":"20180503-200854_877477857","dateCreated":"2018-06-14T20:53:37+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:2472"}],"name":"SAC: Spark SQL","id":"2DGPRFBM9","noteParams":{},"noteForms":{},"angularObjects":{"spark2:shared_process":[]},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}} -------------------------------------------------------------------------------- /examples/zeepline_notebook/SAC_ Spark Streaming and ML.json: -------------------------------------------------------------------------------- 1 | {"paragraphs":[{"text":"%conf\nspark.app.name Spark-Streaming\nspark.jars /tmp/atlas/spark-atlas-connector-assembly_2.11-0.1.0-SNAPSHOT.jar\nspark.jars.packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0\nspark.extraListeners com.hortonworks.spark.atlas.SparkAtlasEventTracker\nspark.sql.queryExecutionListeners com.hortonworks.spark.atlas.SparkAtlasEventTracker\nspark.sql.streaming.streamingQueryListeners com.hortonworks.spark.atlas.SparkAtlasStreamingQueryEventTracker","user":"admin","dateUpdated":"2018-06-21T22:40:38+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/text"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1529020694752_1363032102","id":"20180614-235814_690445349","dateCreated":"2018-06-14T23:58:14+0000","dateStarted":"2018-06-21T22:40:38+0000","dateFinished":"2018-06-21T22:40:38+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:3152"},{"title":"Step3: Build a Spark streaming processing pipeline with trained model for Kafka Streaming","text":"%spark2\n\nimport org.apache.spark.ml.{Pipeline, PipelineModel}\nimport org.apache.spark.sql.streaming.{OutputMode, Trigger}\n\nval kafkaServer = \"172.27.22.200:6667\"\n\nval sameModel = PipelineModel.load(\"/tmp/model_streaming_dir\")\n \nval df = spark.readStream.format(\"kafka\").option(\"kafka.bootstrap.servers\", kafkaServer).option(\"subscribe\", \"kafka_input\").load()\n\nval df2 = df.selectExpr(\"CAST(key AS STRING)\", \"CAST(value AS STRING)\").as[(String, String)].toDF(\"id\", \"text\")\n\n//sink streaming data to other kafaka \nval output = sameModel.transform(df2).toDF(\"key\", \"value\", \"words\", \"filtered\").selectExpr(\"CAST(key AS STRING)\", \"CAST(value AS 
STRING)\")\n\noutput.writeStream.format(\"kafka\").option(\"kafka.bootstrap.servers\", kafkaServer).option(\"checkpointLocation\", \"/tmp/demo/chckpnt\").option(\"topic\", \"kafka_output\").start()\n","user":"admin","dateUpdated":"2018-06-21T22:40:41+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","title":true,"results":{},"enabled":true,"fontSize":9},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"import org.apache.spark.ml.{Pipeline, PipelineModel}\nimport org.apache.spark.sql.streaming.{OutputMode, Trigger}\nkafkaServer: String = 172.27.22.200:6667\nsameModel: org.apache.spark.ml.PipelineModel = pipeline_b4b3bb3719d6\ndf: org.apache.spark.sql.DataFrame = [key: binary, value: binary ... 5 more fields]\ndf2: org.apache.spark.sql.DataFrame = [id: string, text: string]\noutput: org.apache.spark.sql.DataFrame = [key: string, value: string]\nres5: org.apache.spark.sql.streaming.StreamingQuery = org.apache.spark.sql.execution.streaming.StreamingQueryWrapper@1d5473b0\n"}]},"runtimeInfos":{"jobUrl":{"propertyName":"jobUrl","label":"SPARK JOB","tooltip":"View in Spark web UI","group":"spark","values":["http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=0","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=1","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=2","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=3","http://ctr-e138-1518143905142-364859-01-000004.hwx.site:4041/jobs/job?id=4"],"interpreterSettingId":"spark2"}},"apps":[],"jobName":"paragraph_1529009681224_1846251547","id":"20180503-201028_932250986","dateCreated":"2018-06-14T20:54:41+0000","dateStarted":"2018-06-21T22:40:41+0000","dateFinished":"2018-06-21T22:41:24+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:3153"},{"text":"%spark2\n","user":"admin","dateUpdated":"2018-06-14T20:54:41+0000","config":{"colWidth":12,"editorMode":"ace/mode/scala","results":{},"enabled":true,"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"fontSize":9},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1529009681226_1853252567","id":"20180506-232424_172987971","dateCreated":"2018-06-14T20:54:41+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:3154"}],"name":"SAC: Spark Streaming and ML","id":"2DJSKKUA5","noteParams":{},"noteForms":{},"angularObjects":{"spark2:shared_process":[]},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}} -------------------------------------------------------------------------------- /examples/zeepline_notebook/Spark ETL_Lineage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hortonworks-spark/spark-atlas-connector/0b10e337cdfd427744a92f8505d46297afb4c295/examples/zeepline_notebook/Spark ETL_Lineage.png -------------------------------------------------------------------------------- /examples/zeepline_notebook/Spark ML_Lineage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hortonworks-spark/spark-atlas-connector/0b10e337cdfd427744a92f8505d46297afb4c295/examples/zeepline_notebook/Spark ML_Lineage.png 
-------------------------------------------------------------------------------- /examples/zeepline_notebook/Spark_ML_Streaming_Lineage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hortonworks-spark/spark-atlas-connector/0b10e337cdfd427744a92f8505d46297afb4c295/examples/zeepline_notebook/Spark_ML_Streaming_Lineage.png -------------------------------------------------------------------------------- /spark-atlas-connector-assembly/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 22 | 4.0.0 23 | 24 | com.hortonworks.spark 25 | spark-atlas-connector-main_2.11 26 | 0.1.0-SNAPSHOT 27 | ../pom.xml 28 | 29 | 30 | spark-atlas-connector-assembly 31 | jar 32 | 33 | 34 | 35 | com.hortonworks.spark 36 | spark-atlas-connector_${scala.binary.version} 37 | ${project.version} 38 | 39 | 40 | 41 | 42 | target/scala-${scala.binary.version}/classes 43 | target/scala-${scala.binary.version}/test-classes 44 | 45 | 46 | org.apache.maven.plugins 47 | maven-shade-plugin 48 | 49 | false 50 | 51 | 52 | *:* 53 | 54 | 55 | 56 | 57 | *:* 58 | 59 | META-INF/*.SF 60 | META-INF/*.DSA 61 | META-INF/*.RSA 62 | 63 | 64 | 65 | 66 | 67 | 68 | package 69 | 70 | shade 71 | 72 | 73 | 74 | 75 | 76 | reference.conf 77 | 78 | 79 | log4j.properties 80 | 81 | 82 | 83 | 84 | 85 | 86 | org.apache.hadoop.hbase 87 | com.hortonworks.spark.atlas.shade.org.apache.hbase 88 | 89 | 90 | org.apache.htrace 91 | com.hortonworks.spark.atlas.shade.org.apache.htrace 92 | 93 | 94 | org.apache.commons.configuration 95 | com.hortonworks.spark.atlas.shade.org.apache.commons.configuration 96 | 97 | 98 | com.sun.jersey 99 | com.hortonworks.spark.atlas.shade.com.sun.jersey 100 | 101 | 102 | org.codehaus.jackson 103 | com.hortonworks.spark.atlas.shade.org.codehaus.jackson 104 | 105 | 106 | javax.ws.rs 107 | com.hortonworks.spark.atlas.javax.ws.rs 108 | 109 | 110 | com.fasterxml.jackson 111 | com.hortonworks.spark.atlas.com.fasterxml.jackson 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /spark-atlas-connector/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 21 | 4.0.0 22 | 23 | 24 | com.hortonworks.spark 25 | spark-atlas-connector-main_2.11 26 | 0.1.0-SNAPSHOT 27 | ../pom.xml 28 | 29 | 30 | spark-atlas-connector_2.11 31 | jar 32 | 33 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/AbstractEventProcessor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} 21 | 22 | import scala.reflect.ClassTag 23 | import scala.util.control.NonFatal 24 | import com.google.common.annotations.VisibleForTesting 25 | import com.hortonworks.spark.atlas.utils.Logging 26 | 27 | abstract class AbstractEventProcessor[T: ClassTag] extends Logging { 28 | def conf: AtlasClientConf 29 | 30 | private val capacity = conf.get(AtlasClientConf.BLOCKING_QUEUE_CAPACITY).toInt 31 | 32 | private[atlas] val eventQueue = new LinkedBlockingQueue[T](capacity) 33 | 34 | private val timeout = conf.get(AtlasClientConf.BLOCKING_QUEUE_PUT_TIMEOUT).toInt 35 | 36 | private val eventProcessThread = new Thread { 37 | override def run(): Unit = { 38 | eventProcess() 39 | } 40 | } 41 | 42 | def pushEvent(event: T): Unit = { 43 | event match { 44 | case e: T => 45 | if (!eventQueue.offer(e, timeout, TimeUnit.MILLISECONDS)) { 46 | logError(s"Fail to put event $e into queue within time limit $timeout, will throw it") 47 | } 48 | case _ => // Ignore other events 49 | } 50 | } 51 | 52 | def startThread(): Unit = { 53 | eventProcessThread.setName(this.getClass.getSimpleName + "-thread") 54 | eventProcessThread.setDaemon(true) 55 | 56 | val ctxClassLoader = Thread.currentThread().getContextClassLoader 57 | if (ctxClassLoader != null && getClass.getClassLoader != ctxClassLoader) { 58 | eventProcessThread.setContextClassLoader(ctxClassLoader) 59 | } 60 | 61 | eventProcessThread.start() 62 | } 63 | 64 | protected def process(e: T): Unit 65 | 66 | @VisibleForTesting 67 | private[atlas] def eventProcess(): Unit = { 68 | var stopped = false 69 | while (!stopped) { 70 | try { 71 | Option(eventQueue.poll(3000, TimeUnit.MILLISECONDS)).foreach { e => 72 | process(e) 73 | } 74 | } catch { 75 | case _: InterruptedException => 76 | logDebug("Thread is interrupted") 77 | stopped = true 78 | 79 | case NonFatal(f) => 80 | logWarn(s"Caught exception during parsing event", f) 81 | } 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/AtlasClient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import scala.util.control.NonFatal 21 | 22 | import com.sun.jersey.core.util.MultivaluedMapImpl 23 | import org.apache.atlas.model.instance.AtlasEntity 24 | import org.apache.atlas.model.typedef.AtlasTypesDef 25 | 26 | import com.hortonworks.spark.atlas.utils.Logging 27 | 28 | trait AtlasClient extends Logging { 29 | 30 | def createAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit 31 | 32 | def getAtlasTypeDefs(searchParams: MultivaluedMapImpl): AtlasTypesDef 33 | 34 | def updateAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit 35 | 36 | final def createEntitiesWithDependencies( 37 | entity: SACAtlasReferenceable): Unit = this.synchronized { 38 | entity match { 39 | case e: SACAtlasEntityWithDependencies => 40 | // handle dependencies first 41 | if (e.dependencies.nonEmpty) { 42 | val deps = e.dependencies.filter(_.isInstanceOf[SACAtlasEntityWithDependencies]) 43 | .map(_.asInstanceOf[SACAtlasEntityWithDependencies]) 44 | 45 | val depsHavingAnotherDeps = deps.filter(_.dependencies.nonEmpty) 46 | val depsHavingNoDeps = deps.filterNot(_.dependencies.nonEmpty) 47 | 48 | // we should handle them one by one if they're having additional dependencies 49 | depsHavingAnotherDeps.foreach(createEntitiesWithDependencies) 50 | 51 | // otherwise, we can handle them at once 52 | createEntities(depsHavingNoDeps.map(_.entity)) 53 | } 54 | 55 | // done with dependencies, process origin entity 56 | createEntities(Seq(e.entity)) 57 | 58 | case _ => // don't request creation entity for reference 59 | } 60 | } 61 | 62 | final def createEntitiesWithDependencies( 63 | entities: Seq[SACAtlasReferenceable]): Unit = this.synchronized { 64 | entities.foreach(createEntitiesWithDependencies) 65 | } 66 | 67 | final def createEntities(entities: Seq[AtlasEntity]): Unit = this.synchronized { 68 | if (entities.isEmpty) { 69 | return 70 | } 71 | 72 | try { 73 | doCreateEntities(entities) 74 | } catch { 75 | case NonFatal(e) => 76 | logWarn(s"Failed to create entities", e) 77 | } 78 | } 79 | 80 | protected def doCreateEntities(entities: Seq[AtlasEntity]): Unit 81 | 82 | final def deleteEntityWithUniqueAttr( 83 | entityType: String, attribute: String): Unit = this.synchronized { 84 | try { 85 | doDeleteEntityWithUniqueAttr(entityType, attribute) 86 | } catch { 87 | case NonFatal(e) => 88 | logWarn(s"Failed to delete entity with type $entityType", e) 89 | } 90 | } 91 | 92 | protected def doDeleteEntityWithUniqueAttr(entityType: String, attribute: String): Unit 93 | 94 | final def updateEntityWithUniqueAttr( 95 | entityType: String, 96 | attribute: String, 97 | entity: AtlasEntity): Unit = this.synchronized { 98 | try { 99 | doUpdateEntityWithUniqueAttr(entityType, attribute, entity) 100 | } catch { 101 | case NonFatal(e) => 102 | logWarn(s"Failed to update entity $entity with type $entityType and attribute " + 103 | s"$attribute", e) 104 | } 105 | } 106 | 107 | protected def doUpdateEntityWithUniqueAttr( 108 | entityType: String, 109 | attribute: String, 110 | entity: AtlasEntity): Unit 111 | } 112 | 113 | object AtlasClient { 114 | @volatile private var client: AtlasClient = null 115 | 116 | def atlasClient(conf: AtlasClientConf): AtlasClient = { 117 | if (client == null) { 118 | AtlasClient.synchronized { 119 | if (client == null) { 120 | conf.get(AtlasClientConf.CLIENT_TYPE).trim match { 121 | case "rest" => 122 | client = new RestAtlasClient(conf) 123 | case "kafka" => 124 | client = new KafkaAtlasClient(conf) 125 | case e => 126 | client = Class.forName(e) 127 | 
.getConstructor(classOf[AtlasClientConf]) 128 | .newInstance(conf) 129 | .asInstanceOf[AtlasClient] 130 | } 131 | } 132 | } 133 | } 134 | 135 | client 136 | } 137 | } 138 | 139 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/AtlasClientConf.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import org.apache.atlas.ApplicationProperties 21 | import com.hortonworks.spark.atlas.AtlasClientConf.ConfigEntry 22 | 23 | class AtlasClientConf { 24 | 25 | private lazy val configuration = ApplicationProperties.get() 26 | 27 | def set(key: String, value: String): AtlasClientConf = { 28 | configuration.setProperty(key, value) 29 | this 30 | } 31 | 32 | def set(key: ConfigEntry, value: String): AtlasClientConf = { 33 | configuration.setProperty(key.key, value) 34 | this 35 | } 36 | 37 | def get(key: String, defaultValue: String): String = { 38 | Option(configuration.getProperty(key).asInstanceOf[String]).getOrElse(defaultValue) 39 | } 40 | 41 | def getOption(key: String): Option[String] = { 42 | Option(configuration.getProperty(key).asInstanceOf[String]) 43 | } 44 | 45 | def getUrl(key: String): Object = { 46 | configuration.getProperty(key) 47 | } 48 | 49 | def get(t: ConfigEntry): String = { 50 | Option(configuration.getProperty(t.key).asInstanceOf[String]).getOrElse(t.defaultValue) 51 | } 52 | } 53 | 54 | object AtlasClientConf { 55 | case class ConfigEntry(key: String, defaultValue: String) 56 | 57 | val ATLAS_SPARK_ENABLED = ConfigEntry("atlas.spark.enabled", "true") 58 | 59 | val ATLAS_REST_ENDPOINT = ConfigEntry("atlas.rest.address", "localhost:21000") 60 | 61 | val BLOCKING_QUEUE_CAPACITY = ConfigEntry("atlas.blockQueue.size", "10000") 62 | val BLOCKING_QUEUE_PUT_TIMEOUT = ConfigEntry("atlas.blockQueue.putTimeout.ms", "3000") 63 | 64 | val CLIENT_TYPE = ConfigEntry("atlas.client.type", "kafka") 65 | val CLIENT_USERNAME = ConfigEntry("atlas.client.username", "admin") 66 | val CLIENT_PASSWORD = ConfigEntry("atlas.client.password", "admin123") 67 | val CLIENT_NUM_RETRIES = ConfigEntry("atlas.client.numRetries", "3") 68 | 69 | val CLUSTER_NAME = ConfigEntry("atlas.cluster.name", "primary") 70 | } 71 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/AtlasEntityCreationRequestHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license 
agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.util.UUID 21 | 22 | import com.hortonworks.spark.atlas.types.metadata 23 | import com.hortonworks.spark.atlas.utils.Logging 24 | import org.apache.atlas.model.instance.AtlasObjectId 25 | 26 | import scala.collection.mutable 27 | 28 | class AtlasEntityCreationRequestHelper( 29 | atlasClient: AtlasClient) extends Logging { 30 | // query to (inputs, outputs) 31 | private val queryToInputsAndOutputs = new mutable.HashMap[UUID, 32 | (Set[AtlasObjectId], Set[AtlasObjectId])]() 33 | 34 | def requestCreation(entities: Seq[SACAtlasReferenceable], queryId: Option[UUID] = None): Unit = { 35 | queryId match { 36 | case Some(rid) => updateEntitiesForStreamingQuery(rid, entities) 37 | case None => updateEntitiesForBatchQuery(entities) 38 | } 39 | } 40 | 41 | private def updateEntitiesForBatchQuery(entities: Seq[SACAtlasReferenceable]): Unit = { 42 | // the query is batch, hence always create entities 43 | // create input/output entities as well as update process entity(-ies) 44 | createEntities(entities) 45 | } 46 | 47 | private def updateEntitiesForStreamingQuery( 48 | queryId: UUID, 49 | entities: Seq[SACAtlasReferenceable]): Unit = { 50 | // the query is streaming, so which partial of source/sink entities can be seen 51 | // in specific batch - need to accumulate efficiently 52 | val processes = entities 53 | .filter(en => en.typeName == metadata.PROCESS_TYPE_STRING 54 | && en.isInstanceOf[SACAtlasEntityWithDependencies]) 55 | .map(_.asInstanceOf[SACAtlasEntityWithDependencies]) 56 | 57 | val inputs = processes.flatMap { p => 58 | AtlasEntityReadHelper.getSeqAtlasObjectIdAttribute(p.entity, "inputs") 59 | }.toSet 60 | 61 | val outputs = processes.flatMap { p => 62 | AtlasEntityReadHelper.getSeqAtlasObjectIdAttribute(p.entity, "outputs") 63 | }.toSet 64 | 65 | queryToInputsAndOutputs.get(queryId) match { 66 | case Some((is, os)) if !inputs.subsetOf(is) || !outputs.subsetOf(os) => 67 | // The query is streaming, and at least either inputs or outputs is not a 68 | // subset of accumulated one. 69 | 70 | // NOTE: we leverage the 'process' model's definition: 71 | // inputs and outputs are defined as set in definition, and Atlas automatically 72 | // accumulate these values which doesn't require us to track all inputs and 73 | // outputs and always provide accumulated one. 74 | // If we need to do in our own, we should also accumulate inputs and outputs 75 | // in SparkCatalogEventProcessor and maintain full of inputs and outputs. 76 | // Here we only accumulate inputs/outputs for each streaming query (queryId). 
77 | 78 | createEntities(entities) 79 | 80 | // update inputs and outputs as accumulating current one and new inputs/outputs 81 | updateInputsAndOutputs(queryId, is.union(inputs), os.union(outputs)) 82 | 83 | case Some((_, _)) => // if inputs.subsetOf(is) && outputs.subsetOf(os) 84 | // we already updated superset of inputs/outputs, skip updating 85 | 86 | case _ => 87 | // the streaming query hasn't been examined in current session 88 | createEntities(entities) 89 | 90 | // update inputs and outputs as new inputs/outputs, as there's nothing to accumulate 91 | updateInputsAndOutputs(queryId, inputs, outputs) 92 | } 93 | } 94 | 95 | private def createEntities(entities: Seq[SACAtlasReferenceable]): Unit = { 96 | // create input/output entities as well as update process entity(-ies) 97 | atlasClient.createEntitiesWithDependencies(entities) 98 | logDebug(s"Created entities without columns") 99 | } 100 | 101 | private def updateInputsAndOutputs( 102 | queryId: UUID, 103 | newInputs: Set[AtlasObjectId], 104 | newOutputs: Set[AtlasObjectId]): Unit = { 105 | queryToInputsAndOutputs.put(queryId, (newInputs, newOutputs)) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/AtlasEntityReadHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import scala.collection.convert.Wrappers.SeqWrapper 21 | import org.apache.atlas.model.instance.{AtlasEntity, AtlasObjectId} 22 | 23 | object AtlasEntityReadHelper { 24 | def listAtlasEntitiesAsType(entities: Seq[AtlasEntity], typeStr: String): Seq[AtlasEntity] = { 25 | entities.filter(p => p.getTypeName.equals(typeStr)) 26 | } 27 | 28 | def getOnlyOneEntity(entities: Seq[AtlasEntity], typeStr: String): AtlasEntity = { 29 | val filteredEntities = entities.filter { p => 30 | p.getTypeName.equals(typeStr) 31 | } 32 | assert(filteredEntities.size == 1) 33 | filteredEntities.head 34 | } 35 | 36 | def getOnlyOneObjectId(objIds: Seq[AtlasObjectId], typeStr: String): AtlasObjectId = { 37 | val filteredObjIds = objIds.filter { p => 38 | p.getTypeName.equals(typeStr) 39 | } 40 | assert(filteredObjIds.size == 1) 41 | filteredObjIds.head 42 | } 43 | 44 | def getOnlyOneEntityOnAttribute( 45 | entities: Seq[AtlasEntity], 46 | attrName: String, 47 | attrValue: String): AtlasEntity = { 48 | val filteredEntities = entities.filter { p => 49 | p.getAttribute(attrName).equals(attrValue) 50 | } 51 | assert(filteredEntities.size == 1) 52 | filteredEntities.head 53 | } 54 | 55 | def getStringAttribute(entity: AtlasEntity, attrName: String): String = { 56 | entity.getAttribute(attrName).asInstanceOf[String] 57 | } 58 | 59 | def getQualifiedName(entity: AtlasEntity): String = { 60 | entity.getAttribute(org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) 61 | .asInstanceOf[String] 62 | } 63 | 64 | def getAtlasEntityAttribute(entity: AtlasEntity, attrName: String): AtlasEntity = { 65 | entity.getAttribute(attrName).asInstanceOf[AtlasEntity] 66 | } 67 | 68 | def getAtlasObjectIdAttribute(entity: AtlasEntity, attrName: String): AtlasObjectId = { 69 | entity.getAttribute(attrName).asInstanceOf[AtlasObjectId] 70 | } 71 | 72 | def getAtlasObjectIdRelationshipAttribute( 73 | entity: AtlasEntity, 74 | attrName: String): AtlasObjectId = { 75 | entity.getRelationshipAttribute(attrName).asInstanceOf[AtlasObjectId] 76 | } 77 | 78 | def getSeqAtlasEntityAttribute( 79 | entity: AtlasEntity, 80 | attrName: String): Seq[AtlasEntity] = { 81 | entity.getAttribute(attrName).asInstanceOf[SeqWrapper[AtlasEntity]].underlying 82 | } 83 | 84 | def getSeqAtlasObjectIdAttribute( 85 | entity: AtlasEntity, 86 | attrName: String): Seq[AtlasObjectId] = { 87 | entity.getAttribute(attrName).asInstanceOf[SeqWrapper[AtlasObjectId]].underlying 88 | } 89 | 90 | def getSeqAtlasObjectIdRelationshipAttribute( 91 | entity: AtlasEntity, 92 | attrName: String): Seq[AtlasObjectId] = { 93 | entity.getRelationshipAttribute(attrName).asInstanceOf[SeqWrapper[AtlasObjectId]].underlying 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/AtlasUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.util.concurrent.atomic.AtomicLong 21 | 22 | import com.hortonworks.spark.atlas.utils.Logging 23 | import org.apache.atlas.model.instance.{AtlasEntity, AtlasObjectId} 24 | 25 | object AtlasUtils extends Logging { 26 | private val executionId = new AtomicLong(0L) 27 | 28 | def entityToReference(entity: AtlasEntity, useGuid: Boolean = false): AtlasObjectId = { 29 | if (useGuid) { 30 | new AtlasObjectId(entity.getGuid) 31 | } else { 32 | new AtlasObjectId(entity.getTypeName, "qualifiedName", entity.getAttribute("qualifiedName")) 33 | } 34 | } 35 | 36 | def entitiesToReferences( 37 | entities: Seq[AtlasEntity], 38 | useGuid: Boolean = false): Set[AtlasObjectId] = { 39 | entities.map(entityToReference(_, useGuid)).toSet 40 | } 41 | 42 | def issueExecutionId(): Long = executionId.getAndIncrement() 43 | 44 | def isSacEnabled(conf: AtlasClientConf): Boolean = { 45 | if (!conf.get(AtlasClientConf.ATLAS_SPARK_ENABLED).toBoolean) { 46 | logWarn("Spark Atlas Connector is disabled.") 47 | false 48 | } else { 49 | true 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/KafkaAtlasClient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.util 21 | 22 | import scala.collection.JavaConverters._ 23 | import com.sun.jersey.core.util.MultivaluedMapImpl 24 | import org.apache.atlas.hook.AtlasHook 25 | import org.apache.atlas.model.typedef.AtlasTypesDef 26 | import org.apache.atlas.model.instance.{AtlasEntity, AtlasObjectId} 27 | import org.apache.atlas.v1.model.notification.HookNotificationV1 28 | import org.apache.atlas.v1.model.notification.HookNotificationV1.{EntityCreateRequest, EntityDeleteRequest} 29 | import org.apache.atlas.v1.model.instance.Referenceable 30 | import org.apache.atlas.model.notification.HookNotification 31 | import com.hortonworks.spark.atlas.utils.SparkUtils 32 | import org.apache.atlas.AtlasClientV2.API_V2 33 | import org.apache.atlas.model.instance.AtlasEntity.{AtlasEntitiesWithExtInfo, AtlasEntityWithExtInfo} 34 | import org.apache.atlas.model.notification.HookNotification.{EntityCreateRequestV2, EntityDeleteRequestV2, EntityPartialUpdateRequestV2} 35 | 36 | class KafkaAtlasClient(atlasClientConf: AtlasClientConf) extends AtlasHook with AtlasClient { 37 | 38 | protected def getNumberOfRetriesPropertyKey: String = { 39 | AtlasClientConf.CLIENT_NUM_RETRIES.key 40 | } 41 | 42 | override def createAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit = { 43 | throw new UnsupportedOperationException("Kafka atlas client doesn't support create type defs") 44 | } 45 | 46 | override def getAtlasTypeDefs(searchParams: MultivaluedMapImpl): AtlasTypesDef = { 47 | throw new UnsupportedOperationException("Kafka atlas client doesn't support get type defs") 48 | } 49 | 50 | override def updateAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit = { 51 | throw new UnsupportedOperationException("Kafka atlas client doesn't support update type defs") 52 | } 53 | 54 | override protected def doCreateEntities(entities: Seq[AtlasEntity]): Unit = { 55 | val entitiesWithExtInfo = new AtlasEntitiesWithExtInfo() 56 | entities.foreach(entitiesWithExtInfo.addEntity) 57 | val createRequest = new EntityCreateRequestV2( 58 | SparkUtils.currUser(), entitiesWithExtInfo): HookNotification 59 | 60 | notifyEntities(Seq(createRequest).asJava, SparkUtils.ugi()) 61 | } 62 | 63 | override protected def doDeleteEntityWithUniqueAttr( 64 | entityType: String, 65 | attribute: String): Unit = { 66 | val deleteRequest = new EntityDeleteRequestV2( 67 | SparkUtils.currUser(), 68 | Seq(new AtlasObjectId(entityType, 69 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 70 | attribute)).asJava 71 | ): HookNotification 72 | 73 | notifyEntities(Seq(deleteRequest).asJava, SparkUtils.ugi()) 74 | } 75 | 76 | override protected def doUpdateEntityWithUniqueAttr( 77 | entityType: String, 78 | attribute: String, 79 | entity: AtlasEntity): Unit = { 80 | val partialUpdateRequest = new EntityPartialUpdateRequestV2( 81 | SparkUtils.currUser(), 82 | new AtlasObjectId(entityType, 83 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 84 | attribute), 85 | new AtlasEntityWithExtInfo(entity) 86 | ): HookNotification 87 | 88 | notifyEntities(Seq(partialUpdateRequest).asJava, SparkUtils.ugi()) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/RestAtlasClient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.util 21 | 22 | import scala.collection.JavaConverters._ 23 | import com.sun.jersey.core.util.MultivaluedMapImpl 24 | import org.apache.atlas.AtlasClientV2 25 | import org.apache.atlas.model.SearchFilter 26 | import org.apache.atlas.model.instance.AtlasEntity 27 | import org.apache.atlas.model.instance.AtlasEntity.{AtlasEntitiesWithExtInfo, AtlasEntityWithExtInfo} 28 | import org.apache.atlas.model.typedef.AtlasTypesDef 29 | import org.apache.atlas.utils.AuthenticationUtil 30 | 31 | class RestAtlasClient(atlasClientConf: AtlasClientConf) extends AtlasClient { 32 | 33 | private val client = { 34 | if (!AuthenticationUtil.isKerberosAuthenticationEnabled) { 35 | val basicAuth = Array(atlasClientConf.get(AtlasClientConf.CLIENT_USERNAME), 36 | atlasClientConf.get(AtlasClientConf.CLIENT_PASSWORD)) 37 | new AtlasClientV2(getServerUrl(), basicAuth) 38 | } else { 39 | new AtlasClientV2(getServerUrl(): _*) 40 | } 41 | } 42 | 43 | private def getServerUrl(): Array[String] = { 44 | 45 | atlasClientConf.getUrl(AtlasClientConf.ATLAS_REST_ENDPOINT.key) match { 46 | case a: util.ArrayList[_] => a.toArray().map(b => b.toString) 47 | case s: String => Array(s) 48 | case _: Throwable => throw new IllegalArgumentException(s"Fail to get atlas.rest.address") 49 | } 50 | } 51 | 52 | override def createAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit = { 53 | client.createAtlasTypeDefs(typeDefs) 54 | } 55 | 56 | override def getAtlasTypeDefs(searchParams: MultivaluedMapImpl): AtlasTypesDef = { 57 | val searchFilter = new SearchFilter(searchParams) 58 | client.getAllTypeDefs(searchFilter) 59 | } 60 | 61 | override def updateAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit = { 62 | client.updateAtlasTypeDefs(typeDefs) 63 | } 64 | 65 | override protected def doCreateEntities(entities: Seq[AtlasEntity]): Unit = { 66 | val entitesWithExtInfo = new AtlasEntitiesWithExtInfo() 67 | entities.foreach(entitesWithExtInfo.addEntity) 68 | val response = client.createEntities(entitesWithExtInfo) 69 | try { 70 | logInfo(s"Entities ${response.getCreatedEntities.asScala.map(_.getGuid).mkString(", ")} " + 71 | s"created") 72 | } catch { 73 | case _: Throwable => throw new IllegalStateException(s"Fail to get create entities") 74 | } 75 | } 76 | 77 | override protected def doDeleteEntityWithUniqueAttr( 78 | entityType: String, 79 | attribute: String): Unit = { 80 | client.deleteEntityByAttribute(entityType, 81 | Map(org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME -> attribute).asJava) 82 | } 83 | 84 | override protected def doUpdateEntityWithUniqueAttr( 85 | entityType: String, 86 | attribute: String, 87 | entity: AtlasEntity): Unit = { 88 | client.updateEntityByAttribute( 89 | entityType, 90 | 
Map(org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME -> attribute).asJava, 91 | new AtlasEntityWithExtInfo(entity)) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/SACAtlasEntity.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import org.apache.atlas.model.instance.{AtlasEntity, AtlasObjectId} 21 | 22 | trait SACAtlasReferenceable { 23 | def typeName: String 24 | def qualifiedName: String 25 | def asObjectId: AtlasObjectId 26 | } 27 | 28 | case class SACAtlasEntityReference(ref: AtlasObjectId) extends SACAtlasReferenceable { 29 | require(typeName != null && !typeName.isEmpty) 30 | require(qualifiedName != null && !qualifiedName.isEmpty) 31 | 32 | override def typeName: String = ref.getTypeName 33 | 34 | override def qualifiedName: String = ref.getUniqueAttributes.get( 35 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME).toString 36 | 37 | override def asObjectId: AtlasObjectId = ref 38 | } 39 | 40 | case class SACAtlasEntityWithDependencies( 41 | entity: AtlasEntity, 42 | dependencies: Seq[SACAtlasReferenceable]) extends SACAtlasReferenceable { 43 | 44 | require(typeName != null && !typeName.isEmpty) 45 | require(qualifiedName != null && !qualifiedName.isEmpty) 46 | 47 | override def typeName: String = entity.getTypeName 48 | 49 | override def qualifiedName: String = entity.getAttribute( 50 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME).toString 51 | 52 | override def asObjectId: AtlasObjectId = AtlasUtils.entityToReference(entity, useGuid = false) 53 | 54 | def dependenciesAdded(deps: Seq[SACAtlasReferenceable]): SACAtlasEntityWithDependencies = { 55 | new SACAtlasEntityWithDependencies(entity, dependencies ++ deps) 56 | } 57 | } 58 | 59 | object SACAtlasEntityWithDependencies { 60 | def apply(entity: AtlasEntity): SACAtlasEntityWithDependencies = { 61 | new SACAtlasEntityWithDependencies(entity, Seq.empty) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/SparkAtlasEventTracker.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import com.google.common.annotations.VisibleForTesting 21 | import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} 22 | import org.apache.spark.sql.catalyst.catalog.ExternalCatalogEvent 23 | import org.apache.spark.sql.execution.QueryExecution 24 | import org.apache.spark.sql.util.QueryExecutionListener 25 | import com.hortonworks.spark.atlas.sql._ 26 | import com.hortonworks.spark.atlas.ml.MLPipelineEventProcessor 27 | import com.hortonworks.spark.atlas.utils.Logging 28 | 29 | class SparkAtlasEventTracker(atlasClient: AtlasClient, atlasClientConf: AtlasClientConf) 30 | extends SparkListener with QueryExecutionListener with Logging { 31 | 32 | def this(atlasClientConf: AtlasClientConf) = { 33 | this(AtlasClient.atlasClient(atlasClientConf), atlasClientConf) 34 | } 35 | 36 | def this() { 37 | this(new AtlasClientConf) 38 | } 39 | 40 | private val enabled: Boolean = AtlasUtils.isSacEnabled(atlasClientConf) 41 | 42 | // Processor to handle DDL related events 43 | @VisibleForTesting 44 | private[atlas] val catalogEventTracker = 45 | new SparkCatalogEventProcessor(atlasClient, atlasClientConf) 46 | catalogEventTracker.startThread() 47 | 48 | // Processor to handle DML related events 49 | private val executionPlanTracker = new SparkExecutionPlanProcessor(atlasClient, atlasClientConf) 50 | executionPlanTracker.startThread() 51 | 52 | private val mlEventTracker = new MLPipelineEventProcessor(atlasClient, atlasClientConf) 53 | mlEventTracker.startThread() 54 | 55 | override def onOtherEvent(event: SparkListenerEvent): Unit = { 56 | if (!enabled) { 57 | // No op if SAC is disabled 58 | return 59 | } 60 | 61 | // We only care about catalog (DDL) and ML related events. 62 | event match { 63 | case e: ExternalCatalogEvent => catalogEventTracker.pushEvent(e) 64 | case e: SparkListenerEvent if e.getClass.getName.contains("org.apache.spark.ml") => 65 | mlEventTracker.pushEvent(e) 66 | case _ => // Ignore other events 67 | } 68 | } 69 | 70 | override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { 71 | if (!enabled) { 72 | // No op if SAC is disabled 73 | return 74 | } 75 | 76 | if (qe.logical.isStreaming) { 77 | // Streaming queries are tracked via SparkAtlasStreamingQueryEventTracker 78 | return 79 | } 80 | 81 | val qd = QueryDetail.fromQueryExecutionListener(qe, durationNs) 82 | executionPlanTracker.pushEvent(qd) 83 | } 84 | 85 | override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { 86 | // No-op: SAC is only one of the listeners.
87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/SparkAtlasStreamingQueryEventTracker.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import com.hortonworks.spark.atlas.sql.{QueryDetail, SparkExecutionPlanProcessor} 21 | 22 | import scala.collection.mutable 23 | import org.apache.spark.sql.streaming.StreamingQueryListener 24 | import org.apache.spark.sql.streaming.StreamingQueryListener._ 25 | import com.hortonworks.spark.atlas.utils.Logging 26 | import org.apache.spark.sql.SparkSession 27 | import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper} 28 | 29 | class SparkAtlasStreamingQueryEventTracker( 30 | atlasClient: AtlasClient, 31 | atlasClientConf: AtlasClientConf) 32 | extends StreamingQueryListener with Logging { 33 | 34 | def this(atlasClientConf: AtlasClientConf) = { 35 | this(AtlasClient.atlasClient(atlasClientConf), atlasClientConf) 36 | } 37 | 38 | def this() { 39 | this(new AtlasClientConf) 40 | } 41 | 42 | private val enabled: Boolean = AtlasUtils.isSacEnabled(atlasClientConf) 43 | 44 | private val executionPlanTracker = new SparkExecutionPlanProcessor(atlasClient, atlasClientConf) 45 | executionPlanTracker.startThread() 46 | 47 | override def onQueryStarted(event: QueryStartedEvent): Unit = { 48 | logDebug(s"Start to track the Spark Streaming query in the Spark Atlas $event") 49 | } 50 | 51 | override def onQueryProgress(event: QueryProgressEvent): Unit = { 52 | if (!enabled) { 53 | // No op if SAC is disabled 54 | return 55 | } 56 | logInfo(s"Track running Spark Streaming query in the Spark Atlas: $event") 57 | val query = SparkSession.active.streams.get(event.progress.id) 58 | if (query != null) { 59 | val qd = query match { 60 | case query: StreamingQueryWrapper => 61 | Some(QueryDetail.fromStreamingQueryListener(query.streamingQuery, event)) 62 | 63 | case query: StreamExecution => 64 | Some(QueryDetail.fromStreamingQueryListener(query, event)) 65 | 66 | case _ => 67 | logWarn(s"Unexpected type of streaming query: ${query.getClass}") 68 | None 69 | } 70 | 71 | qd.foreach { q => 72 | if (q.qe != null) { 73 | executionPlanTracker.pushEvent(q) 74 | } else { 75 | logInfo(s"Can't retrieve query execution information for query ${event.progress.id}" + 76 | " - skip and wait for next batch.") 77 | } 78 | } 79 | } else { 80 | logWarn(s"Cannot find query ${event.progress.id} from active spark session!") 81 | } 82 | } 83 | 84 | override def onQueryTerminated(event: QueryTerminatedEvent): Unit = { 85 | 
logDebug(s"Tracked Spark Streaming query terminated in Spark Atlas: $event") 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/sql/Harvester.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import com.hortonworks.spark.atlas.SACAtlasReferenceable 21 | 22 | trait Harvester[T] { 23 | def harvest(node: T, qd: QueryDetail): Seq[SACAtlasReferenceable] 24 | } 25 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/sql/KafkaTopicInformation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | case class KafkaTopicInformation(topicName: String, clusterName: Option[String] = None) 21 | 22 | object KafkaTopicInformation { 23 | def getQualifiedName(ti: KafkaTopicInformation, defaultClusterName: String): String = { 24 | val cName = ti.clusterName.getOrElse(defaultClusterName) 25 | s"${ti.topicName}@$cName" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/sql/SparkCatalogEventProcessor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import scala.collection.mutable 21 | import org.apache.atlas.model.instance.AtlasEntity 22 | import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException 23 | import org.apache.spark.sql.catalyst.catalog._ 24 | import com.hortonworks.spark.atlas.{AbstractEventProcessor, AtlasClient, AtlasClientConf, AtlasEntityReadHelper} 25 | import com.hortonworks.spark.atlas.types.{AtlasEntityUtils, external} 26 | import com.hortonworks.spark.atlas.utils.{Logging, SparkUtils} 27 | 28 | class SparkCatalogEventProcessor( 29 | private[atlas] val atlasClient: AtlasClient, 30 | val conf: AtlasClientConf) 31 | extends AbstractEventProcessor[ExternalCatalogEvent] with AtlasEntityUtils with Logging { 32 | 33 | private val cachedObject = new mutable.WeakHashMap[String, Object] 34 | 35 | override protected def process(e: ExternalCatalogEvent): Unit = { 36 | if (SparkUtils.usingRemoteMetastoreService()) { 37 | // SAC will not handle any DDL events when remote HMS is used: 38 | // Hive hook will take care of all DDL events in Hive Metastore Service. 39 | // No-op here. 40 | return 41 | } 42 | 43 | e match { 44 | case CreateDatabasePreEvent(_) => // No-op 45 | 46 | case CreateDatabaseEvent(db) => 47 | val dbDefinition = SparkUtils.getExternalCatalog().getDatabase(db) 48 | val entity = sparkDbToEntity(dbDefinition) 49 | atlasClient.createEntitiesWithDependencies(entity) 50 | logDebug(s"Created db entity $db") 51 | 52 | case DropDatabasePreEvent(db) => 53 | try { 54 | cachedObject.put(sparkDbUniqueAttribute(db), 55 | SparkUtils.getExternalCatalog().getDatabase(db)) 56 | } catch { 57 | case _: NoSuchDatabaseException => 58 | logDebug(s"Spark already deleted the database: $db") 59 | } 60 | 61 | case DropDatabaseEvent(db) => 62 | atlasClient.deleteEntityWithUniqueAttr(sparkDbType, sparkDbUniqueAttribute(db)) 63 | 64 | cachedObject.remove(sparkDbUniqueAttribute(db)).foreach { o => 65 | val dbDef = o.asInstanceOf[CatalogDatabase] 66 | val path = dbDef.locationUri.toString 67 | val pathEntity = external.pathToEntity(path) 68 | 69 | atlasClient.deleteEntityWithUniqueAttr(pathEntity.entity.getTypeName, 70 | AtlasEntityReadHelper.getQualifiedName(pathEntity.entity)) 71 | } 72 | 73 | logDebug(s"Deleted db entity $db") 74 | 75 | case CreateTablePreEvent(_, _) => // No-op 76 | 77 | // TODO. 
We should also avoid creating/altering view tables in Atlas 78 | case CreateTableEvent(db, table) => 79 | val tableDefinition = SparkUtils.getExternalCatalog().getTable(db, table) 80 | val tableEntity = sparkTableToEntity(tableDefinition) 81 | atlasClient.createEntitiesWithDependencies(tableEntity) 82 | logDebug(s"Created table entity $table without columns") 83 | 84 | case DropTablePreEvent(_, _) => // No-op 85 | 86 | case DropTableEvent(db, table) => 87 | logDebug(s"Can't handle drop table event since we don't have context information for " + 88 | s"table $table in db $db. Can't delete table entity and corresponding entities.") 89 | 90 | case RenameTableEvent(db, name, newName) => 91 | // Update storageFormat's unique attribute 92 | val sdEntity = new AtlasEntity(sparkStorageFormatType) 93 | sdEntity.setAttribute(org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 94 | sparkStorageFormatUniqueAttribute(db, newName)) 95 | atlasClient.updateEntityWithUniqueAttr( 96 | sparkStorageFormatType, 97 | sparkStorageFormatUniqueAttribute(db, name), 98 | sdEntity) 99 | 100 | // Update Table name and Table's unique attribute 101 | val tableEntity = new AtlasEntity(sparkTableType) 102 | tableEntity.setAttribute(org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 103 | sparkTableUniqueAttribute(db, newName)) 104 | tableEntity.setAttribute("name", newName) 105 | atlasClient.updateEntityWithUniqueAttr( 106 | sparkTableType, 107 | sparkTableUniqueAttribute(db, name), 108 | tableEntity) 109 | 110 | logDebug(s"Renamed table entity $name to $newName") 111 | 112 | case AlterDatabaseEvent(db) => 113 | val dbDefinition = SparkUtils.getExternalCatalog().getDatabase(db) 114 | val dbEntity = sparkDbToEntity(dbDefinition) 115 | atlasClient.createEntitiesWithDependencies(dbEntity) 116 | logDebug(s"Updated DB entity $db with new properties") 117 | 118 | case AlterTableEvent(db, table, kind) => 119 | val tableDefinition = SparkUtils.getExternalCatalog().getTable(db, table) 120 | kind match { 121 | case "table" => 122 | val tableEntity = sparkTableToEntityForAlterTable(tableDefinition) 123 | atlasClient.createEntitiesWithDependencies(tableEntity) 124 | logDebug(s"Updated table entity $table without columns") 125 | 126 | case "dataSchema" => 127 | // We don't track column-level changes here 128 | logDebug("Detected table schema update but ignored it: " + 129 | "column updates will not be tracked here") 130 | 131 | case "stats" => 132 | logDebug("Stats update will not be tracked here") 133 | 134 | case _ => 135 | // No op. 136 | } 137 | 138 | case f => 139 | logDebug(s"Dropping unknown event $f") 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/sql/SparkExtension.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} 21 | import org.apache.spark.sql.catalyst.expressions.Expression 22 | import org.apache.spark.sql.catalyst.parser.ParserInterface 23 | import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan 24 | import org.apache.spark.sql.types.{DataType, StructType} 25 | import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} 26 | 27 | 28 | class SparkExtension extends (SparkSessionExtensions => Unit) { 29 | def apply(e: SparkSessionExtensions): Unit = { 30 | e.injectParser(SparkAtlasConnectorParser) 31 | } 32 | } 33 | 34 | case class SparkAtlasConnectorParser(spark: SparkSession, delegate: ParserInterface) 35 | extends ParserInterface { 36 | override def parsePlan(sqlText: String): LogicalPlan = { 37 | SQLQuery.set(sqlText) 38 | delegate.parsePlan(sqlText) 39 | } 40 | 41 | override def parseExpression(sqlText: String): Expression = 42 | delegate.parseExpression(sqlText) 43 | 44 | override def parseTableIdentifier(sqlText: String): TableIdentifier = 45 | delegate.parseTableIdentifier(sqlText) 46 | 47 | override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = 48 | delegate.parseFunctionIdentifier(sqlText) 49 | 50 | override def parseTableSchema(sqlText: String): StructType = 51 | delegate.parseTableSchema(sqlText) 52 | 53 | override def parseDataType(sqlText: String): DataType = 54 | delegate.parseDataType(sqlText) 55 | } 56 | 57 | object SQLQuery { 58 | private[this] val sqlQuery = new ThreadLocal[String] 59 | def get(): String = sqlQuery.get 60 | def set(s: String): Unit = sqlQuery.set(s) 61 | } 62 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/types/AtlasEntityUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.types 19 | 20 | import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable} 21 | import com.hortonworks.spark.atlas.{AtlasClientConf, SACAtlasEntityWithDependencies, SACAtlasReferenceable} 22 | import com.hortonworks.spark.atlas.utils.{Logging, SparkUtils} 23 | import org.apache.spark.ml.Pipeline 24 | 25 | trait AtlasEntityUtils extends Logging { 26 | 27 | def conf: AtlasClientConf 28 | 29 | def clusterName: String = conf.get(AtlasClientConf.CLUSTER_NAME) 30 | 31 | def sparkDbType: String = metadata.DB_TYPE_STRING 32 | 33 | def sparkDbToEntity(dbDefinition: CatalogDatabase): SACAtlasEntityWithDependencies = { 34 | internal.sparkDbToEntity(dbDefinition, clusterName, SparkUtils.currUser()) 35 | } 36 | 37 | def sparkDbUniqueAttribute(db: String): String = { 38 | internal.sparkDbUniqueAttribute(db) 39 | } 40 | 41 | def sparkStorageFormatType: String = metadata.STORAGEDESC_TYPE_STRING 42 | 43 | def sparkStorageFormatToEntity( 44 | storageFormat: CatalogStorageFormat, 45 | db: String, 46 | table: String): SACAtlasEntityWithDependencies = { 47 | internal.sparkStorageFormatToEntity(storageFormat, db, table) 48 | } 49 | 50 | def sparkStorageFormatUniqueAttribute(db: String, table: String): String = { 51 | internal.sparkStorageFormatUniqueAttribute(db, table) 52 | } 53 | 54 | def sparkTableType: String = metadata.TABLE_TYPE_STRING 55 | 56 | def tableToEntity( 57 | tableDefinition: CatalogTable, 58 | mockDbDefinition: Option[CatalogDatabase] = None): SACAtlasReferenceable = { 59 | if (SparkUtils.usingRemoteMetastoreService()) { 60 | external.hiveTableToReference(tableDefinition, clusterName, mockDbDefinition) 61 | } else { 62 | internal.sparkTableToEntity(tableDefinition, clusterName, mockDbDefinition) 63 | } 64 | } 65 | 66 | def sparkTableToEntity( 67 | tableDefinition: CatalogTable, 68 | mockDbDefinition: Option[CatalogDatabase] = None): SACAtlasReferenceable = { 69 | internal.sparkTableToEntity(tableDefinition, clusterName, mockDbDefinition) 70 | } 71 | 72 | def sparkTableToEntityForAlterTable( 73 | tableDefinition: CatalogTable, 74 | mockDbDefinition: Option[CatalogDatabase] = None): SACAtlasReferenceable = { 75 | internal.sparkTableToEntityForAlterTable(tableDefinition, clusterName, mockDbDefinition) 76 | } 77 | 78 | def sparkTableUniqueAttribute(db: String, table: String): String = { 79 | internal.sparkTableUniqueAttribute(db, table) 80 | } 81 | 82 | def pipelineUniqueAttribute(pipeline: Pipeline): String = { 83 | pipeline.uid 84 | } 85 | 86 | def processType: String = metadata.PROCESS_TYPE_STRING 87 | 88 | def processUniqueAttribute(executionId: Long): String = 89 | internal.sparkProcessUniqueAttribute(executionId) 90 | 91 | // If there is cycle, return empty output entity list 92 | def cleanOutput( 93 | inputs: Seq[SACAtlasReferenceable], 94 | outputs: Seq[SACAtlasReferenceable]): List[SACAtlasReferenceable] = { 95 | val qualifiedNames = inputs.map(_.qualifiedName) 96 | val isCycle = outputs.exists(x => qualifiedNames.contains(x.qualifiedName)) 97 | if (isCycle) { 98 | logWarn("Detected cycle - same entity observed to both input and output. 
" + 99 | "Discarding output entities as Atlas doesn't support cycle.") 100 | List.empty 101 | } else { 102 | outputs.toList 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/types/metadata.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.types 19 | 20 | object metadata { 21 | val METADATA_VERSION = "1.0" 22 | val DB_TYPE_STRING = "spark_db" 23 | val STORAGEDESC_TYPE_STRING = "spark_storagedesc" 24 | val TABLE_TYPE_STRING = "spark_table" 25 | val PROCESS_TYPE_STRING = "spark_process" 26 | val ML_DIRECTORY_TYPE_STRING = "spark_ml_directory" 27 | val ML_PIPELINE_TYPE_STRING = "spark_ml_pipeline" 28 | val ML_MODEL_TYPE_STRING = "spark_ml_model" 29 | } 30 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/utils/CatalogUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.utils 19 | 20 | import java.net.URI 21 | 22 | import org.apache.spark.sql.catalyst.TableIdentifier 23 | import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType} 24 | import org.apache.spark.sql.types.StructType 25 | 26 | 27 | object CatalogUtils { 28 | 29 | def createDB(name: String, location: String): CatalogDatabase = { 30 | CatalogDatabase(name, "", new URI(location), Map.empty) 31 | } 32 | 33 | def createStorageFormat( 34 | locationUri: Option[URI] = None, 35 | inputFormat: Option[String] = None, 36 | outputFormat: Option[String] = None, 37 | serd: Option[String] = None, 38 | compressed: Boolean = false, 39 | properties: Map[String, String] = Map.empty): CatalogStorageFormat = { 40 | CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties) 41 | } 42 | 43 | def createTable( 44 | db: String, 45 | table: String, 46 | schema: StructType, 47 | storage: CatalogStorageFormat, 48 | isHiveTable: Boolean = false): CatalogTable = { 49 | CatalogTable( 50 | TableIdentifier(table, Some(db)), 51 | CatalogTableType.MANAGED, 52 | storage, 53 | schema, 54 | provider = if (isHiveTable) Some("hive") else None) 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/utils/JdbcUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.utils 19 | 20 | import com.hortonworks.spark.atlas.sql.CommandsHarvester.logWarn 21 | 22 | object JdbcUtils { 23 | 24 | private val DB2_PREFIX = "jdbc:db2" 25 | private val DERBY_PREFIX = "jdbc:derby" 26 | private val MARIADB_PREFIX = "jdbc:mariadb" 27 | private val MYSQL_PREFIX = "jdbc:mysql" 28 | private val ORACLE_PREFIX = "jdbc:oracle" 29 | private val POSTGRES_PREFIX = "jdbc:postgresql" 30 | private val SQL_SERVER_PREFIX = "jdbc:sqlserver" 31 | private val TERADATA_PREFIX = "jdbc:teradata" 32 | 33 | /** 34 | * Retrieves the database name from the URL 35 | * 36 | * @param url the URL used by the JDBC driver 37 | * @return the database name, or an empty string if the JDBC driver is not supported 38 | */ 39 | def getDatabaseName(url: String): String = url match { 40 | case url if url.startsWith(DB2_PREFIX) => getDatabaseNameEndOfUrl(url) 41 | case url if url.startsWith(DERBY_PREFIX) => getDatabaseNameDerbyFormat(url) 42 | case url if url.startsWith(MARIADB_PREFIX) => getDatabaseNameEndOfUrl(url) 43 | case url if url.startsWith(MYSQL_PREFIX) => getDatabaseNameEndOfUrl(url) 44 | case url if url.startsWith(ORACLE_PREFIX) => getDatabaseOracleFormat(url) 45 | case url if url.startsWith(POSTGRES_PREFIX) => getDatabaseNameEndOfUrl(url) 46 | case url if url.startsWith(SQL_SERVER_PREFIX) => getDatabaseSqlServerFormat(url) 47 | case url if url.startsWith(TERADATA_PREFIX) => getDatabaseNameTeradataFormat(url) 48 | case _ => 49 | logWarn(s"Unsupported JDBC driver for url: $url") 50 | "" 51 | } 52 | 53 | /** 54 | * Retrieves the database name when the URL is in host:port/dbname format 55 | */ 56 | private def getDatabaseNameEndOfUrl(url: String): String = { 57 | val parsedUrl = url.substring(url.lastIndexOf("/") + 1) 58 | if (parsedUrl.contains("?")) { 59 | return parsedUrl.substring(0, parsedUrl.indexOf("?")) 60 | } 61 | 62 | parsedUrl 63 | } 64 | 65 | /** 66 | * Retrieves the database name based on Derby format 67 | */ 68 | private def getDatabaseNameDerbyFormat(url: String): String = { 69 | val parsedUrl = url match { 70 | case url if url.contains("/") => url.substring(url.lastIndexOf("/") + 1) 71 | case _ => url.substring(url.lastIndexOf(":") + 1) 72 | } 73 | 74 | if (parsedUrl.contains(";")) { 75 | return parsedUrl.substring(0, parsedUrl.indexOf(";")) 76 | } 77 | 78 | parsedUrl 79 | } 80 | 81 | /** 82 | * Retrieves the database name based on Teradata format 83 | */ 84 | private def getDatabaseNameTeradataFormat(url: String): String = { 85 | val databaseKey = "/DATABASE=" 86 | val parsedUrl = url.substring(url.indexOf(databaseKey) + databaseKey.length) 87 | if (parsedUrl.contains("/")) { 88 | return parsedUrl.substring(0, parsedUrl.indexOf("/")) 89 | } 90 | 91 | parsedUrl 92 | } 93 | 94 | /** 95 | * Retrieves the database name based on Oracle format 96 | * e.g.
jdbc:oracle:thin:@localhost:1521:testdb 97 | */ 98 | private def getDatabaseOracleFormat(url: String): String = { 99 | url.substring(url.toUpperCase().lastIndexOf(":") + 1) 100 | } 101 | 102 | /** 103 | * Retrieves the database name based on Microsoft SQL Server format 104 | */ 105 | private def getDatabaseSqlServerFormat(url: String): String = { 106 | val databaseNameKey = ";databaseName=" 107 | val parsedUrl = url.substring(url.indexOf(databaseNameKey) + databaseNameKey.length) 108 | if (parsedUrl.contains(";")) { 109 | return parsedUrl.substring(0, parsedUrl.indexOf(";")) 110 | } 111 | 112 | parsedUrl 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/utils/Logging.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.utils 19 | 20 | import org.slf4j.LoggerFactory 21 | 22 | trait Logging { 23 | lazy val logger = LoggerFactory.getLogger(this.getClass) 24 | 25 | def logTrace(message: => Any): Unit = { 26 | if (logger.isTraceEnabled) { 27 | logger.trace(message.toString) 28 | } 29 | } 30 | 31 | def logDebug(message: => Any): Unit = { 32 | if (logger.isDebugEnabled) { 33 | logger.debug(message.toString) 34 | } 35 | } 36 | 37 | def logInfo(message: => Any): Unit = { 38 | if (logger.isInfoEnabled) { 39 | logger.info(message.toString) 40 | } 41 | } 42 | 43 | def logWarn(message: => Any): Unit = { 44 | logger.warn(message.toString) 45 | } 46 | 47 | def logWarn(message: => Any, t: Throwable): Unit = { 48 | logger.warn(message.toString, t) 49 | } 50 | 51 | def logError(message: => Any, t: Throwable): Unit = { 52 | logger.error(message.toString, t) 53 | } 54 | 55 | def logError(message: => Any): Unit = { 56 | logger.error(message.toString) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/main/scala/com/hortonworks/spark/atlas/utils/ReflectionHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.utils 19 | 20 | import scala.util.control.NonFatal 21 | 22 | object ReflectionHelper extends Logging { 23 | import scala.reflect.runtime.universe.{TermName, runtimeMirror, typeOf, TypeTag} 24 | private val currentMirror = runtimeMirror(getClass.getClassLoader) 25 | 26 | def reflectField[T, OUT](obj: Any, fieldName: String)(implicit ttag: TypeTag[T]): Option[OUT] = { 27 | val relMirror = currentMirror.reflect(obj) 28 | 29 | try { 30 | val method = typeOf[T].decl(TermName(fieldName)).asTerm.accessed.asTerm 31 | 32 | Some(relMirror.reflectField(method).get.asInstanceOf[OUT]) 33 | } catch { 34 | case NonFatal(_) => 35 | logWarn(s"Failed to reflect field $fieldName from $obj. " + 36 | s"Maybe missing to apply necessary patch?") 37 | None 38 | } 39 | } 40 | 41 | def reflectFieldWithContextClassloaderLoosenType(obj: Any, fieldName: String): Option[Any] = { 42 | val typeMirror = runtimeMirror(Thread.currentThread().getContextClassLoader) 43 | val instanceMirror = typeMirror.reflect(obj) 44 | 45 | val members = instanceMirror.symbol.typeSignature.members 46 | val field = members.find(_.name.decodedName.toString == fieldName) 47 | field match { 48 | case Some(f) => 49 | try { 50 | Some(instanceMirror.reflectField(f.asTerm).get) 51 | } catch { 52 | case NonFatal(e) => 53 | logWarn(s"Failed to reflect field $fieldName from $obj. " + 54 | s"Maybe missing to apply necessary patch? $e") 55 | None 56 | } 57 | 58 | case None => 59 | logWarn(s"Failed to reflect field $fieldName from $obj. " + 60 | s"Maybe missing to apply necessary patch?") 61 | None 62 | } 63 | } 64 | 65 | def reflectFieldWithContextClassloader[OUT](obj: Any, fieldName: String): Option[OUT] = { 66 | reflectFieldWithContextClassloaderLoosenType(obj, fieldName).map(_.asInstanceOf[OUT]) 67 | } 68 | 69 | def reflectMethodWithContextClassloaderLoosenType( 70 | obj: Any, 71 | methodName: String, 72 | params: Any*): Option[Any] = { 73 | val typeMirror = runtimeMirror(Thread.currentThread().getContextClassLoader) 74 | val instanceMirror = typeMirror.reflect(obj) 75 | 76 | val members = instanceMirror.symbol.typeSignature.members 77 | val method = members.find(_.name.decodedName.toString == methodName) 78 | method match { 79 | case Some(f) => 80 | try { 81 | Some(instanceMirror.reflectMethod(f.asMethod).apply(params: _*)) 82 | } catch { 83 | case NonFatal(_) => 84 | logWarn(s"Failed to call method $methodName from $obj via reflection. " + 85 | s"Maybe missing to apply necessary patch?") 86 | None 87 | } 88 | 89 | case None => 90 | logWarn(s"Failed to call method $methodName from $obj via reflection.
" + 91 | s"Maybe missing to apply necessary patch?") 92 | None 93 | } 94 | } 95 | 96 | def reflectMethodWithContextClassloader[OUT]( 97 | obj: Any, 98 | fieldName: String, 99 | params: Any*): Option[OUT] = { 100 | reflectMethodWithContextClassloaderLoosenType(obj, fieldName, params: _*) 101 | .map(_.asInstanceOf[OUT]) 102 | } 103 | 104 | def classForName(className: String): Class[_] = { 105 | Class.forName(className, true, getContextOrClassClassLoader) 106 | } 107 | 108 | private def getContextOrClassClassLoader: ClassLoader = 109 | Option(Thread.currentThread().getContextClassLoader).getOrElse(getClass.getClassLoader) 110 | } 111 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the file target/unit-tests.log 19 | test.appender=file 20 | log4j.rootCategory=INFO, ${test.appender} 21 | log4j.appender.file=org.apache.log4j.FileAppender 22 | log4j.appender.file.append=true 23 | log4j.appender.file.file=target/unit-tests.log 24 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n 26 | 27 | # Silence some noisy libraries. 28 | log4j.logger.org.apache.http=WARN 29 | log4j.logger.org.apache.spark=INFO 30 | log4j.logger.org.eclipse.jetty=WARN 31 | log4j.logger.org.spark-project.jetty=WARN 32 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/resources/users.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hortonworks-spark/spark-atlas-connector/0b10e337cdfd427744a92f8505d46297afb4c295/spark-atlas-connector/src/test/resources/users.parquet -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/AtlasEntityCreationRequestHelperSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.util.UUID 21 | 22 | import com.hortonworks.spark.atlas.sql.KafkaTopicInformation 23 | import com.hortonworks.spark.atlas.sql.testhelper.CreateEntitiesTrackingAtlasClient 24 | import com.hortonworks.spark.atlas.types.{external, internal} 25 | import org.apache.atlas.model.instance.AtlasEntity 26 | import org.scalatest.{BeforeAndAfterEach, FunSuite} 27 | 28 | class AtlasEntityCreationRequestHelperSuite 29 | extends FunSuite 30 | with WithHiveSupport 31 | with BeforeAndAfterEach { 32 | 33 | private val client = new CreateEntitiesTrackingAtlasClient 34 | private var sut: AtlasEntityCreationRequestHelper = _ 35 | 36 | override protected def beforeEach(): Unit = { 37 | client.clearEntities() 38 | sut = new AtlasEntityCreationRequestHelper(client) 39 | } 40 | 41 | test("SAC-253 partial sources presented in streaming query") { 42 | val cluster = "cl1" 43 | val queryId = UUID.randomUUID() 44 | 45 | val topic1 = KafkaTopicInformation("topic1") 46 | val topic2 = KafkaTopicInformation("topic2") 47 | val topic3 = KafkaTopicInformation("topic3") 48 | val topicSink = KafkaTopicInformation("topicSink") 49 | 50 | val source1 = external.kafkaToEntity(cluster, topic1) 51 | val source2 = external.kafkaToEntity(cluster, topic2) 52 | val source3 = external.kafkaToEntity(cluster, topic3) 53 | val sink = external.kafkaToEntity(cluster, topicSink) 54 | 55 | // source1 56 | validateInputsOutputs(queryId, Seq(source1), Seq(sink), expectNoCreationRequest = false) 57 | 58 | client.clearEntities() 59 | 60 | // source1, source2 61 | validateInputsOutputs(queryId, Seq(source1, source2), Seq(sink), 62 | expectNoCreationRequest = false) 63 | 64 | client.clearEntities() 65 | 66 | // source2, source3 67 | validateInputsOutputs(queryId, Seq(source2, source3), Seq(sink), 68 | expectNoCreationRequest = false) 69 | 70 | client.clearEntities() 71 | 72 | // source1, source2 73 | validateInputsOutputs(queryId, Seq(source1, source2), Seq(sink), expectNoCreationRequest = true) 74 | 75 | client.clearEntities() 76 | 77 | // source1, source2, source3 78 | validateInputsOutputs(queryId, Seq(source1, source2, source3), Seq(sink), 79 | expectNoCreationRequest = true) 80 | } 81 | 82 | test("SAC-253 partial sinks presented in streaming query") { 83 | val cluster = "cl1" 84 | val queryId = UUID.randomUUID() 85 | 86 | val topic1 = KafkaTopicInformation("topic1") 87 | val topic2 = KafkaTopicInformation("topic2") 88 | val topic3 = KafkaTopicInformation("topic3") 89 | val topicSource = KafkaTopicInformation("topicSource") 90 | 91 | val source = external.kafkaToEntity(cluster, topicSource) 92 | val sink1 = external.kafkaToEntity(cluster, topic1) 93 | val sink2 = external.kafkaToEntity(cluster, topic2) 94 | val sink3 = external.kafkaToEntity(cluster, topic3) 95 | 96 | // sink1 97 | validateInputsOutputs(queryId, Seq(source), Seq(sink1), expectNoCreationRequest = false) 98 | 99 | client.clearEntities() 100 | 101 | // sink1, sink2 102 | validateInputsOutputs(queryId, Seq(source), Seq(sink1, sink2), expectNoCreationRequest = false) 103 | 104 
| client.clearEntities() 105 | 106 | // sink2, sink3 107 | validateInputsOutputs(queryId, Seq(source), Seq(sink2, sink3), expectNoCreationRequest = false) 108 | 109 | client.clearEntities() 110 | 111 | // sink1, sink2 112 | validateInputsOutputs(queryId, Seq(source), Seq(sink1, sink2), expectNoCreationRequest = true) 113 | 114 | client.clearEntities() 115 | 116 | // sink1, sink2, sink3 117 | validateInputsOutputs(queryId, Seq(source), Seq(sink1, sink2, sink3), 118 | expectNoCreationRequest = true) 119 | } 120 | 121 | private def validateInputsOutputs( 122 | queryId: UUID, 123 | sources: Seq[SACAtlasEntityWithDependencies], 124 | sinks: Seq[SACAtlasEntityWithDependencies], 125 | expectNoCreationRequest: Boolean): Unit = { 126 | val process = internal.etlProcessToEntity(sources, sinks, Map()) 127 | sut.requestCreation(Seq(process), Some(queryId)) 128 | 129 | if (expectNoCreationRequest) { 130 | // no entities will be created, as both inputs and outputs are subset of 131 | // accumulated inputs and outputs 132 | assert(client.createdEntities.isEmpty) 133 | } else { 134 | val allEntities = sources ++ sinks ++ Seq(process) 135 | assert(client.createdEntities.length === allEntities.length) 136 | assert(client.createdEntities.toSet === allEntities.map(_.entity).toSet) 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/BaseResourceIT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import scala.collection.JavaConverters._ 21 | import com.sun.jersey.core.util.MultivaluedMapImpl 22 | import org.apache.atlas.AtlasClientV2 23 | import org.apache.atlas.model.SearchFilter 24 | import org.apache.atlas.model.instance.AtlasEntity 25 | import org.apache.atlas.model.typedef.{AtlasStructDef, AtlasTypesDef} 26 | import org.apache.atlas.utils.AuthenticationUtil 27 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} 28 | 29 | abstract class BaseResourceIT extends FunSuite with BeforeAndAfterAll with BeforeAndAfterEach { 30 | 31 | protected var atlasUrls: Array[String] = null 32 | private var client: AtlasClientV2 = null 33 | protected val atlasClientConf = new AtlasClientConf 34 | private var uniquePostfix: Long = 0 35 | 36 | override protected def beforeAll(): Unit = { 37 | super.beforeAll() 38 | 39 | // set high timeouts so that tests do not fail due to read timeouts while you 40 | // are stepping through the code in a debugger 41 | atlasClientConf.set("atlas.client.readTimeoutMSecs", "100000000") 42 | atlasClientConf.set("atlas.client.connectTimeoutMSecs", "100000000") 43 | atlasUrls = Array(atlasClientConf.get(AtlasClientConf.ATLAS_REST_ENDPOINT)) 44 | } 45 | 46 | override protected def beforeEach(): Unit = { 47 | super.beforeEach() 48 | 49 | uniquePostfix = System.currentTimeMillis() 50 | } 51 | 52 | private def atlasClient(): AtlasClientV2 = { 53 | if (client == null) { 54 | if (!AuthenticationUtil.isKerberosAuthenticationEnabled) { 55 | client = new AtlasClientV2(atlasUrls, Array[String]("admin", "admin")) 56 | } else { 57 | client = new AtlasClientV2(atlasUrls: _*) 58 | } 59 | } 60 | 61 | client 62 | } 63 | 64 | protected def getTypeDef(name: String): AtlasStructDef = { 65 | require(atlasClient != null) 66 | 67 | val searchParams = new MultivaluedMapImpl() 68 | searchParams.add(SearchFilter.PARAM_NAME, name) 69 | val searchFilter = new SearchFilter(searchParams) 70 | val typesDef = atlasClient.getAllTypeDefs(searchFilter) 71 | if (!typesDef.getClassificationDefs.isEmpty) { 72 | typesDef.getClassificationDefs.get(0) 73 | } else if (!typesDef.getEntityDefs.isEmpty) { 74 | typesDef.getEntityDefs.get(0) 75 | } else if (!typesDef.getRelationshipDefs.isEmpty) { 76 | typesDef.getRelationshipDefs.get(0) 77 | } else { 78 | null 79 | } 80 | } 81 | 82 | protected def updateTypesDef(typesDef: AtlasTypesDef): Unit = { 83 | require(atlasClient != null) 84 | 85 | atlasClient.updateAtlasTypeDefs(typesDef) 86 | } 87 | 88 | protected def deleteTypesDef(typesDef: AtlasTypesDef): Unit = { 89 | require(atlasClient != null) 90 | 91 | atlasClient.deleteAtlasTypeDefs(typesDef) 92 | } 93 | 94 | protected def getEntity(typeName: String, uniqueAttr: String): AtlasEntity = { 95 | require(atlasClient != null) 96 | 97 | atlasClient.getEntityByAttribute(typeName, 98 | Map(org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME -> uniqueAttr).asJava) 99 | .getEntity 100 | } 101 | 102 | protected def it(desc: String)(testFn: => Unit): Unit = { 103 | test(desc) { 104 | assume( 105 | sys.env.get("ATLAS_INTEGRATION_TEST").contains("true"), 106 | "integration test can be run only when env ATLAS_INTEGRATION_TEST is set and local Atlas" + 107 | " is running") 108 | testFn 109 | } 110 | } 111 | 112 | protected def uniqueName(name: String): String = { 113 | s"${name}_$uniquePostfix" 114 | } 115 | } 116 | -------------------------------------------------------------------------------- 
/spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/KafkaClientIT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.nio.file.Files 21 | 22 | import scala.concurrent.duration._ 23 | import scala.language.postfixOps 24 | 25 | import org.apache.atlas.AtlasServiceException 26 | import org.apache.spark.sql.SparkSession 27 | import org.apache.spark.sql.catalyst.catalog._ 28 | import org.apache.spark.sql.types.{IntegerType, StringType, StructType} 29 | import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} 30 | import org.scalatest.Matchers 31 | 32 | import com.hortonworks.spark.atlas.utils.SparkUtils 33 | 34 | class KafkaClientIT extends BaseResourceIT with Matchers { 35 | import TestUtils._ 36 | 37 | private var sparkSession: SparkSession = _ 38 | 39 | private var tracker: SparkAtlasEventTracker = _ 40 | 41 | override protected def beforeAll(): Unit = { 42 | super.beforeAll() 43 | 44 | sparkSession = SparkSession.builder() 45 | .master("local") 46 | .getOrCreate() 47 | 48 | tracker = new SparkAtlasEventTracker(new KafkaAtlasClient(atlasClientConf), atlasClientConf) 49 | } 50 | 51 | override protected def afterAll(): Unit = { 52 | sparkSession.stop() 53 | SparkSession.clearActiveSession() 54 | SparkSession.clearDefaultSession() 55 | 56 | super.afterAll() 57 | } 58 | 59 | it("create / update / delete new entities") { 60 | val dbName = uniqueName("db2") 61 | val tbl1Name = uniqueName("tbl1") 62 | val tbl3Name = uniqueName("tbl3") 63 | 64 | // Create new DB 65 | val tempDbPath = Files.createTempDirectory("db_") 66 | val dbDefinition = createDB(dbName, tempDbPath.normalize().toUri.toString) 67 | SparkUtils.getExternalCatalog().createDatabase(dbDefinition, ignoreIfExists = true) 68 | tracker.onOtherEvent(CreateDatabaseEvent(dbName)) 69 | eventually(timeout(30 seconds), interval(100 milliseconds)) { 70 | val entity = getEntity( 71 | tracker.catalogEventTracker.sparkDbType, 72 | tracker.catalogEventTracker.sparkDbUniqueAttribute(dbName)) 73 | entity should not be (null) 74 | entity.getAttribute("name") should be (dbName) 75 | } 76 | 77 | // Create new table 78 | val schema = new StructType() 79 | .add("user", StringType) 80 | .add("age", IntegerType) 81 | val sd = CatalogStorageFormat.empty 82 | val tableDefinition = createTable(dbName, tbl1Name, schema, sd) 83 | SparkUtils.getExternalCatalog().createTable(tableDefinition, ignoreIfExists = true) 84 | tracker.onOtherEvent(CreateTableEvent(dbName, tbl1Name)) 85 | 86 | eventually(timeout(30 seconds), interval(100 milliseconds)) { 87 | val sdEntity = 
getEntity(tracker.catalogEventTracker.sparkStorageFormatType, 88 | tracker.catalogEventTracker.sparkStorageFormatUniqueAttribute(dbName, tbl1Name)) 89 | sdEntity should not be (null) 90 | 91 | val tblEntity = getEntity(tracker.catalogEventTracker.sparkTableType, 92 | tracker.catalogEventTracker.sparkTableUniqueAttribute(dbName, tbl1Name)) 93 | tblEntity should not be (null) 94 | tblEntity.getAttribute("name") should be (tbl1Name) 95 | } 96 | 97 | // Rename table 98 | SparkUtils.getExternalCatalog().renameTable(dbName, tbl1Name, tbl3Name) 99 | tracker.onOtherEvent(RenameTableEvent(dbName, tbl1Name, tbl3Name)) 100 | val newTblDef = SparkUtils.getExternalCatalog().getTable(dbName, tbl3Name) 101 | 102 | eventually(timeout(30 seconds), interval(100 milliseconds)) { 103 | val tblEntity = getEntity(tracker.catalogEventTracker.sparkTableType, 104 | tracker.catalogEventTracker.sparkTableUniqueAttribute(dbName, tbl3Name)) 105 | tblEntity should not be (null) 106 | tblEntity.getAttribute("name") should be (tbl3Name) 107 | 108 | val sdEntity = getEntity(tracker.catalogEventTracker.sparkStorageFormatType, 109 | tracker.catalogEventTracker.sparkStorageFormatUniqueAttribute(dbName, tbl3Name)) 110 | sdEntity should not be (null) 111 | } 112 | 113 | // Drop table 114 | tracker.onOtherEvent(DropTablePreEvent(dbName, tbl3Name)) 115 | tracker.onOtherEvent(DropTableEvent(dbName, tbl3Name)) 116 | 117 | // sleeping 2 secs - we have to do this to ensure there's no call on deletion, unfortunately... 118 | Thread.sleep(2 * 1000) 119 | // deletion request should not be added 120 | val tblEntity = getEntity(tracker.catalogEventTracker.sparkTableType, 121 | tracker.catalogEventTracker.sparkTableUniqueAttribute(dbName, tbl3Name)) 122 | tblEntity should not be (null) 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.net.URI 21 | 22 | import org.apache.spark.sql.catalyst.TableIdentifier 23 | import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType} 24 | import org.apache.spark.sql.types.StructType 25 | import com.hortonworks.spark.atlas.utils.SparkUtils 26 | import org.apache.atlas.model.instance.AtlasObjectId 27 | 28 | object TestUtils { 29 | def createDB(name: String, location: String): CatalogDatabase = { 30 | CatalogDatabase(name, "", new URI(location), Map.empty) 31 | } 32 | 33 | def createStorageFormat( 34 | locationUri: Option[URI] = None, 35 | inputFormat: Option[String] = None, 36 | outputFormat: Option[String] = None, 37 | serd: Option[String] = None, 38 | compressed: Boolean = false, 39 | properties: Map[String, String] = Map.empty): CatalogStorageFormat = { 40 | CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties) 41 | } 42 | 43 | def createTable( 44 | db: String, 45 | table: String, 46 | schema: StructType, 47 | storage: CatalogStorageFormat, 48 | isHiveTable: Boolean = false): CatalogTable = { 49 | CatalogTable( 50 | TableIdentifier(table, Some(db)), 51 | CatalogTableType.MANAGED, 52 | storage, 53 | schema, 54 | provider = if (isHiveTable) Some("hive") else None, 55 | bucketSpec = None, 56 | owner = SparkUtils.currUser()) 57 | } 58 | 59 | def assertSubsetOf[T](set: Set[T], subset: Set[T]): Unit = { 60 | assert(subset.subsetOf(set), s"$subset is not a subset of $set") 61 | } 62 | 63 | def findEntity( 64 | entities: Seq[SACAtlasReferenceable], 65 | objId: AtlasObjectId): Option[SACAtlasReferenceable] = { 66 | entities.find(p => p.asObjectId == objId) 67 | } 68 | 69 | def findEntities( 70 | entities: Seq[SACAtlasReferenceable], 71 | objIds: Seq[AtlasObjectId]): Seq[SACAtlasReferenceable] = { 72 | entities.filter(p => objIds.contains(p.asObjectId)) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/WithHDFSSupport.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.io.File 21 | 22 | import org.apache.hadoop.conf.Configuration 23 | import org.apache.hadoop.fs.FileUtil 24 | import org.apache.hadoop.hdfs.MiniDFSCluster 25 | import org.apache.spark.sql.SparkSession 26 | import org.scalatest.{BeforeAndAfterAll, Suite} 27 | 28 | trait WithHDFSSupport extends BeforeAndAfterAll { self: Suite => 29 | 30 | protected var sparkSession: SparkSession = _ 31 | 32 | private var hdfsCluster: MiniDFSCluster = _ 33 | protected var hdfsURI: String = _ 34 | 35 | private def cleanupAnyExistingSession(): Unit = { 36 | val session = SparkSession.getActiveSession.orElse(SparkSession.getDefaultSession) 37 | if (session.isDefined) { 38 | session.get.sessionState.catalog.reset() 39 | session.get.stop() 40 | SparkSession.clearActiveSession() 41 | SparkSession.clearDefaultSession() 42 | } 43 | } 44 | 45 | override protected def beforeAll(): Unit = { 46 | super.beforeAll() 47 | 48 | cleanupAnyExistingSession() 49 | 50 | val baseDir = new File("./target/hdfs/").getAbsoluteFile() 51 | FileUtil.fullyDelete(baseDir) 52 | 53 | val conf = new Configuration() 54 | conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath()) 55 | val builder = new MiniDFSCluster.Builder(conf) 56 | 57 | hdfsCluster = builder.build() 58 | hdfsURI = s"hdfs://localhost:${hdfsCluster.getNameNodePort()}/" 59 | 60 | sparkSession = SparkSession.builder() 61 | .master("local") 62 | .appName(this.getClass.getCanonicalName) 63 | .enableHiveSupport() 64 | .config("spark.hadoop.fs.defaultFS", hdfsURI) 65 | .config("spark.ui.enabled", "false") 66 | .getOrCreate() 67 | } 68 | 69 | override protected def afterAll(): Unit = { 70 | try { 71 | sparkSession.sessionState.catalog.reset() 72 | sparkSession.stop() 73 | SparkSession.clearActiveSession() 74 | SparkSession.clearDefaultSession() 75 | } finally { 76 | sparkSession = null 77 | } 78 | System.clearProperty("spark.driver.port") 79 | 80 | hdfsCluster.shutdown(true) 81 | 82 | super.afterAll() 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/WithHiveSupport.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.io.File 21 | import java.nio.file.Files 22 | 23 | import org.apache.commons.io.FileUtils 24 | import org.apache.spark.sql.SparkSession 25 | import org.scalatest.{BeforeAndAfterAll, Suite} 26 | 27 | trait WithHiveSupport extends BeforeAndAfterAll { self: Suite => 28 | 29 | protected var sparkSession: SparkSession = _ 30 | 31 | private var metastoreDir: String = _ 32 | private var warehouseDir: String = _ 33 | 34 | private def cleanupAnyExistingSession(): Unit = { 35 | val session = SparkSession.getActiveSession.orElse(SparkSession.getDefaultSession) 36 | if (session.isDefined) { 37 | session.get.sessionState.catalog.reset() 38 | session.get.stop() 39 | SparkSession.clearActiveSession() 40 | SparkSession.clearDefaultSession() 41 | } 42 | } 43 | 44 | override protected def beforeAll(): Unit = { 45 | super.beforeAll() 46 | 47 | cleanupAnyExistingSession() 48 | 49 | metastoreDir = Files.createTempDirectory("sac-metastore-").toString 50 | warehouseDir = Files.createTempDirectory("sac-warehouse-").toString 51 | System.setProperty("derby.system.home", metastoreDir) 52 | sparkSession = SparkSession.builder() 53 | .master("local") 54 | .appName(this.getClass.getCanonicalName) 55 | .enableHiveSupport() 56 | .config("spark.ui.enabled", "false") 57 | .config("spark.sql.warehouse.dir", warehouseDir) 58 | .getOrCreate() 59 | } 60 | 61 | override protected def afterAll(): Unit = { 62 | try { 63 | sparkSession.sessionState.catalog.reset() 64 | sparkSession.stop() 65 | SparkSession.clearActiveSession() 66 | SparkSession.clearDefaultSession() 67 | } finally { 68 | sparkSession = null 69 | FileUtils.deleteDirectory(new File(warehouseDir)) 70 | } 71 | System.clearProperty("spark.driver.port") 72 | 73 | super.afterAll() 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/WithRemoteHiveMetastoreServiceSupport.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas 19 | 20 | import java.io.File 21 | import java.nio.file.Files 22 | 23 | import com.hortonworks.spark.atlas.utils.SparkUtils 24 | import com.hotels.beeju.ThriftHiveMetaStoreTestUtil 25 | import org.apache.commons.io.FileUtils 26 | import org.apache.spark.sql.SparkSession 27 | import org.scalatest.{BeforeAndAfterAll, Suite} 28 | 29 | trait WithRemoteHiveMetastoreServiceSupport extends BeforeAndAfterAll { self: Suite => 30 | protected val dbName = "sac_hive_metastore" 31 | 32 | protected var sparkSession: SparkSession = _ 33 | 34 | private var warehouseDir: String = _ 35 | 36 | private val hive = new ThriftHiveMetaStoreTestUtil(dbName) 37 | 38 | private def cleanupAnyExistingSession(): Unit = { 39 | val session = SparkSession.getActiveSession.orElse(SparkSession.getDefaultSession) 40 | if (session.isDefined) { 41 | session.get.sessionState.catalog.reset() 42 | session.get.stop() 43 | SparkSession.clearActiveSession() 44 | SparkSession.clearDefaultSession() 45 | } 46 | } 47 | 48 | override protected def beforeAll(): Unit = { 49 | super.beforeAll() 50 | 51 | cleanupAnyExistingSession() 52 | 53 | hive.before() 54 | 55 | warehouseDir = Files.createTempDirectory("sac-warehouse-").toString 56 | sparkSession = SparkSession.builder() 57 | .master("local") 58 | .appName(this.getClass.getCanonicalName) 59 | .enableHiveSupport() 60 | .config("spark.ui.enabled", "false") 61 | .config("spark.sql.warehouse.dir", warehouseDir) 62 | .config("spark.hadoop.hive.metastore.uris", hive.getThriftConnectionUri) 63 | .getOrCreate() 64 | 65 | // reset hiveConf to make sure the configuration change takes effect 66 | SparkUtils.resetHiveConf 67 | } 68 | 69 | override protected def afterAll(): Unit = { 70 | try { 71 | hive.after() 72 | sparkSession.sessionState.catalog.reset() 73 | sparkSession.stop() 74 | SparkSession.clearActiveSession() 75 | SparkSession.clearDefaultSession() 76 | } finally { 77 | // reset hiveConf again to prevent affecting other tests 78 | SparkUtils.resetHiveConf 79 | 80 | sparkSession = null 81 | FileUtils.deleteDirectory(new File(warehouseDir)) 82 | } 83 | System.clearProperty("spark.driver.port") 84 | 85 | super.afterAll() 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/ml/MLPipelineTrackerIT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.ml 19 | 20 | import org.apache.spark.ml.Pipeline 21 | import org.apache.spark.ml.feature.MinMaxScaler 22 | import org.apache.spark.ml.linalg.Vectors 23 | import org.apache.spark.sql.types.{IntegerType, StringType, StructType} 24 | import org.scalatest.Matchers 25 | import com.hortonworks.spark.atlas._ 26 | import com.hortonworks.spark.atlas.types._ 27 | import com.hortonworks.spark.atlas.TestUtils._ 28 | 29 | class MLPipelineTrackerIT extends BaseResourceIT with Matchers with WithHiveSupport { 30 | private val atlasClient = new RestAtlasClient(atlasClientConf) 31 | 32 | def clusterName: String = atlasClientConf.get(AtlasClientConf.CLUSTER_NAME) 33 | 34 | def getTableEntity(tableName: String): SACAtlasEntityWithDependencies = { 35 | val dbDefinition = createDB("db1", "hdfs:///test/db/db1") 36 | val sd = createStorageFormat() 37 | val schema = new StructType() 38 | .add("user", StringType, false) 39 | .add("age", IntegerType, true) 40 | val tableDefinition = createTable("db1", s"$tableName", schema, sd) 41 | internal.sparkTableToEntity(tableDefinition, clusterName, Some(dbDefinition)) 42 | } 43 | 44 | // Enable it to run integrated test. 45 | it("pipeline and pipeline model") { 46 | val uri = "hdfs://" 47 | val pipelineDir = "tmp/pipeline" 48 | val modelDir = "tmp/model" 49 | 50 | val pipelineDirEntity = internal.mlDirectoryToEntity(uri, pipelineDir) 51 | val modelDirEntity = internal.mlDirectoryToEntity(uri, modelDir) 52 | 53 | atlasClient.createEntitiesWithDependencies(Seq(pipelineDirEntity, modelDirEntity)) 54 | 55 | val df = sparkSession.createDataFrame(Seq( 56 | (1, Vectors.dense(0.0, 1.0, 4.0), 1.0), 57 | (2, Vectors.dense(1.0, 0.0, 4.0), 2.0), 58 | (3, Vectors.dense(1.0, 0.0, 5.0), 3.0), 59 | (4, Vectors.dense(0.0, 0.0, 5.0), 4.0) 60 | )).toDF("id", "features", "label") 61 | 62 | val scaler = new MinMaxScaler() 63 | .setInputCol("features") 64 | .setOutputCol("features_scaled") 65 | .setMin(0.0) 66 | .setMax(3.0) 67 | val pipeline = new Pipeline().setStages(Array(scaler)) 68 | 69 | val model = pipeline.fit(df) 70 | 71 | pipeline.write.overwrite().save(pipelineDir) 72 | 73 | val pipelineEntity = internal.mlPipelineToEntity(pipeline.uid, pipelineDirEntity) 74 | 75 | atlasClient.createEntitiesWithDependencies(Seq(pipelineDirEntity, pipelineEntity)) 76 | 77 | val modelEntity = internal.mlModelToEntity(model.uid, modelDirEntity) 78 | 79 | atlasClient.createEntitiesWithDependencies(Seq(modelDirEntity, modelEntity)) 80 | 81 | val tableEntities1 = getTableEntity("chris1") 82 | val tableEntities2 = getTableEntity("chris2") 83 | 84 | atlasClient.createEntitiesWithDependencies(tableEntities1) 85 | atlasClient.createEntitiesWithDependencies(tableEntities2) 86 | 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/CatalogEventToAtlasIT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import java.nio.file.Files 21 | 22 | import scala.concurrent.duration._ 23 | import scala.language.postfixOps 24 | 25 | import org.apache.atlas.AtlasServiceException 26 | import org.apache.spark.sql.SparkSession 27 | import org.apache.spark.sql.catalyst.catalog._ 28 | import org.apache.spark.sql.types.{IntegerType, StringType, StructType} 29 | import org.scalatest.Matchers 30 | import org.scalatest.concurrent.Eventually._ 31 | 32 | import com.hortonworks.spark.atlas.utils.SparkUtils 33 | import com.hortonworks.spark.atlas.{BaseResourceIT, RestAtlasClient, TestUtils} 34 | 35 | class CatalogEventToAtlasIT extends BaseResourceIT with Matchers { 36 | import TestUtils._ 37 | 38 | private var sparkSession: SparkSession = _ 39 | 40 | private var processor: SparkCatalogEventProcessor = _ 41 | 42 | override protected def beforeAll(): Unit = { 43 | super.beforeAll() 44 | sparkSession = SparkSession.builder() 45 | .master("local") 46 | .getOrCreate() 47 | processor = 48 | new SparkCatalogEventProcessor(new RestAtlasClient(atlasClientConf), atlasClientConf) 49 | processor.startThread() 50 | } 51 | 52 | override def afterAll(): Unit = { 53 | sparkSession.stop() 54 | SparkSession.clearActiveSession() 55 | SparkSession.clearDefaultSession() 56 | super.afterAll() 57 | } 58 | 59 | it("catalog spark db event to Atlas entities") { 60 | val dbName = uniqueName("db1") 61 | 62 | // Create db entity in Atlas and make sure we get it from Atlas 63 | val tempDbPath = Files.createTempDirectory("db_") 64 | val dbDefinition = createDB(dbName, tempDbPath.normalize().toUri.toString) 65 | SparkUtils.getExternalCatalog().createDatabase(dbDefinition, ignoreIfExists = true) 66 | processor.pushEvent(CreateDatabaseEvent(dbName)) 67 | eventually(timeout(30 seconds), interval(100 milliseconds)) { 68 | val entity = getEntity(processor.sparkDbType, processor.sparkDbUniqueAttribute(dbName)) 69 | entity should not be (null) 70 | entity.getAttribute("name") should be (dbName) 71 | entity.getAttribute("owner") should be (SparkUtils.currUser()) 72 | entity.getAttribute("ownerType") should be ("USER") 73 | } 74 | 75 | // Drop DB from external catalog to make sure we also delete the corresponding Atlas entity 76 | SparkUtils.getExternalCatalog().dropDatabase(dbName, ignoreIfNotExists = true, cascade = false) 77 | processor.pushEvent(DropDatabaseEvent(dbName)) 78 | eventually(timeout(30 seconds), interval(100 milliseconds)) { 79 | intercept[AtlasServiceException]( 80 | getEntity(processor.sparkDbType, processor.sparkDbUniqueAttribute(dbName))) 81 | } 82 | } 83 | 84 | it("catalog spark table event to Atlas entities") { 85 | val dbName = uniqueName("db2") 86 | val tbl1Name = uniqueName("tbl1") 87 | val tbl2Name = uniqueName("tbl2") 88 | 89 | val tempDbPath = Files.createTempDirectory("db_") 90 | val dbDefinition = createDB(dbName, tempDbPath.normalize().toUri.toString) 91 | SparkUtils.getExternalCatalog().createDatabase(dbDefinition, ignoreIfExists = true) 92 | processor.pushEvent(CreateDatabaseEvent(dbName)) 93 | eventually(timeout(30 
seconds), interval(100 milliseconds)) { 94 | val entity = getEntity(processor.sparkDbType, processor.sparkDbUniqueAttribute(dbName)) 95 | entity should not be (null) 96 | entity.getAttribute("name") should be (dbName) 97 | } 98 | 99 | // Create new table 100 | val schema = new StructType() 101 | .add("user", StringType) 102 | .add("age", IntegerType) 103 | val sd = CatalogStorageFormat.empty 104 | val tableDefinition = createTable(dbName, tbl1Name, schema, sd) 105 | SparkUtils.getExternalCatalog().createTable(tableDefinition, ignoreIfExists = true) 106 | processor.pushEvent(CreateTableEvent(dbName, tbl1Name)) 107 | 108 | eventually(timeout(30 seconds), interval(100 milliseconds)) { 109 | val sdEntity = getEntity(processor.sparkStorageFormatType, 110 | processor.sparkStorageFormatUniqueAttribute(dbName, tbl1Name)) 111 | sdEntity should not be (null) 112 | 113 | val tblEntity = getEntity(processor.sparkTableType, 114 | processor.sparkTableUniqueAttribute(dbName, tbl1Name)) 115 | tblEntity should not be (null) 116 | tblEntity.getAttribute("name") should be (tbl1Name) 117 | } 118 | 119 | // Rename table 120 | SparkUtils.getExternalCatalog().renameTable(dbName, tbl1Name, tbl2Name) 121 | processor.pushEvent(RenameTableEvent(dbName, tbl1Name, tbl2Name)) 122 | eventually(timeout(30 seconds), interval(100 milliseconds)) { 123 | val tblEntity = getEntity(processor.sparkTableType, 124 | processor.sparkTableUniqueAttribute(dbName, tbl2Name)) 125 | tblEntity should not be (null) 126 | tblEntity.getAttribute("name") should be (tbl2Name) 127 | 128 | val sdEntity = getEntity(processor.sparkStorageFormatType, 129 | processor.sparkStorageFormatUniqueAttribute(dbName, tbl2Name)) 130 | sdEntity should not be (null) 131 | } 132 | 133 | // Drop table 134 | val tblDef2 = SparkUtils.getExternalCatalog().getTable(dbName, tbl2Name) 135 | processor.pushEvent(DropTablePreEvent(dbName, tbl2Name)) 136 | processor.pushEvent(DropTableEvent(dbName, tbl2Name)) 137 | 138 | // sleeping 2 secs - we have to do this to ensure there's no call on deletion, unfortunately... 139 | Thread.sleep(2 * 1000) 140 | // deletion request should not be added 141 | val tblEntity = getEntity(processor.sparkTableType, 142 | processor.sparkTableUniqueAttribute(dbName, tbl2Name)) 143 | tblEntity should not be (null) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/CreateDataSourceTableAsSelectHarvesterSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import com.hortonworks.spark.atlas.types.metadata 21 | 22 | import scala.util.Random 23 | import com.hortonworks.spark.atlas.{SACAtlasEntityWithDependencies, WithHiveSupport} 24 | import com.hortonworks.spark.atlas.utils.SparkUtils 25 | import org.apache.atlas.model.instance.AtlasEntity 26 | import org.apache.spark.sql.SaveMode 27 | import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} 28 | import org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand 29 | import org.apache.spark.sql.execution.datasources.DataSource 30 | import org.apache.spark.sql.types.StructType 31 | import org.scalatest.{FunSuite, Matchers} 32 | 33 | // This is not leveraging BaseHarvesterSuite, as it doesn't need to be tested with 34 | // both non-remote HMS and remote HMS cases. 35 | class CreateDataSourceTableAsSelectHarvesterSuite 36 | extends FunSuite with Matchers with WithHiveSupport { 37 | 38 | private val sourceTblName = "source_" + Random.nextInt(100000) 39 | 40 | override protected def beforeAll(): Unit = { 41 | super.beforeAll() 42 | 43 | sparkSession.sql(s"CREATE TABLE $sourceTblName (name string, age int)") 44 | } 45 | 46 | test("saveAsTable should have output entity having table details - parquet") { 47 | testWithProvider("parquet") 48 | } 49 | 50 | test("saveAsTable should have output entity having table details - hive") { 51 | val entity = testWithProvider("hive") 52 | assert(entity.getAttribute("partitionProvider") == "Catalog") 53 | } 54 | 55 | def testWithProvider(provider: String): AtlasEntity = { 56 | val destTblName = "dest1_" + Random.nextInt(100000) 57 | val df = sparkSession.sql(s"SELECT * FROM $sourceTblName") 58 | 59 | // The code below follows the DataFrameWriter.saveAsTable code path as of Spark 2.4. 60 | // It uses internal APIs for this test. If the compatibility is broken, it would be better to 61 | // just remove this test.
62 | val tableIdent = df.sparkSession.sessionState.sqlParser.parseTableIdentifier(destTblName) 63 | val storage = DataSource.buildStorageFormatFromOptions(Map("path" -> "/tmp/foo")) 64 | val tableDesc = CatalogTable( 65 | identifier = tableIdent, 66 | tableType = CatalogTableType.EXTERNAL, 67 | storage = storage, 68 | schema = new StructType, 69 | provider = Some(provider), 70 | partitionColumnNames = Nil, 71 | bucketSpec = None) 72 | val cmd = CreateDataSourceTableAsSelectCommand( 73 | tableDesc, 74 | SaveMode.ErrorIfExists, 75 | df.queryExecution.logical, 76 | Seq("name", "age")) 77 | val newTable = tableDesc.copy( 78 | storage = tableDesc.storage.copy(), 79 | schema = df.schema) 80 | sparkSession.sessionState.catalog.createTable( 81 | newTable, ignoreIfExists = false, validateLocation = false) 82 | 83 | val qd = QueryDetail(df.queryExecution, 0L) 84 | val entities = CommandsHarvester.CreateDataSourceTableAsSelectHarvester.harvest(cmd, qd) 85 | val processDeps = entities.head.asInstanceOf[SACAtlasEntityWithDependencies].dependencies 86 | val maybeEntity = processDeps.find(_.typeName == metadata.TABLE_TYPE_STRING) 87 | .map(_.asInstanceOf[SACAtlasEntityWithDependencies].entity) 88 | 89 | assert(maybeEntity.isDefined, s"Output entity for table [$destTblName] was not found.") 90 | assert(maybeEntity.get.getAttribute("name") == destTblName) 91 | assert(maybeEntity.get.getAttribute("owner") == SparkUtils.currUser()) 92 | assert(maybeEntity.get.getAttribute("schemaDesc") == "struct") 93 | assert(maybeEntity.get.getAttribute("provider") == provider) 94 | maybeEntity.get 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/CreateViewHarvesterSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import scala.util.Random 21 | import org.apache.spark.sql.execution.command.{CreateViewCommand, ExecutedCommandExec} 22 | import com.hortonworks.spark.atlas._ 23 | import com.hortonworks.spark.atlas.sql.testhelper.BaseHarvesterSuite 24 | import org.apache.spark.sql.SparkSession 25 | 26 | abstract class BaseCreateViewHarvesterSuite 27 | extends BaseHarvesterSuite { 28 | 29 | private val sourceTblName = "source_" + Random.nextInt(100000) 30 | private val destinationViewName = "destination_" + Random.nextInt(100000) 31 | private val destinationViewName2 = "destination_" + Random.nextInt(100000) 32 | 33 | protected override def initializeTestEnvironment(): Unit = { 34 | prepareDatabase() 35 | 36 | _spark.sql(s"CREATE TABLE $sourceTblName (name string)") 37 | _spark.sql(s"INSERT INTO TABLE $sourceTblName VALUES ('lucy'), ('tom')") 38 | } 39 | 40 | protected override def cleanupTestEnvironment(): Unit = { 41 | cleanupDatabase() 42 | } 43 | 44 | test("CREATE VIEW FROM TABLE") { 45 | val qe = _spark.sql(s"CREATE VIEW $destinationViewName " + 46 | s"AS SELECT * FROM $sourceTblName").queryExecution 47 | val qd = QueryDetail(qe, 0L) 48 | 49 | assert(qe.sparkPlan.isInstanceOf[ExecutedCommandExec]) 50 | val node = qe.sparkPlan.asInstanceOf[ExecutedCommandExec] 51 | assert(node.cmd.isInstanceOf[CreateViewCommand]) 52 | val cmd = node.cmd.asInstanceOf[CreateViewCommand] 53 | 54 | val entities = CommandsHarvester.CreateViewHarvester.harvest(cmd, qd) 55 | validateProcessEntity(entities.head, _ => {}, inputs => { 56 | inputs.size should be (1) 57 | assertTable(inputs.head, sourceTblName) 58 | }, outputs => { 59 | outputs.size should be (1) 60 | assertTable(outputs.head, destinationViewName) 61 | }) 62 | } 63 | 64 | test("CREATE VIEW without source") { 65 | val qe = _spark.sql(s"CREATE VIEW $destinationViewName2 " + 66 | s"AS SELECT 1").queryExecution 67 | val qd = QueryDetail(qe, 0L) 68 | 69 | assert(qe.sparkPlan.isInstanceOf[ExecutedCommandExec]) 70 | val node = qe.sparkPlan.asInstanceOf[ExecutedCommandExec] 71 | assert(node.cmd.isInstanceOf[CreateViewCommand]) 72 | val cmd = node.cmd.asInstanceOf[CreateViewCommand] 73 | 74 | val entities = CommandsHarvester.CreateViewHarvester.harvest(cmd, qd) 75 | validateProcessEntity(entities.head, _ => {}, inputs => { 76 | inputs.size should be (0) 77 | }, outputs => { 78 | outputs.size should be (1) 79 | assertTable(outputs.head, destinationViewName2) 80 | }) 81 | } 82 | } 83 | 84 | class CreateViewHarvesterSuite 85 | extends BaseCreateViewHarvesterSuite 86 | with WithHiveSupport { 87 | 88 | override def beforeAll(): Unit = { 89 | super.beforeAll() 90 | initializeTestEnvironment() 91 | } 92 | 93 | override def afterAll(): Unit = { 94 | cleanupTestEnvironment() 95 | super.afterAll() 96 | } 97 | 98 | override protected def getSparkSession: SparkSession = sparkSession 99 | 100 | override protected def getDbName: String = "sac" 101 | 102 | override protected def expectSparkTableModels: Boolean = true 103 | } 104 | 105 | class CreateViewHarvesterWithRemoteHMSSuite 106 | extends BaseCreateViewHarvesterSuite 107 | with WithRemoteHiveMetastoreServiceSupport { 108 | 109 | override def beforeAll(): Unit = { 110 | super.beforeAll() 111 | initializeTestEnvironment() 112 | } 113 | 114 | override def afterAll(): Unit = { 115 | cleanupTestEnvironment() 116 | super.afterAll() 117 | } 118 | 119 | override protected def getSparkSession: SparkSession = sparkSession 120 | 121 | override protected def getDbName: String = 
dbName 122 | 123 | override protected def expectSparkTableModels: Boolean = false 124 | } 125 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/InsertIntoHiveDirHarvesterSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import java.io.File 21 | 22 | import scala.util.Random 23 | import org.apache.spark.sql.execution.command.DataWritingCommandExec 24 | import org.apache.spark.sql.hive.execution.InsertIntoHiveDirCommand 25 | import com.hortonworks.spark.atlas.{WithHiveSupport, WithRemoteHiveMetastoreServiceSupport} 26 | import com.hortonworks.spark.atlas.sql.testhelper.{BaseHarvesterSuite, FsEntityValidator} 27 | import org.apache.spark.sql.SparkSession 28 | 29 | abstract class BaseInsertIntoHiveDirHarvesterSuite 30 | extends BaseHarvesterSuite 31 | with FsEntityValidator { 32 | 33 | private val sourceTblName = "source_" + Random.nextInt(100000) 34 | 35 | protected override def initializeTestEnvironment(): Unit = { 36 | prepareDatabase() 37 | 38 | _spark.sql(s"CREATE TABLE $sourceTblName (name string)") 39 | _spark.sql(s"INSERT INTO TABLE $sourceTblName VALUES ('a'), ('b'), ('c')") 40 | } 41 | 42 | override protected def cleanupTestEnvironment(): Unit = { 43 | cleanupDatabase() 44 | } 45 | 46 | test("INSERT OVERWRITE DIRECTORY path...") { 47 | val qe = _spark.sql(s"INSERT OVERWRITE DIRECTORY 'target/dir1' " + 48 | s"SELECT * FROM $sourceTblName").queryExecution 49 | val qd = QueryDetail(qe, 0L) 50 | 51 | assert(qe.sparkPlan.isInstanceOf[DataWritingCommandExec]) 52 | val node = qe.sparkPlan.asInstanceOf[DataWritingCommandExec] 53 | assert(node.cmd.isInstanceOf[InsertIntoHiveDirCommand]) 54 | val cmd = node.cmd.asInstanceOf[InsertIntoHiveDirCommand] 55 | 56 | val entities = CommandsHarvester.InsertIntoHiveDirHarvester.harvest(cmd, qd) 57 | validateProcessEntity(entities.head, _ => {}, inputs => { 58 | inputs.size should be (1) 59 | assertTable(inputs.head, _dbName, sourceTblName, _clusterName, _useSparkTable) 60 | }, outputs => { 61 | outputs.size should be (1) 62 | val dir = new File("target/dir1").getAbsolutePath 63 | assertFsEntity(outputs.head, dir) 64 | }) 65 | } 66 | } 67 | 68 | class InsertIntoHiveDirHarvesterSuite 69 | extends BaseInsertIntoHiveDirHarvesterSuite 70 | with WithHiveSupport { 71 | 72 | override def beforeAll(): Unit = { 73 | super.beforeAll() 74 | initializeTestEnvironment() 75 | } 76 | 77 | override def afterAll(): Unit = { 78 | cleanupTestEnvironment() 79 | super.afterAll() 80 | } 81 | 82 | override protected def getSparkSession: 
SparkSession = sparkSession 83 | 84 | override protected def getDbName: String = "sac" 85 | 86 | override protected def expectSparkTableModels: Boolean = true 87 | } 88 | 89 | class InsertIntoHiveDirHarvesterWithRemoteHMSSuite 90 | extends BaseInsertIntoHiveDirHarvesterSuite 91 | with WithRemoteHiveMetastoreServiceSupport { 92 | 93 | override def beforeAll(): Unit = { 94 | super.beforeAll() 95 | initializeTestEnvironment() 96 | } 97 | 98 | override def afterAll(): Unit = { 99 | cleanupTestEnvironment() 100 | super.afterAll() 101 | } 102 | 103 | override protected def getSparkSession: SparkSession = sparkSession 104 | 105 | override protected def getDbName: String = dbName 106 | 107 | override protected def expectSparkTableModels: Boolean = false 108 | } 109 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/LoadDataHarvesterSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import java.io.{FileOutputStream, PrintWriter} 21 | import java.nio.file.Files 22 | 23 | import scala.util.Random 24 | import org.apache.spark.sql.execution.LeafExecNode 25 | import org.apache.spark.sql.execution.command.{ExecutedCommandExec, LoadDataCommand} 26 | import com.hortonworks.spark.atlas.types.external 27 | import com.hortonworks.spark.atlas._ 28 | import com.hortonworks.spark.atlas.sql.testhelper.BaseHarvesterSuite 29 | import org.apache.spark.sql.SparkSession 30 | 31 | abstract class BaseLoadDataHarvesterSuite 32 | extends BaseHarvesterSuite { 33 | 34 | protected val sourceTblName = "source_" + Random.nextInt(100000) 35 | 36 | protected override def initializeTestEnvironment(): Unit = { 37 | prepareDatabase() 38 | 39 | _spark.sql(s"CREATE TABLE $sourceTblName (name string)") 40 | } 41 | 42 | override protected def cleanupTestEnvironment(): Unit = { 43 | cleanupDatabase() 44 | } 45 | 46 | test("LOAD DATA [LOCAL] INPATH path source") { 47 | val file = Files.createTempFile("input", ".txt").toFile 48 | val out = new PrintWriter(new FileOutputStream(file)) 49 | out.write("a\nb\nc\nd\n") 50 | out.close() 51 | 52 | val qe = _spark.sql(s"LOAD DATA LOCAL INPATH '${file.getAbsolutePath}' " + 53 | s"OVERWRITE INTO TABLE $sourceTblName").queryExecution 54 | val qd = QueryDetail(qe, 0L) 55 | val node = qe.sparkPlan.collect { case p: LeafExecNode => p } 56 | assert(node.size == 1) 57 | val execNode = node.head.asInstanceOf[ExecutedCommandExec] 58 | 59 | val entities = CommandsHarvester.LoadDataHarvester.harvest( 60 | execNode.cmd.asInstanceOf[LoadDataCommand], qd) 61 | validateProcessEntity(entities.head, _ => {}, inputs => { 62 | inputs.size should be (1) 63 | val inputEntity = inputs.head.asInstanceOf[SACAtlasEntityWithDependencies].entity 64 | inputEntity.getTypeName should be (external.FS_PATH_TYPE_STRING) 65 | inputEntity.getAttribute("name") should be (file.getAbsolutePath.toLowerCase) 66 | }, outputs => { 67 | outputs.size should be (1) 68 | assertTable(outputs.head, _dbName, sourceTblName, _clusterName, _useSparkTable) 69 | }) 70 | } 71 | } 72 | 73 | class LoadDataHarvesterSuite 74 | extends BaseLoadDataHarvesterSuite 75 | with WithHiveSupport { 76 | 77 | override def beforeAll(): Unit = { 78 | super.beforeAll() 79 | initializeTestEnvironment() 80 | } 81 | 82 | override def afterAll(): Unit = { 83 | cleanupTestEnvironment() 84 | super.afterAll() 85 | } 86 | 87 | override def getSparkSession: SparkSession = sparkSession 88 | 89 | override def getDbName: String = "sac" 90 | 91 | override def expectSparkTableModels: Boolean = true 92 | } 93 | 94 | class LoadDataHarvesterWithRemoteHMSSuite 95 | extends BaseLoadDataHarvesterSuite 96 | with WithRemoteHiveMetastoreServiceSupport { 97 | 98 | override def beforeAll(): Unit = { 99 | super.beforeAll() 100 | initializeTestEnvironment() 101 | } 102 | 103 | override def afterAll(): Unit = { 104 | cleanupTestEnvironment() 105 | super.afterAll() 106 | } 107 | 108 | override def getSparkSession: SparkSession = sparkSession 109 | 110 | override def expectSparkTableModels: Boolean = false 111 | 112 | override def getDbName: String = dbName 113 | } 114 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/SparkExecutionPlanProcessForRdbmsQuerySuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software 
Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} 21 | import java.sql.DriverManager 22 | 23 | import com.hortonworks.spark.atlas.{AtlasClientConf, AtlasUtils, WithHiveSupport} 24 | import com.hortonworks.spark.atlas.AtlasEntityReadHelper._ 25 | import com.hortonworks.spark.atlas.sql.testhelper.{AtlasQueryExecutionListener, CreateEntitiesTrackingAtlasClient, DirectProcessSparkExecutionPlanProcessor, ProcessEntityValidator} 26 | import com.hortonworks.spark.atlas.types.{external, metadata} 27 | import org.apache.atlas.model.instance.AtlasEntity 28 | 29 | class SparkExecutionPlanProcessForRdbmsQuerySuite 30 | extends FunSuite 31 | with Matchers 32 | with BeforeAndAfter 33 | with WithHiveSupport 34 | with ProcessEntityValidator { 35 | 36 | val sinkTableName = "sink_table" 37 | val sourceTableName = "source_table" 38 | val databaseName = "testdb" 39 | val jdbcDriver = "org.apache.derby.jdbc.EmbeddedDriver" 40 | 41 | val atlasClientConf: AtlasClientConf = new AtlasClientConf() 42 | var atlasClient: CreateEntitiesTrackingAtlasClient = _ 43 | val testHelperQueryListener = new AtlasQueryExecutionListener() 44 | 45 | before { 46 | // set up derby database and necessary tables 47 | val connectionURL = s"jdbc:derby:memory:$databaseName;create=true" 48 | Class.forName(jdbcDriver) 49 | val connection = DriverManager.getConnection(connectionURL) 50 | 51 | val createSinkTableQuery = s"CREATE TABLE $sinkTableName (NAME VARCHAR(20))" 52 | val createSourceTableQuery = s"CREATE TABLE $sourceTableName (NAME VARCHAR(20))" 53 | val insertQuery = s"INSERT INTO $sourceTableName (Name) VALUES ('A'), ('B'), ('C')" 54 | val statement = connection.createStatement 55 | statement.executeUpdate(createSinkTableQuery) 56 | statement.executeUpdate(createSourceTableQuery) 57 | statement.executeUpdate(insertQuery) 58 | 59 | // setup Atlas client 60 | atlasClient = new CreateEntitiesTrackingAtlasClient() 61 | sparkSession.listenerManager.register(testHelperQueryListener) 62 | } 63 | 64 | test("read from derby table and insert into a different derby table") { 65 | val planProcessor = new DirectProcessSparkExecutionPlanProcessor(atlasClient, atlasClientConf) 66 | 67 | val jdbcProperties = new java.util.Properties 68 | jdbcProperties.setProperty("driver", jdbcDriver) 69 | val url = s"jdbc:derby:memory:$databaseName;create=false" 70 | 71 | val readDataFrame = sparkSession.read.jdbc(url, sourceTableName, jdbcProperties) 72 | readDataFrame.write.mode("append").jdbc(url, sinkTableName, jdbcProperties) 73 | 74 | val queryDetail = testHelperQueryListener.queryDetails.last 75 | planProcessor.process(queryDetail) 76 | val entities = atlasClient.createdEntities 77 | 78 | // we're
expecting two table entities: 79 | // one from the source table and another from the sink table 80 | val tableEntities = listAtlasEntitiesAsType(entities, external.RDBMS_TABLE) 81 | assert(tableEntities.size === 2) 82 | 83 | val inputEntity = getOnlyOneEntityOnAttribute(tableEntities, "name", sourceTableName) 84 | val outputEntity = getOnlyOneEntityOnAttribute(tableEntities, "name", sinkTableName) 85 | assertTableEntity(inputEntity, sourceTableName) 86 | assertTableEntity(outputEntity, sinkTableName) 87 | 88 | // check for 'spark_process' 89 | validateProcessEntityWithAtlasEntities(entities, _ => {}, 90 | AtlasUtils.entitiesToReferences(Seq(inputEntity)), 91 | AtlasUtils.entitiesToReferences(Seq(outputEntity))) 92 | } 93 | 94 | private def assertTableEntity(entity: AtlasEntity, tableName: String): Unit = { 95 | val tableQualifiedName = getStringAttribute(entity, "qualifiedName") 96 | assert(tableQualifiedName.equals(s"$databaseName.$tableName")) 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/SparkExecutionPlanProcessorForComplicatedQuerySuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import java.io.File 21 | 22 | import com.hortonworks.spark.atlas.{AtlasClientConf, AtlasUtils, WithRemoteHiveMetastoreServiceSupport} 23 | import com.hortonworks.spark.atlas.sql.testhelper._ 24 | import com.hortonworks.spark.atlas.types.external 25 | import org.apache.atlas.model.instance.AtlasObjectId 26 | import org.scalatest.{BeforeAndAfterEach, FunSuite, Matchers} 27 | 28 | class SparkExecutionPlanProcessorForComplicatedQuerySuite 29 | extends FunSuite 30 | with BeforeAndAfterEach 31 | with Matchers 32 | with WithRemoteHiveMetastoreServiceSupport 33 | with ProcessEntityValidator 34 | with TableEntityValidator 35 | with FsEntityValidator { 36 | 37 | import com.hortonworks.spark.atlas.AtlasEntityReadHelper._ 38 | 39 | val atlasClientConf: AtlasClientConf = new AtlasClientConf() 40 | var testHelperQueryListener: AtlasQueryExecutionListener = _ 41 | 42 | val clusterName: String = atlasClientConf.get(AtlasClientConf.CLUSTER_NAME) 43 | 44 | override def beforeAll(): Unit = { 45 | super.beforeAll() 46 | 47 | testHelperQueryListener = new AtlasQueryExecutionListener() 48 | sparkSession.listenerManager.register(testHelperQueryListener) 49 | } 50 | 51 | override def afterAll(): Unit = { 52 | sparkSession.listenerManager.unregister(testHelperQueryListener) 53 | super.afterAll() 54 | } 55 | 56 | test("select tbl1, tbl2 -> save to tbl3 -> select tbl3 -> save to file") { 57 | val atlasClient = new CreateEntitiesTrackingAtlasClient() 58 | val planProcessor = new DirectProcessSparkExecutionPlanProcessor(atlasClient, atlasClientConf) 59 | 60 | val rand = new scala.util.Random() 61 | val randNum = rand.nextInt(1000000000) 62 | 63 | val table1 = s"t1_$randNum" 64 | val table2 = s"t2_$randNum" 65 | val table3 = s"t3_$randNum" 66 | val outputPath = s"/tmp/hdfs_$randNum" 67 | 68 | sparkSession.sql(s"create table ${dbName}.${table1}(col1 int)") 69 | sparkSession.sql(s"create table ${dbName}.${table2}(col2 int)") 70 | 71 | testHelperQueryListener.clear() 72 | 73 | sparkSession 74 | .sql(s"select * from ${dbName}.${table1}, ${dbName}.${table2} where col1=col2") 75 | .write 76 | .saveAsTable(s"${dbName}.${table3}") 77 | 78 | val queryDetail = testHelperQueryListener.queryDetails.last 79 | planProcessor.process(queryDetail) 80 | val entities = atlasClient.createdEntities 81 | 82 | val expectedInputs = Set( 83 | new AtlasObjectId(external.HIVE_TABLE_TYPE_STRING, 84 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 85 | external.hiveTableUniqueAttribute(clusterName, dbName, table1)), 86 | new AtlasObjectId(external.HIVE_TABLE_TYPE_STRING, 87 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 88 | external.hiveTableUniqueAttribute(clusterName, dbName, table2))) 89 | 90 | val expectedOutputs = Set( 91 | new AtlasObjectId(external.HIVE_TABLE_TYPE_STRING, 92 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 93 | external.hiveTableUniqueAttribute(clusterName, dbName, table3))) 94 | 95 | validateProcessEntityWithAtlasEntities(entities, _ => {}, expectedInputs, expectedOutputs) 96 | 97 | testHelperQueryListener.clear() 98 | atlasClient.clearEntities() 99 | 100 | sparkSession 101 | .sql(s"select * from ${dbName}.${table3}") 102 | .write 103 | .mode("append") 104 | .save(outputPath) 105 | 106 | val queryDetail2 = testHelperQueryListener.queryDetails.last 107 | planProcessor.process(queryDetail2) 108 | val entities2 = atlasClient.createdEntities 109 | 110 | val expectedInputs2 = Set( 111 | new 
AtlasObjectId(external.HIVE_TABLE_TYPE_STRING, 112 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 113 | external.hiveTableUniqueAttribute(clusterName, dbName, table3))) 114 | 115 | val output = getOnlyOneEntity(entities, external.FS_PATH_TYPE_STRING) 116 | val dir = new File(outputPath).getAbsolutePath 117 | assertFsEntity(output, dir) 118 | val expectedOutputs2 = AtlasUtils.entitiesToReferences(Seq(output), useGuid = false) 119 | 120 | validateProcessEntityWithAtlasEntities(entities2, _ => {}, expectedInputs2, expectedOutputs2) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/SparkExecutionPlanProcessorForViewSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import scala.util.Random 21 | import org.scalatest.{FunSuite, Matchers} 22 | import org.apache.atlas.model.instance.AtlasEntity 23 | import com.hortonworks.spark.atlas.AtlasEntityReadHelper._ 24 | import com.hortonworks.spark.atlas.{AtlasClientConf, AtlasUtils, WithHiveSupport} 25 | import com.hortonworks.spark.atlas.sql.testhelper.{AtlasQueryExecutionListener, CreateEntitiesTrackingAtlasClient, DirectProcessSparkExecutionPlanProcessor, ProcessEntityValidator} 26 | import com.hortonworks.spark.atlas.types.metadata 27 | 28 | class SparkExecutionPlanProcessorForViewSuite 29 | extends FunSuite 30 | with Matchers 31 | with WithHiveSupport 32 | with ProcessEntityValidator { 33 | private val sourceTblName = "source_" + Random.nextInt(100000) 34 | private val destinationViewName = "destination_" + Random.nextInt(100000) 35 | private val destinationTableName = "destination_" + Random.nextInt(100000) 36 | 37 | private val testHelperQueryListener = new AtlasQueryExecutionListener() 38 | 39 | var atlasClient: CreateEntitiesTrackingAtlasClient = _ 40 | val atlasClientConf: AtlasClientConf = new AtlasClientConf() 41 | 42 | override protected def beforeAll(): Unit = { 43 | super.beforeAll() 44 | 45 | sparkSession.sql(s"CREATE TABLE $sourceTblName (name string)") 46 | sparkSession.sql(s"INSERT INTO TABLE $sourceTblName VALUES ('lucy'), ('tom')") 47 | 48 | // setup Atlas client 49 | testHelperQueryListener.clear() 50 | atlasClient = new CreateEntitiesTrackingAtlasClient() 51 | sparkSession.listenerManager.register(testHelperQueryListener) 52 | } 53 | 54 | test("CREATE TEMPORARY VIEW FROM TABLE, SAVE TEMP VIEW TO TABLE") { 55 | val planProcessor = new DirectProcessSparkExecutionPlanProcessor(atlasClient, atlasClientConf) 56 | sparkSession.sql(s"SELECT * FROM 
$sourceTblName").createOrReplaceTempView(destinationViewName) 57 | 58 | var queryDetail = testHelperQueryListener.queryDetails.last 59 | planProcessor.process(queryDetail) 60 | var entities = atlasClient.createdEntities 61 | 62 | // no entities should have been received from a creating a temporary view 63 | assert(entities.isEmpty) 64 | 65 | // we don't want to check above queries, so reset the entities in listener 66 | testHelperQueryListener.clear() 67 | 68 | sparkSession.sql(s"SELECT * FROM $destinationViewName").write.saveAsTable(destinationTableName) 69 | 70 | queryDetail = testHelperQueryListener.queryDetails.last 71 | planProcessor.process(queryDetail) 72 | entities = atlasClient.createdEntities 73 | 74 | // we're expecting two table entities: 75 | // one from the source table and another from the sink table, the temporary view is ignored 76 | assert(entities.nonEmpty) 77 | val tableEntities = listAtlasEntitiesAsType(entities, metadata.TABLE_TYPE_STRING) 78 | assert(tableEntities.size === 2) 79 | 80 | val inputEntity = getOnlyOneEntityOnAttribute(tableEntities, "name", sourceTblName) 81 | val outputEntity = getOnlyOneEntityOnAttribute(tableEntities, "name", destinationTableName) 82 | assertTableEntity(inputEntity, sourceTblName) 83 | assertTableEntity(outputEntity, destinationTableName) 84 | 85 | // check for 'spark_process' 86 | validateProcessEntityWithAtlasEntities(entities, _ => {}, 87 | AtlasUtils.entitiesToReferences(Seq(inputEntity)), 88 | AtlasUtils.entitiesToReferences(Seq(outputEntity))) 89 | } 90 | 91 | private def assertTableEntity(entity: AtlasEntity, tableName: String): Unit = { 92 | val tableQualifiedName = getStringAttribute(entity, "qualifiedName") 93 | assert(tableQualifiedName.contains(s"$tableName")) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/SparkExecutionPlanProcessorWithRemoteHiveMetastoreServiceSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql 19 | 20 | import java.io.{File, FileOutputStream, PrintWriter} 21 | import java.nio.file.Files 22 | 23 | import com.hortonworks.spark.atlas._ 24 | import com.hortonworks.spark.atlas.sql.testhelper._ 25 | import com.hortonworks.spark.atlas.types.external 26 | import org.apache.atlas.model.instance.AtlasObjectId 27 | import org.apache.spark.sql.execution.command.{CreateViewCommand, ExecutedCommandExec} 28 | import org.apache.spark.sql.kafka010.KafkaTestUtils 29 | import org.scalatest.{BeforeAndAfterEach, FunSuite, Matchers} 30 | 31 | import scala.util.Random 32 | 33 | class SparkExecutionPlanProcessorWithRemoteHiveMetastoreServiceSuite 34 | extends FunSuite 35 | with BeforeAndAfterEach 36 | with Matchers 37 | with WithRemoteHiveMetastoreServiceSupport 38 | with ProcessEntityValidator 39 | with TableEntityValidator 40 | with FsEntityValidator { 41 | import com.hortonworks.spark.atlas.AtlasEntityReadHelper._ 42 | 43 | private val sourceTblName = "source_" + Random.nextInt(100000) 44 | 45 | val brokerProps: Map[String, Object] = Map[String, Object]() 46 | var kafkaTestUtils: KafkaTestUtils = _ 47 | 48 | val atlasClientConf: AtlasClientConf = new AtlasClientConf() 49 | var atlasClient: CreateEntitiesTrackingAtlasClient = _ 50 | val testHelperQueryListener = new AtlasQueryExecutionListener() 51 | 52 | val clusterName: String = atlasClientConf.get(AtlasClientConf.CLUSTER_NAME) 53 | 54 | override def beforeAll(): Unit = { 55 | super.beforeAll() 56 | 57 | sparkSession.sql(s"CREATE TABLE $dbName.$sourceTblName (name string)") 58 | sparkSession.sql(s"INSERT INTO TABLE $dbName.$sourceTblName VALUES ('a'), ('b'), ('c')") 59 | 60 | atlasClient = new CreateEntitiesTrackingAtlasClient() 61 | testHelperQueryListener.clear() 62 | sparkSession.listenerManager.register(testHelperQueryListener) 63 | } 64 | 65 | override def afterAll(): Unit = { 66 | atlasClient = null 67 | sparkSession.listenerManager.unregister(testHelperQueryListener) 68 | super.afterAll() 69 | } 70 | 71 | override def beforeEach(): Unit = { 72 | atlasClient.clearEntities() 73 | } 74 | 75 | test("CREATE EXTERNAL TABLE ... LOCATION ...") { 76 | val planProcessor = new DirectProcessSparkExecutionPlanProcessor(atlasClient, atlasClientConf) 77 | val tempDir = Files.createTempDirectory("spark-atlas-connector-temp") 78 | 79 | val rand = new scala.util.Random() 80 | val outputTableName = "employee_details_" + rand.nextInt(1000000000) 81 | 82 | sparkSession.sql(s"CREATE EXTERNAL TABLE IF NOT EXISTS $dbName.$outputTableName " + 83 | "(name STRING, age INT, emp_id INT, designation STRING) " + 84 | s"LOCATION '$tempDir'") 85 | 86 | val queryDetail = testHelperQueryListener.queryDetails.last 87 | planProcessor.process(queryDetail) 88 | val entities = atlasClient.createdEntities 89 | 90 | // The query doesn't produce a spark_process entity - it only involves table entities, 91 | // and SAC models Hive tables as references when Spark connects to a remote HMS. 92 | // The SAC Atlas client doesn't request creation for references, hence NO recorded entities. 93 | assert(entities.length === 0) 94 | } 95 | 96 | // Some tests are duplicated here to verify that no table entity is created for a Hive 97 | // table, while the process entity still holds a reference to the Hive table.
98 | 99 | // borrowed from LoadDataHarvesterSuite 100 | test("LOAD DATA [LOCAL] INPATH path source") { 101 | val file = Files.createTempFile("input", ".txt").toFile 102 | val out = new PrintWriter(new FileOutputStream(file)) 103 | out.write("a\nb\nc\nd\n") 104 | out.close() 105 | 106 | val planProcessor = new DirectProcessSparkExecutionPlanProcessor(atlasClient, atlasClientConf) 107 | 108 | sparkSession.sql(s"LOAD DATA LOCAL INPATH '${file.getAbsolutePath}' " + 109 | s"OVERWRITE INTO TABLE $dbName.$sourceTblName").queryExecution 110 | 111 | val queryDetail = testHelperQueryListener.queryDetails.last 112 | planProcessor.process(queryDetail) 113 | val entities = atlasClient.createdEntities 114 | 115 | val input = getOnlyOneEntity(entities, external.FS_PATH_TYPE_STRING) 116 | val expectedInputs = AtlasUtils.entitiesToReferences(Seq(input), useGuid = false) 117 | 118 | val expectedOutputs = Set( 119 | new AtlasObjectId(external.HIVE_TABLE_TYPE_STRING, 120 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 121 | external.hiveTableUniqueAttribute(clusterName, dbName, sourceTblName))) 122 | 123 | validateProcessEntityWithAtlasEntities(entities, _ => {}, expectedInputs, expectedOutputs) 124 | } 125 | 126 | // borrowed from InsertIntoHiveDirHarvesterSuite 127 | test("INSERT OVERWRITE DIRECTORY path...") { 128 | val planProcessor = new DirectProcessSparkExecutionPlanProcessor(atlasClient, atlasClientConf) 129 | 130 | sparkSession.sql(s"INSERT OVERWRITE DIRECTORY 'target/dir1' " + 131 | s"SELECT * FROM $dbName.$sourceTblName").queryExecution 132 | 133 | val queryDetail = testHelperQueryListener.queryDetails.last 134 | planProcessor.process(queryDetail) 135 | val entities = atlasClient.createdEntities 136 | 137 | val expectedInputs = Set( 138 | new AtlasObjectId(external.HIVE_TABLE_TYPE_STRING, 139 | org.apache.atlas.AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, 140 | external.hiveTableUniqueAttribute(clusterName, dbName, sourceTblName))) 141 | 142 | val output = getOnlyOneEntity(entities, external.FS_PATH_TYPE_STRING) 143 | val dir = new File("target/dir1").getAbsolutePath 144 | assertFsEntity(output, dir) 145 | val expectedOutputs = AtlasUtils.entitiesToReferences(Seq(output), useGuid = false) 146 | 147 | validateProcessEntityWithAtlasEntities(entities, _ => {}, expectedInputs, expectedOutputs) 148 | } 149 | 150 | } 151 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/AtlasQueryExecutionListener.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import com.hortonworks.spark.atlas.sql.QueryDetail 21 | import org.apache.spark.sql.execution.QueryExecution 22 | import org.apache.spark.sql.util.QueryExecutionListener 23 | 24 | import scala.collection.mutable 25 | 26 | class AtlasQueryExecutionListener extends QueryExecutionListener { 27 | val queryDetails = new mutable.MutableList[QueryDetail]() 28 | 29 | override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { 30 | if (qe.logical.isStreaming) { 31 | // streaming query will be tracked via SparkAtlasStreamingQueryEventTracker 32 | return 33 | } 34 | queryDetails += QueryDetail.fromQueryExecutionListener(qe, durationNs) 35 | } 36 | 37 | override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { 38 | throw exception 39 | } 40 | 41 | def clear(): Unit = { 42 | queryDetails.clear() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/AtlasStreamingQueryProgressListener.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import com.hortonworks.spark.atlas.sql.QueryDetail 21 | import com.hortonworks.spark.atlas.utils.Logging 22 | import org.apache.spark.sql.SparkSession 23 | import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper} 24 | 25 | import scala.collection.mutable 26 | import org.apache.spark.sql.streaming.StreamingQueryListener 27 | import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} 28 | 29 | class AtlasStreamingQueryProgressListener extends StreamingQueryListener with Logging { 30 | val queryDetails = new mutable.MutableList[QueryDetail]() 31 | 32 | def onQueryStarted(event: QueryStartedEvent): Unit = {} 33 | 34 | def onQueryProgress(event: QueryProgressEvent): Unit = { 35 | // FIXME: this is totally duplicated with SparkAtlasStreamingQueryEventTracker... 36 | // Extract into somewhere... 
37 | val query = SparkSession.active.streams.get(event.progress.id) 38 | if (query != null) { 39 | query match { 40 | case query: StreamingQueryWrapper => 41 | val qd = QueryDetail.fromStreamingQueryListener(query.streamingQuery, event) 42 | queryDetails += qd 43 | 44 | case query: StreamExecution => 45 | val qd = QueryDetail.fromStreamingQueryListener(query, event) 46 | queryDetails += qd 47 | 48 | case _ => logWarn(s"Unexpected type of streaming query: ${query.getClass}") 49 | } 50 | } else { 51 | logWarn(s"Cannot find query ${event.progress.id} from active spark session!") 52 | } 53 | } 54 | 55 | def onQueryTerminated(event: QueryTerminatedEvent): Unit = {} 56 | 57 | def clear(): Unit = { 58 | queryDetails.clear() 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/BaseHarvesterSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import com.hortonworks.spark.atlas.{AtlasClientConf, SACAtlasReferenceable} 21 | import org.apache.spark.sql.SparkSession 22 | import org.scalatest.{FunSuite, Matchers} 23 | 24 | abstract class BaseHarvesterSuite 25 | extends FunSuite 26 | with Matchers 27 | with ProcessEntityValidator 28 | with TableEntityValidator { 29 | 30 | protected def getSparkSession: SparkSession 31 | 32 | protected def getDbName: String 33 | 34 | protected def expectSparkTableModels: Boolean 35 | 36 | protected def initializeTestEnvironment(): Unit = {} 37 | 38 | protected def cleanupTestEnvironment(): Unit = {} 39 | 40 | private val atlasClientConf: AtlasClientConf = new AtlasClientConf() 41 | protected val _clusterName: String = atlasClientConf.get(AtlasClientConf.CLUSTER_NAME) 42 | 43 | protected lazy val _spark: SparkSession = getSparkSession 44 | protected lazy val _dbName: String = getDbName 45 | protected lazy val _useSparkTable: Boolean = expectSparkTableModels 46 | 47 | protected def prepareDatabase(): Unit = { 48 | _spark.sql(s"DROP DATABASE IF EXISTS ${_dbName} Cascade") 49 | _spark.sql(s"CREATE DATABASE ${_dbName}") 50 | _spark.sql(s"USE ${_dbName}") 51 | } 52 | 53 | protected def cleanupDatabase(): Unit = { 54 | _spark.sql(s"DROP DATABASE IF EXISTS ${_dbName} Cascade") 55 | } 56 | 57 | protected def assertTable(ref: SACAtlasReferenceable, tableName: String): Unit = { 58 | assertTable(ref, _dbName, tableName, _clusterName, _useSparkTable) 59 | } 60 | 61 | protected def assertTableWithNamePrefix( 62 | ref: SACAtlasReferenceable, 63 | tblNamePrefix: String): Unit = { 64 | assertTableWithNamePrefix(ref, _dbName, tblNamePrefix, _clusterName, _useSparkTable) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/CreateEntitiesTrackingAtlasClient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import com.hortonworks.spark.atlas.AtlasClient 21 | import com.sun.jersey.core.util.MultivaluedMapImpl 22 | import org.apache.atlas.model.instance.AtlasEntity 23 | import org.apache.atlas.model.typedef.AtlasTypesDef 24 | 25 | import scala.collection.mutable 26 | 27 | class CreateEntitiesTrackingAtlasClient extends AtlasClient { 28 | val createdEntities = new mutable.ListBuffer[AtlasEntity]() 29 | 30 | def clearEntities(): Unit = { 31 | createdEntities.clear() 32 | } 33 | 34 | override def createAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit = {} 35 | 36 | override def getAtlasTypeDefs(searchParams: MultivaluedMapImpl): AtlasTypesDef = { 37 | new AtlasTypesDef() 38 | } 39 | 40 | override def updateAtlasTypeDefs(typeDefs: AtlasTypesDef): Unit = {} 41 | 42 | override protected def doCreateEntities(entities: Seq[AtlasEntity]): Unit = { 43 | createdEntities ++= entities 44 | } 45 | 46 | override protected def doDeleteEntityWithUniqueAttr(entityType: String, 47 | attribute: String): Unit = {} 48 | 49 | override protected def doUpdateEntityWithUniqueAttr(entityType: String, attribute: String, 50 | entity: AtlasEntity): Unit = {} 51 | } 52 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/DirectProcessSparkExecutionPlanProcessor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import com.hortonworks.spark.atlas.sql.{QueryDetail, SparkExecutionPlanProcessor} 21 | import com.hortonworks.spark.atlas.{AtlasClient, AtlasClientConf} 22 | 23 | class DirectProcessSparkExecutionPlanProcessor( 24 | atlasClient: AtlasClient, 25 | atlasClientConf: AtlasClientConf) 26 | extends SparkExecutionPlanProcessor(atlasClient, atlasClientConf) { 27 | 28 | override def process(qd: QueryDetail): Unit = super.process(qd) 29 | } 30 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/FsEntityValidator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import java.io.File 21 | import java.util.Locale 22 | 23 | import com.hortonworks.spark.atlas.AtlasEntityReadHelper.{getStringAttribute, listAtlasEntitiesAsType} 24 | import com.hortonworks.spark.atlas.{SACAtlasEntityWithDependencies, SACAtlasReferenceable} 25 | import com.hortonworks.spark.atlas.types.external 26 | import org.apache.atlas.model.instance.AtlasEntity 27 | import org.scalatest.FunSuite 28 | 29 | trait FsEntityValidator extends FunSuite { 30 | 31 | def findFsEntities(entities: Seq[AtlasEntity], dir: File): Seq[AtlasEntity] = { 32 | entities.filter { e => 33 | getStringAttribute(e, "qualifiedName").toLowerCase(Locale.ROOT).contains( 34 | dir.getAbsolutePath.toLowerCase(Locale.ROOT)) 35 | } 36 | } 37 | 38 | def assertEntitiesFsType( 39 | dirToExpectedCount: Map[File, Int], 40 | entities: Set[AtlasEntity]): Unit = { 41 | val fsEntities = listAtlasEntitiesAsType(entities.toSeq, external.FS_PATH_TYPE_STRING) 42 | assert(fsEntities.size === dirToExpectedCount.values.sum) 43 | 44 | dirToExpectedCount.foreach { case (dir, expectedCnt) => 45 | val fsEntitiesFiltered = fsEntities.filter { e => 46 | getStringAttribute(e, "qualifiedName").toLowerCase(Locale.ROOT).contains( 47 | dir.getAbsolutePath.toLowerCase(Locale.ROOT)) 48 | } 49 | assert(fsEntitiesFiltered.length === expectedCnt) 50 | } 51 | } 52 | 53 | def assertFsEntity(ref: SACAtlasReferenceable, path: String): Unit = { 54 | val inputEntity = ref.asInstanceOf[SACAtlasEntityWithDependencies].entity 55 | assertFsEntity(inputEntity, path) 56 | } 57 | 58 | def assertFsEntity(entity: AtlasEntity, path: String): Unit = { 59 | assert(entity.getTypeName === external.FS_PATH_TYPE_STRING) 60 | assert(entity.getAttribute("name") === path.toLowerCase) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/KafkaTopicEntityValidator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import com.hortonworks.spark.atlas.TestUtils 21 | import com.hortonworks.spark.atlas.sql.KafkaTopicInformation 22 | import org.scalatest.FunSuite 23 | import com.hortonworks.spark.atlas.AtlasEntityReadHelper.{getStringAttribute, listAtlasEntitiesAsType} 24 | import com.hortonworks.spark.atlas.types.external.KAFKA_TOPIC_STRING 25 | import org.apache.atlas.model.instance.AtlasEntity 26 | 27 | trait KafkaTopicEntityValidator extends FunSuite { 28 | 29 | def assertEntitiesKafkaTopicType( 30 | topics: Seq[KafkaTopicInformation], 31 | entities: Set[AtlasEntity]): Unit = { 32 | val kafkaTopicEntities = listAtlasEntitiesAsType(entities.toSeq, KAFKA_TOPIC_STRING) 33 | assert(kafkaTopicEntities.size === topics.size) 34 | 35 | val expectedTopicNames = topics.map(_.topicName).toSet 36 | val expectedClusterNames = topics.map(_.clusterName.getOrElse("primary")).toSet 37 | val expectedQualifiedNames = topics.map { ti => 38 | KafkaTopicInformation.getQualifiedName(ti, "primary") 39 | }.toSet 40 | 41 | assert(kafkaTopicEntities.map(_.getAttribute("name").toString()).toSet === expectedTopicNames) 42 | assert(kafkaTopicEntities.map(_.getAttribute("topic").toString()).toSet === 43 | expectedTopicNames) 44 | assert(kafkaTopicEntities.map(getStringAttribute(_, "uri")).toSet === expectedTopicNames) 45 | assert(kafkaTopicEntities.map(getStringAttribute(_, "clusterName")).toSet === 46 | expectedClusterNames) 47 | assert(kafkaTopicEntities.map(getStringAttribute(_, "qualifiedName")).toSet === 48 | expectedQualifiedNames) 49 | } 50 | 51 | def assertKafkaTopicEntities( 52 | topics: Seq[KafkaTopicInformation], entities: Seq[AtlasEntity]): Unit = { 53 | assert( 54 | topics.map(KafkaTopicInformation.getQualifiedName(_, "primary")).toSet === 55 | entities.map(getStringAttribute(_, "qualifiedName")).toSet) 56 | } 57 | 58 | def assertEntitiesAreSubsetOfTopics( 59 | topics: Seq[KafkaTopicInformation], entities: Seq[AtlasEntity]): Unit = { 60 | TestUtils.assertSubsetOf( 61 | topics.map(KafkaTopicInformation.getQualifiedName(_, "primary")).toSet, 62 | entities.map(getStringAttribute(_, "qualifiedName")).toSet) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/ProcessEntityValidator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import java.util 21 | 22 | import com.hortonworks.spark.atlas.AtlasEntityReadHelper.getOnlyOneEntity 23 | import com.hortonworks.spark.atlas.types.metadata 24 | 25 | import scala.collection.JavaConverters._ 26 | import com.hortonworks.spark.atlas.{SACAtlasEntityWithDependencies, SACAtlasReferenceable, AtlasUtils, TestUtils} 27 | import org.apache.atlas.model.instance.{AtlasEntity, AtlasObjectId} 28 | import org.scalatest.FunSuite 29 | 30 | trait ProcessEntityValidator extends FunSuite { 31 | def validateProcessEntity( 32 | process: SACAtlasReferenceable, 33 | validateFnForProcess: AtlasEntity => Unit, 34 | validateFnForInputs: Seq[SACAtlasReferenceable] => Unit, 35 | validateFnForOutputs: Seq[SACAtlasReferenceable] => Unit): Unit = { 36 | require(process.isInstanceOf[SACAtlasEntityWithDependencies]) 37 | val pEntity = process.asInstanceOf[SACAtlasEntityWithDependencies].entity 38 | validateFnForProcess(pEntity) 39 | 40 | assert(pEntity.getAttribute("inputs").isInstanceOf[util.Collection[_]]) 41 | assert(pEntity.getAttribute("outputs").isInstanceOf[util.Collection[_]]) 42 | val inputs = pEntity.getAttribute("inputs").asInstanceOf[util.Collection[AtlasObjectId]] 43 | val outputs = pEntity.getAttribute("outputs").asInstanceOf[util.Collection[AtlasObjectId]] 44 | 45 | val pDeps = process.asInstanceOf[SACAtlasEntityWithDependencies].dependencies 46 | val inputEntities = TestUtils.findEntities(pDeps, inputs.asScala.toSeq) 47 | val outputEntities = TestUtils.findEntities(pDeps, outputs.asScala.toSeq) 48 | 49 | assert(inputs.size() === inputEntities.size) 50 | assert(outputs.size() === outputEntities.size) 51 | 52 | validateFnForInputs(inputEntities) 53 | validateFnForOutputs(outputEntities) 54 | } 55 | 56 | def validateProcessEntityWithAtlasEntities( 57 | entities: Seq[AtlasEntity], 58 | validateFnForProcess: AtlasEntity => Unit, 59 | expectedInputObjectIds: Set[AtlasObjectId], 60 | expectedOutputObjectIds: Set[AtlasObjectId]): Unit = { 61 | val pEntity = getOnlyOneEntity(entities, metadata.PROCESS_TYPE_STRING) 62 | validateFnForProcess(pEntity) 63 | 64 | assert(pEntity.getAttribute("inputs").isInstanceOf[util.Collection[_]]) 65 | assert(pEntity.getAttribute("outputs").isInstanceOf[util.Collection[_]]) 66 | val inputs = pEntity.getAttribute("inputs").asInstanceOf[util.Collection[AtlasObjectId]] 67 | val outputs = pEntity.getAttribute("outputs").asInstanceOf[util.Collection[AtlasObjectId]] 68 | 69 | assert(inputs.asScala.toSet === expectedInputObjectIds) 70 | assert(outputs.asScala.toSet === expectedOutputObjectIds) 71 | } 72 | 73 | def validateProcessEntityWithAtlasEntitiesForStreamingQuery( 74 | entities: Seq[AtlasEntity], 75 | validateFnForProcess: AtlasEntity => Unit, 76 | expectedInputEntities: Seq[AtlasEntity], 77 | expectedOutputEntities: Seq[AtlasEntity]): Unit = { 78 | val pEntity = getOnlyOneEntity(entities, metadata.PROCESS_TYPE_STRING) 79 | validateFnForProcess(pEntity) 80 | 81 | assert(pEntity.getAttribute("inputs").isInstanceOf[util.Collection[_]]) 82 | assert(pEntity.getAttribute("outputs").isInstanceOf[util.Collection[_]]) 83 | val inputs = pEntity.getAttribute("inputs").asInstanceOf[util.Collection[AtlasObjectId]] 84 | val outputs = pEntity.getAttribute("outputs").asInstanceOf[util.Collection[AtlasObjectId]] 85 | 86 | val expectedInputObjectIds = expectedInputEntities.map { entity => 87 | AtlasUtils.entityToReference(entity, useGuid = false) 88 | }.toSet 89 | val expectedOutputObjectIds = 
expectedOutputEntities.map { entity => 90 | AtlasUtils.entityToReference(entity, useGuid = false) 91 | }.toSet 92 | 93 | assert(inputs.asScala.toSet === expectedInputObjectIds) 94 | assert(outputs.asScala.toSet === expectedOutputObjectIds) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/sql/testhelper/TableEntityValidator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.sql.testhelper 19 | 20 | import com.hortonworks.spark.atlas.types.{external, metadata} 21 | import com.hortonworks.spark.atlas.{SACAtlasEntityReference, SACAtlasEntityWithDependencies, SACAtlasReferenceable} 22 | import org.apache.atlas.AtlasClient 23 | import org.scalatest.FunSuite 24 | 25 | trait TableEntityValidator extends FunSuite { 26 | def assertTable( 27 | ref: SACAtlasReferenceable, 28 | dbName: String, 29 | tblName: String, 30 | clusterName: String, 31 | useSparkTable: Boolean): Unit = { 32 | if (useSparkTable) { 33 | assertSparkTable(ref, dbName, tblName) 34 | } else { 35 | assertHiveTable(ref, dbName, tblName, clusterName) 36 | } 37 | } 38 | 39 | def assertTableWithNamePrefix( 40 | ref: SACAtlasReferenceable, 41 | dbName: String, 42 | tblNamePrefix: String, 43 | clusterName: String, 44 | useSparkTable: Boolean): Unit = { 45 | if (useSparkTable) { 46 | assertSparkTableWithNamePrefix(ref, dbName, tblNamePrefix) 47 | } else { 48 | assertHiveTableWithNamePrefix(ref, dbName, tblNamePrefix, clusterName) 49 | } 50 | } 51 | 52 | def assertSparkTable(ref: SACAtlasReferenceable, dbName: String, tblName: String): Unit = { 53 | assert(ref.isInstanceOf[SACAtlasEntityWithDependencies]) 54 | val entity = ref.asInstanceOf[SACAtlasEntityWithDependencies].entity 55 | assert(entity.getTypeName === metadata.TABLE_TYPE_STRING) 56 | assert(entity.getAttribute("name") === tblName) 57 | assert(entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME).toString 58 | .endsWith(s"$dbName.$tblName")) 59 | } 60 | 61 | def assertSparkTableWithNamePrefix( 62 | ref: SACAtlasReferenceable, 63 | dbName: String, 64 | tblNamePrefix: String): Unit = { 65 | assert(ref.isInstanceOf[SACAtlasEntityWithDependencies]) 66 | val entity = ref.asInstanceOf[SACAtlasEntityWithDependencies].entity 67 | assert(entity.getTypeName === metadata.TABLE_TYPE_STRING) 68 | assert(entity.getAttribute("name").toString.startsWith(tblNamePrefix)) 69 | assert(entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME).toString 70 | .contains(s"$dbName.$tblNamePrefix")) 71 | } 72 | 73 | def assertHiveTable( 74 | ref: 
SACAtlasReferenceable, 75 | dbName: String, 76 | tblName: String, 77 | clusterName: String): Unit = { 78 | assert(ref.isInstanceOf[SACAtlasEntityReference]) 79 | val outputRef = ref.asInstanceOf[SACAtlasEntityReference] 80 | assert(outputRef.typeName === external.HIVE_TABLE_TYPE_STRING) 81 | assert(outputRef.qualifiedName === s"$dbName.$tblName@$clusterName") 82 | } 83 | 84 | def assertHiveTableWithNamePrefix( 85 | ref: SACAtlasReferenceable, 86 | dbName: String, 87 | tblNamePrefix: String, 88 | clusterName: String): Unit = { 89 | assert(ref.isInstanceOf[SACAtlasEntityReference]) 90 | val outputRef = ref.asInstanceOf[SACAtlasEntityReference] 91 | assert(outputRef.typeName === external.HIVE_TABLE_TYPE_STRING) 92 | assert(outputRef.qualifiedName.startsWith(s"$dbName.$tblNamePrefix")) 93 | assert(outputRef.qualifiedName.endsWith(s"@$clusterName")) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/types/MLAtlasEntityUtilsSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.types 19 | 20 | import java.io.File 21 | 22 | import org.apache.atlas.{AtlasClient, AtlasConstants} 23 | import org.apache.atlas.model.instance.AtlasEntity 24 | import org.apache.commons.io.FileUtils 25 | import org.apache.spark.ml.Pipeline 26 | import org.apache.spark.ml.feature.MinMaxScaler 27 | import org.apache.spark.ml.linalg.Vectors 28 | import org.apache.spark.sql.types.{IntegerType, StringType, StructType} 29 | import org.scalatest.{FunSuite, Matchers} 30 | import com.hortonworks.spark.atlas.TestUtils._ 31 | import com.hortonworks.spark.atlas.{AtlasUtils, WithHiveSupport} 32 | 33 | class MLAtlasEntityUtilsSuite extends FunSuite with Matchers with WithHiveSupport { 34 | 35 | def getTableEntity(tableName: String): AtlasEntity = { 36 | val dbDefinition = createDB("db1", "hdfs:///test/db/db1") 37 | val sd = createStorageFormat() 38 | val schema = new StructType() 39 | .add("user", StringType, false) 40 | .add("age", IntegerType, true) 41 | val tableDefinition = createTable("db1", s"$tableName", schema, sd) 42 | 43 | val tableEntities = internal.sparkTableToEntity( 44 | tableDefinition, AtlasConstants.DEFAULT_CLUSTER_NAME, Some(dbDefinition)) 45 | val tableEntity = tableEntities.entity 46 | 47 | tableEntity 48 | } 49 | 50 | test("pipeline, pipeline model, fit and transform") { 51 | val uri = "/" 52 | val pipelineDir = "tmp/pipeline" 53 | val modelDir = "tmp/model" 54 | 55 | val pipelineDirEntity = internal.mlDirectoryToEntity(uri, pipelineDir) 56 | pipelineDirEntity.entity.getAttribute("uri") should be (uri) 57 | pipelineDirEntity.entity.getAttribute("directory") should be (pipelineDir) 58 | pipelineDirEntity.dependencies.length should be (0) 59 | 60 | val modelDirEntity = internal.mlDirectoryToEntity(uri, modelDir) 61 | modelDirEntity.entity.getAttribute("uri") should be (uri) 62 | modelDirEntity.entity.getAttribute("directory") should be (modelDir) 63 | modelDirEntity.dependencies.length should be (0) 64 | 65 | val df = sparkSession.createDataFrame(Seq( 66 | (1, Vectors.dense(0.0, 1.0, 4.0), 1.0), 67 | (2, Vectors.dense(1.0, 0.0, 4.0), 2.0), 68 | (3, Vectors.dense(1.0, 0.0, 5.0), 3.0), 69 | (4, Vectors.dense(0.0, 0.0, 5.0), 4.0) 70 | )).toDF("id", "features", "label") 71 | 72 | val scaler = new MinMaxScaler() 73 | .setInputCol("features") 74 | .setOutputCol("features_scaled") 75 | .setMin(0.0) 76 | .setMax(3.0) 77 | val pipeline = new Pipeline().setStages(Array(scaler)) 78 | 79 | val model = pipeline.fit(df) 80 | 81 | pipeline.write.overwrite().save(pipelineDir) 82 | 83 | val pipelineEntity = internal.mlPipelineToEntity(pipeline.uid, pipelineDirEntity) 84 | pipelineEntity.entity.getTypeName should be (metadata.ML_PIPELINE_TYPE_STRING) 85 | pipelineEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be ( 86 | pipeline.uid) 87 | pipelineEntity.entity.getAttribute("name") should be (pipeline.uid) 88 | pipelineEntity.entity.getRelationshipAttribute("directory") should be ( 89 | AtlasUtils.entityToReference(pipelineDirEntity.entity, useGuid = false)) 90 | pipelineEntity.dependencies should be (Seq(pipelineDirEntity)) 91 | 92 | val modelEntity = internal.mlModelToEntity(model.uid, modelDirEntity) 93 | val modelUid = model.uid.replaceAll("pipeline", "model") 94 | modelEntity.entity.getTypeName should be (metadata.ML_MODEL_TYPE_STRING) 95 | modelEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be (modelUid) 96 | modelEntity.entity.getAttribute("name") should be (modelUid) 97 | 
modelEntity.entity.getRelationshipAttribute("directory") should be ( 98 | AtlasUtils.entityToReference(modelDirEntity.entity, useGuid = false)) 99 | 100 | modelEntity.dependencies should be (Seq(modelDirEntity)) 101 | 102 | FileUtils.deleteDirectory(new File("tmp")) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/types/SparkAtlasEntityUtilsSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.types 19 | 20 | import org.apache.atlas.{AtlasClient, AtlasConstants} 21 | import org.apache.spark.sql.SparkSession 22 | import org.apache.spark.sql.types._ 23 | import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers} 24 | import com.hortonworks.spark.atlas.{AtlasClientConf, SACAtlasEntityWithDependencies, AtlasUtils, TestUtils} 25 | import com.hortonworks.spark.atlas.utils.SparkUtils 26 | 27 | class SparkAtlasEntityUtilsSuite extends FunSuite with Matchers with BeforeAndAfterAll { 28 | import TestUtils._ 29 | 30 | private var sparkSession: SparkSession = _ 31 | 32 | private var sparkAtlasEntityUtils: AtlasEntityUtils = _ 33 | 34 | override protected def beforeAll(): Unit = { 35 | super.beforeAll() 36 | sparkSession = SparkSession.builder() 37 | .master("local") 38 | .config("spark.ui.enabled", "false") 39 | .getOrCreate() 40 | 41 | sparkAtlasEntityUtils = new AtlasEntityUtils { 42 | override def conf: AtlasClientConf = new AtlasClientConf 43 | } 44 | } 45 | 46 | override protected def afterAll(): Unit = { 47 | sparkSession.stop() 48 | SparkSession.clearActiveSession() 49 | SparkSession.clearDefaultSession() 50 | sparkSession = null 51 | sparkAtlasEntityUtils = null 52 | super.afterAll() 53 | } 54 | 55 | test("convert spark catalog db to entity") { 56 | val dbDefinition = createDB("db1", "hdfs:///test/db/db1") 57 | val dbEntity = sparkAtlasEntityUtils.sparkDbToEntity(dbDefinition) 58 | 59 | dbEntity.entity.getTypeName should be (metadata.DB_TYPE_STRING) 60 | dbEntity.entity.getAttribute("name") should be ("db1") 61 | dbEntity.entity.getAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE) should be ( 62 | AtlasConstants.DEFAULT_CLUSTER_NAME) 63 | dbEntity.entity.getAttribute("location") should be ("hdfs:///test/db/db1") 64 | dbEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be ( 65 | sparkSession.sparkContext.applicationId + ".db1") 66 | 67 | dbEntity.dependencies.length should be (0) 68 | } 69 | 70 | test("convert spark catalog storage format to entity") { 71 | val storageFormat = createStorageFormat() 72 | val sdEntity = 73 | 
sparkAtlasEntityUtils.sparkStorageFormatToEntity(storageFormat, "db1", "tbl1") 74 | 75 | sdEntity.entity.getTypeName should be (metadata.STORAGEDESC_TYPE_STRING) 76 | sdEntity.entity.getAttribute("location") should be (null) 77 | sdEntity.entity.getAttribute("inputFormat") should be (null) 78 | sdEntity.entity.getAttribute("outputFormat") should be (null) 79 | sdEntity.entity.getAttribute("serde") should be (null) 80 | sdEntity.entity.getAttribute("compressed") should be (java.lang.Boolean.FALSE) 81 | sdEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be ( 82 | sparkSession.sparkContext.applicationId + ".db1.tbl1.storageFormat") 83 | 84 | sdEntity.dependencies.length should be (0) 85 | } 86 | 87 | test("convert spark table to entity") { 88 | val dbDefinition = createDB("db1", "hdfs:///test/db/db1") 89 | val sd = createStorageFormat() 90 | val schema = new StructType() 91 | .add("user", StringType, false) 92 | .add("age", IntegerType, true) 93 | val tableDefinition = createTable("db1", "tbl1", schema, sd) 94 | 95 | val tableEnt = sparkAtlasEntityUtils.sparkTableToEntity(tableDefinition, Some(dbDefinition)) 96 | assert(tableEnt.isInstanceOf[SACAtlasEntityWithDependencies]) 97 | val tableEntity = tableEnt.asInstanceOf[SACAtlasEntityWithDependencies] 98 | 99 | val tableDeps = tableEntity.dependencies 100 | 101 | val dbEntity = tableDeps.find(_.typeName == metadata.DB_TYPE_STRING).get 102 | val sdEntity = tableDeps.find(_.typeName == metadata.STORAGEDESC_TYPE_STRING).get 103 | 104 | tableEntity.entity.getTypeName should be (metadata.TABLE_TYPE_STRING) 105 | tableEntity.entity.getAttribute("name") should be ("tbl1") 106 | tableEntity.entity.getAttribute("owner") should be (SparkUtils.currUser()) 107 | tableEntity.entity.getAttribute("ownerType") should be ("USER") 108 | 109 | tableEntity.entity.getRelationshipAttribute("db") should be (dbEntity.asObjectId) 110 | tableEntity.entity.getRelationshipAttribute("sd") should be (sdEntity.asObjectId) 111 | } 112 | } 113 | 114 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/utils/JdbcUtilsTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.hortonworks.spark.atlas.utils 19 | 20 | import org.scalatest.{FunSuite, Matchers} 21 | 22 | class JdbcUtilsTest extends FunSuite with Matchers { 23 | 24 | test("get database name from mysql url") { 25 | val dbName = JdbcUtils.getDatabaseName("jdbc:mysql://localhost:3306/testdb") 26 | dbName should be ("testdb") 27 | } 28 | 29 | test("get database name from mysql url with properties") { 30 | val dbName = JdbcUtils.getDatabaseName( 31 | "jdbc:mysql://localhost:3306/testdb?user=root&password=secret") 32 | dbName should be ("testdb") 33 | } 34 | 35 | test("get database name from mariadb url") { 36 | val dbName = JdbcUtils.getDatabaseName("jdbc:mariadb://127.0.0.1/testdb") 37 | dbName should be ("testdb") 38 | } 39 | 40 | test("get database name from db2 url") { 41 | val dbName = JdbcUtils.getDatabaseName("jdbc:db2://127.0.0.1:50000/testdb") 42 | dbName should be ("testdb") 43 | } 44 | 45 | test("get database name from derby url") { 46 | val dbName = JdbcUtils.getDatabaseName("jdbc:derby://localhost/testdb") 47 | dbName should be ("testdb") 48 | } 49 | 50 | test("get database name from derby url with properties") { 51 | val dbName = JdbcUtils.getDatabaseName("jdbc:derby://localhost/testdb;create=true") 52 | dbName should be ("testdb") 53 | } 54 | 55 | test("get database name from derby in memory format url with properties") { 56 | val dbName = JdbcUtils.getDatabaseName("jdbc:derby:memory:testdb;create=true") 57 | dbName should be ("testdb") 58 | } 59 | 60 | test("get database name from oracle url") { 61 | val dbName = JdbcUtils.getDatabaseName("jdbc:oracle:thin:root/secret@localhost:1521:testdb") 62 | dbName should be ("testdb") 63 | } 64 | 65 | test("get database name from postgres url") { 66 | val dbName = JdbcUtils.getDatabaseName("jdbc:postgresql://localhost:5432/testdb") 67 | dbName should be ("testdb") 68 | } 69 | 70 | test("get database name from sql server url") { 71 | val dbName = JdbcUtils.getDatabaseName( 72 | "jdbc:sqlserver://localhost:1433;databaseName=testdb;integratedSecurity=true;") 73 | dbName should be ("testdb") 74 | } 75 | 76 | test("get database name from sql server url with properties") { 77 | val dbName = JdbcUtils.getDatabaseName( 78 | "jdbc:sqlserver://localhost:1433;databaseName=testdb") 79 | dbName should be ("testdb") 80 | } 81 | 82 | test("get database name from teradata url") { 83 | val dbName = JdbcUtils.getDatabaseName( 84 | "jdbc:teradata://127.0.0.1/DATABASE=testdb") 85 | dbName should be ("testdb") 86 | } 87 | 88 | test("get database name from teradata url with properties") { 89 | val dbName = JdbcUtils.getDatabaseName( 90 | "jdbc:teradata://127.0.0.1/DATABASE=testdb/CHARSET=UTF8,COMPAT_DBS=true") 91 | dbName should be ("testdb") 92 | } 93 | 94 | test("unsupported database") { 95 | val dbName = JdbcUtils.getDatabaseName( 96 | "jdbc:sqlite:product.db") 97 | dbName should be ("") 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hortonworks/spark/atlas/utils/SparkUtilsSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hortonworks.spark.atlas.utils 19 | 20 | import org.apache.hadoop.hive.conf.HiveConf 21 | import org.apache.spark.sql.SparkSession 22 | import org.scalatest._ 23 | 24 | class SparkUtilsSuite extends FunSuite with Matchers with BeforeAndAfter { 25 | 26 | var sparkSession: SparkSession = _ 27 | 28 | after { 29 | if (sparkSession != null) { 30 | sparkSession.stop() 31 | SparkSession.clearActiveSession() 32 | SparkSession.clearDefaultSession() 33 | sparkSession = null 34 | } 35 | } 36 | 37 | test("get unique prefix when using in-memory catalog") { 38 | sparkSession = SparkSession.builder() 39 | .master("local") 40 | .getOrCreate() 41 | 42 | SparkUtils.getUniqueQualifiedPrefix() should be (sparkSession.sparkContext.applicationId + ".") 43 | } 44 | 45 | // TODO. Should have a better way to figure out unique name 46 | ignore("get unique prefix when using hive catalog") { 47 | sparkSession = SparkSession.builder() 48 | .master("local") 49 | .enableHiveSupport() 50 | .config("spark.ui.enabled", "false") 51 | .getOrCreate() 52 | 53 | val hiveConf = new HiveConf(sparkSession.sparkContext.hadoopConfiguration, classOf[HiveConf]) 54 | 55 | // if hive.metastore.uris is set, which means we're using metastore server. 56 | hiveConf.set("hive.metastore.uris", "thrift://localhost:10000") 57 | SparkUtils.getUniqueQualifiedPrefix(Some(hiveConf)) should be ("thrift://localhost:10000.") 58 | 59 | // if embedded mode is used 60 | hiveConf.unset("hive.metastore.uris") 61 | hiveConf.set("javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver") 62 | SparkUtils.getUniqueQualifiedPrefix(Some(hiveConf)) should be ( 63 | sparkSession.sparkContext.applicationId + ".") 64 | 65 | // otherwise if local metastore backend is used 66 | hiveConf.set("javax.jdo.option.ConnectionDriverName", "com.mysql.jdbc.Driver") 67 | hiveConf.set("javax.jdo.option.ConnectionURL", 68 | "jdbc:mysql://localhost:3030/hive?createDatabaseIfNotExist=true") 69 | SparkUtils.getUniqueQualifiedPrefix(Some(hiveConf)) should be ( 70 | "jdbc:mysql://localhost:3030/hive?createDatabaseIfNotExist=true.") 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /spark-atlas-connector/src/test/scala/com/hotels/beeju/ThriftHiveMetaStoreTestUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.hotels.beeju 19 | 20 | class ThriftHiveMetaStoreTestUtil(dbName: String) 21 | extends ThriftHiveMetaStoreJUnitRule(dbName) { 22 | override def before(): Unit = { 23 | super.before() 24 | } 25 | 26 | override def after(): Unit = { 27 | super.after() 28 | } 29 | } 30 | --------------------------------------------------------------------------------