├── .travis.yml ├── project └── plugins.sbt ├── src ├── main │ └── scala │ │ ├── org │ │ └── apache │ │ │ └── spark │ │ │ └── elasticsearch │ │ │ ├── ResponseHandler.scala │ │ │ ├── SparkOperations.scala │ │ │ ├── package.scala │ │ │ ├── RDDOperations.scala │ │ │ └── ElasticSearchRDD.scala │ │ └── Tryout.scala └── test │ └── scala │ └── org │ └── apache │ └── spark │ └── elasticsearch │ ├── ElasticSearchSuite.scala │ ├── SparkSuite.scala │ ├── LocalElasticSearch.scala │ └── Tests.scala ├── assembly.sbt ├── sonatype.sbt ├── README.md ├── .gitignore └── LICENSE /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += "sbt-idea-repo" at "http://mpeltonen.github.com/maven/" 2 | 3 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "0.5.0") 4 | 5 | addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0") 6 | 7 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.1") 8 | 9 | addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.6.0") 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/elasticsearch/ResponseHandler.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import org.elasticsearch.action.bulk.BulkItemResponse 4 | 5 | sealed abstract class ResponseHandler 6 | 7 | case object IgnoreFailure extends ResponseHandler 8 | 9 | case object ThrowExceptionOnFailure extends ResponseHandler 10 | 11 | case class CustomHandler[T](handler: (BulkItemResponse, T) => Unit) extends ResponseHandler 12 | -------------------------------------------------------------------------------- /assembly.sbt: -------------------------------------------------------------------------------- 1 | assemblyShadeRules in assembly := Seq( 2 | ShadeRule.rename("com.google.**" -> "shadedgoogle.@1").inAll 3 | ) 4 | 5 | assemblyMergeStrategy in assembly := { 6 | case PathList("META-INF", xs@_*) => 7 | xs map (_.toLowerCase) match { 8 | case ("manifest.mf" :: Nil) | ("index.list" :: Nil) | ("dependencies" :: Nil) => 9 | MergeStrategy.discard 10 | case _ => MergeStrategy.discard 11 | } 12 | case _ => MergeStrategy.first 13 | } -------------------------------------------------------------------------------- /sonatype.sbt: -------------------------------------------------------------------------------- 1 | sonatypeProfileName := "com.github.shse" 2 | 3 | pomExtra := { 4 | <url>https://github.com/SHSE/spark-es</url> 5 | <licenses> 6 | <license> 7 | <name>Apache 2</name> 8 | <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> 9 | </license> 10 | </licenses> 11 | <scm> 12 | <connection>scm:git:github.com/SHSE/spark-es</connection> 13 | <developerConnection>scm:git:git@github.com:SHSE/spark-es</developerConnection> 14 | <url>github.com/SHSE/spark-es</url> 15 | </scm> 16 | <developers> 17 | <developer> 18 | <id>SHSE</id> 19 | <name>Sergey Shumov</name> 20 | <url>https://github.com/SHSE</url> 21 | </developer> 22 | </developers> 23 | } -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/elasticsearch/ElasticSearchSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import org.scalatest.{BeforeAndAfterEach, Suite} 4 | 5 | trait ElasticSearchSuite extends Suite with BeforeAndAfterEach { 6 | val templateUrls = Seq.empty[String] 7 | val indexMappings = Map.empty[String, String] 8 | 9 | private var localES: LocalElasticSearch 
= null 10 | 11 | def es = localES 12 | 13 | override protected def afterEach(): Unit = { 14 | super.afterEach() 15 | 16 | clean() 17 | } 18 | 19 | def clean(): Unit = { 20 | localES.close() 21 | localES = null 22 | } 23 | 24 | override protected def beforeEach(): Unit = { 25 | super.beforeEach() 26 | 27 | localES = new LocalElasticSearch 28 | } 29 | } 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/elasticsearch/SparkSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | import org.scalatest.{BeforeAndAfterEach, Suite} 5 | 6 | trait SparkSuite extends Suite with BeforeAndAfterEach { 7 | private var currentSparkContext: SparkContext = null 8 | 9 | def sparkContext = currentSparkContext 10 | 11 | val conf = new SparkConf() 12 | .setMaster("local") 13 | .setAppName(getClass.getName) 14 | 15 | 16 | override protected def beforeEach(): Unit = { 17 | super.beforeEach() 18 | 19 | currentSparkContext = new SparkContext(conf) 20 | } 21 | 22 | override protected def afterEach(): Unit = { 23 | super.afterEach() 24 | 25 | currentSparkContext.stop() 26 | 27 | System.clearProperty("spark.master.port") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/elasticsearch/SparkOperations.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import org.apache.spark.SparkContext 4 | import org.elasticsearch.action.search.SearchRequestBuilder 5 | import org.elasticsearch.index.query.QueryBuilders 6 | 7 | object SparkOperations { 8 | def esRDD( 9 | sparkContext: SparkContext, 10 | nodes: Seq[String], 11 | clusterName: String, 12 | indexNames: Seq[String], 13 | typeNames: Seq[String], 14 | query: String): ElasticSearchRDD = { 15 | def setQuery(request: SearchRequestBuilder): Unit = { 16 | request.setQuery( 17 | QueryBuilders.constantScoreQuery( 18 | QueryBuilders.queryStringQuery(query) 19 | ) 20 | ) 21 | } 22 | 23 | new ElasticSearchRDD(sparkContext, nodes, clusterName, indexNames, typeNames, setQuery) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/elasticsearch/LocalElasticSearch.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import java.nio.file.Files 4 | import java.util.UUID 5 | 6 | import org.apache.commons.io.FileUtils 7 | import org.elasticsearch.common.settings.Settings 8 | import org.elasticsearch.node.{NodeBuilder, Node} 9 | 10 | class LocalElasticSearch(val clusterName: String = UUID.randomUUID().toString) { 11 | lazy val node = buildNode() 12 | lazy val client = node.client() 13 | val dataDir = Files.createTempDirectory("elasticsearch").toFile 14 | 15 | private var started = false 16 | 17 | def buildNode(): Node = { 18 | val settings = Settings.settingsBuilder() 19 | .put("path.home", dataDir.getAbsolutePath) 20 | .put("path.logs", s"${dataDir.getAbsolutePath}/logs") 21 | .put("path.data", s"${dataDir.getAbsolutePath}/data") 22 | .put("index.store.fs.memory.enabled", true) 23 | .put("index.number_of_shards", 1) 24 | .put("index.number_of_replicas", 0) 25 | .put("cluster.name", clusterName) 26 | .build() 27 | 28 | val instance = 
NodeBuilder.nodeBuilder().settings(settings).node() 29 | 30 | started = true 31 | 32 | instance 33 | } 34 | 35 | def close(): Unit = { 36 | if (started) { 37 | client.close() 38 | node.close() 39 | } 40 | 41 | try { 42 | FileUtils.forceDelete(dataDir) 43 | } catch { 44 | case e: Exception => 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/Tryout.scala: -------------------------------------------------------------------------------- 1 | import java.nio.file.Files 2 | 3 | import org.apache.commons.io.FileUtils 4 | import org.apache.spark.SparkContext 5 | import org.elasticsearch.common.settings.Settings 6 | import org.elasticsearch.node.NodeBuilder 7 | import org.apache.spark.elasticsearch._ 8 | 9 | object Tryout { 10 | def main(args: Array[String]): Unit = { 11 | val sparkContext = new SparkContext("local[2]", "SparkES") 12 | 13 | val dataDir = Files.createTempDirectory("elasticsearch").toFile 14 | 15 | dataDir.deleteOnExit() 16 | 17 | val settings = Settings.settingsBuilder() 18 | .put("path.home", dataDir.getAbsolutePath) 19 | .put("path.logs", s"${dataDir.getAbsolutePath}/logs") 20 | .put("path.data", s"${dataDir.getAbsolutePath}/data") 21 | .put("index.store.fs.memory.enabled", true) 22 | .put("index.number_of_shards", 1) 23 | .put("index.number_of_replicas", 0) 24 | .put("cluster.name", "SparkES") 25 | .build() 26 | 27 | val node = NodeBuilder.nodeBuilder().settings(settings).node() 28 | 29 | val client = node.client() 30 | 31 | sparkContext 32 | .parallelize(Seq( 33 | ESDocument(ESMetadata("2", "type1", "index1"), """{"name": "John Smith"}"""), 34 | ESDocument(ESMetadata("1", "type1", "index1"), """{"name": "Sergey Shumov"}""") 35 | ), 2) 36 | .saveToES(Seq("localhost"), "SparkES") 37 | 38 | client.admin().cluster().prepareHealth("index1").setWaitForGreenStatus().get() 39 | 40 | val documents = sparkContext.esRDD( 41 | Seq("localhost"), "SparkES", Seq("index1"), Seq("type1"), "name:sergey") 42 | 43 | println(documents.count()) 44 | 45 | documents.foreach(println) 46 | 47 | sparkContext.stop() 48 | 49 | client.close() 50 | 51 | node.close() 52 | 53 | FileUtils.deleteQuietly(dataDir) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/elasticsearch/Tests.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import org.elasticsearch.common.settings.Settings 4 | import org.scalatest.FunSuite 5 | 6 | class Tests extends FunSuite with SparkSuite with ElasticSearchSuite { 7 | test("Reads documents from multiple shards") { 8 | val client = es.client 9 | 10 | val indexName = "index-with-multiple-shards" 11 | 12 | client.admin().indices().prepareCreate(indexName) 13 | .setSettings(Settings.settingsBuilder() 14 | .put("index.number_of_replicas", 0) 15 | .put("index.number_of_shards", 2) 16 | .build() 17 | ) 18 | .get() 19 | 20 | for (i <- 1 to 1000) { 21 | client.prepareIndex(indexName, "foo", i.toString).setSource("{}").get() 22 | } 23 | 24 | client.admin().cluster().prepareHealth(indexName).setWaitForGreenStatus().get() 25 | client.admin().indices().prepareRefresh(indexName).get() 26 | 27 | val rdd = sparkContext.esRDD(Seq("localhost"), es.clusterName, Seq(indexName), Seq("foo"), "*") 28 | 29 | assert(rdd.partitions.length == 2) 30 | assert(rdd.collect().map(_.metadata.id).sorted.toList == (1 to 1000).map(_.toString).sorted.toList) 31 | } 32 | 33 | 
test("Writes documents to ElasticSearch") { 34 | val client = es.client 35 | 36 | val indexName = "index1" 37 | 38 | sparkContext.parallelize(Seq(1, 2, 3, 4)) 39 | .map(id => ESDocument(ESMetadata(id.toString, "foo", indexName), "{}")) 40 | .saveToES(Seq("localhost"), es.clusterName) 41 | 42 | client.admin().cluster().prepareHealth(indexName).setWaitForGreenStatus().get() 43 | client.admin().indices().prepareRefresh(indexName).get() 44 | 45 | assert(client.prepareGet(indexName, "foo", "1").get().isExists) 46 | assert(client.prepareGet(indexName, "foo", "2").get().isExists) 47 | assert(client.prepareGet(indexName, "foo", "3").get().isExists) 48 | assert(client.prepareGet(indexName, "foo", "4").get().isExists) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spark ↔ ElasticSearch [![Build Status](https://travis-ci.org/SHSE/spark-es.svg?branch=master)](https://travis-ci.org/SHSE/spark-es) 2 | ElasticSearch integration for Apache Spark. 3 | 4 | ## Features 5 | 6 | * Transport client 7 | * Partition per shard 8 | * Data co-location 9 | * Flexible and simple API 10 | 11 | ## Usage 12 | 13 | Add the `spark-es` dependency to your SBT build: 14 | 15 | ```SBT 16 | libraryDependencies += "com.github.shse" %% "spark-es" % "1.0.7" 17 | ``` 18 | 19 | Read from ElasticSearch using a `query_string` query: 20 | 21 | ```Scala 22 | import org.apache.spark.elasticsearch._ 23 | 24 | val query = "name:john" 25 | 26 | val documents = sparkContext.esRDD(Seq("localhost"), "cluster1", Seq("index1"), Seq("type1"), query) 27 | ``` 28 | 29 | Read from ElasticSearch using `org.elasticsearch.action.search.SearchRequestBuilder`: 30 | 31 | ```Scala 32 | import org.apache.spark.elasticsearch._ 33 | 34 | def getQuery = QueryBuilders.termQuery("name", "john") // Define the query as a function to avoid serialization issues 35 | 36 | val documents = 37 | sparkContext.esRDD(Seq("localhost"), "cluster1", Seq("index1"), Seq("type1"), _.setQuery(getQuery)) 38 | ``` 39 | 40 | Save to ElasticSearch: 41 | 42 | ```Scala 43 | import org.apache.spark.elasticsearch._ 44 | 45 | val documents = Seq( 46 | ESDocument(ESMetadata("1", "type1", "index1"), """{"name": "Sergey Shumov"}"""), 47 | ESDocument(ESMetadata("2", "type1", "index1"), """{"name": "John Smith"}""") 48 | ) 49 | 50 | val options = SaveOptions( 51 | saveOperation = SaveOperation.Create, // Do not overwrite existing documents 52 | ignoreConflicts = true // Do not fail if a document already exists 53 | ) 54 | 55 | sparkContext 56 | .parallelize(documents, 2) 57 | .saveToES(Seq("localhost"), "cluster1", options) 58 | ``` 59 | 
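Save documents given as (id, source) pairs. This is a minimal sketch based on the `saveToES` extension for `RDD[(String, String)]` defined in `package.scala`; the node address, cluster, index and type names are illustrative placeholders:

```Scala
import org.apache.spark.elasticsearch._

// Sketch only: (document id, JSON source) pairs; "cluster1", "index1" and "type1" are placeholders
val pairs = Seq(
  "1" -> """{"name": "Sergey Shumov"}""",
  "2" -> """{"name": "John Smith"}"""
)

sparkContext
  .parallelize(pairs, 2)
  .saveToES(Seq("localhost"), "cluster1", "index1", "type1")
```

Internally this wraps each pair in an `ESDocument` and delegates to the document-based `saveToES`.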
60 | Delete from ElasticSearch: 61 | 62 | ```Scala 63 | import org.apache.spark.elasticsearch._ 64 | 65 | // Using a metadata RDD 66 | val metadata = Seq( 67 | ESMetadata("1", "type1", "index1"), 68 | ESMetadata("2", "type1", "index1") 69 | ) 70 | 71 | sparkContext 72 | .parallelize(metadata, 2) 73 | .deleteFromES(Seq("localhost"), "cluster1") 74 | 75 | // Using document IDs 76 | val ids = Seq("1", "2") 77 | 78 | sparkContext 79 | .parallelize(ids, 2) 80 | .deleteFromES(Seq("localhost"), "cluster1", "index1", "type1") 81 | ``` 82 | 83 | Custom bulk action: 84 | 85 | ```Scala 86 | import org.apache.spark.elasticsearch._ 87 | 88 | val items = Seq("type1" -> "1", "type2" -> "2") 89 | 90 | def handleResponse(response: BulkItemResponse, item: (String, String)): Unit = 91 | if (response.isFailed) 92 | println(response.getFailure.getStatus) 93 | 94 | def handleDocument(client: Client, bulk: BulkRequestBuilder, document: (String, String)): Unit = 95 | bulk.add(client.prepareDelete("index1", document._1, document._2)) 96 | 97 | sparkContext 98 | .parallelize(items, 2) 99 | .bulkToES(Seq("localhost"), "cluster1", handleDocument, CustomHandler(handleResponse)) 100 | ``` 101 | 102 | ## Building 103 | 104 | Assembly: 105 | 106 | ```Bash 107 | sbt assembly 108 | ``` 109 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .history 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | 19 | .idea 20 | 21 | /.settings/ 22 | /.project 23 | 24 | # ****************** ECLIPSE *********************** 25 | 26 | # Directories # 27 | /build/ 28 | /bin/ 29 | target/ 30 | # OS Files # 31 | .DS_Store 32 | *.class 33 | # Package Files # 34 | *.jar 35 | *.war 36 | *.ear 37 | *.db 38 | ###################### 39 | # Windows 40 | ###################### 41 | # Windows image file caches 42 | Thumbs.db 43 | # Folder config file 44 | Desktop.ini 45 | ###################### 46 | # OSX 47 | ###################### 48 | .DS_Store 49 | .svn 50 | # Thumbnails 51 | ._* 52 | # Files that might appear on external disk 53 | .Spotlight-V100 54 | .Trashes 55 | ###################### 56 | # Eclipse 57 | ###################### 58 | *.pydevproject 59 | .project 60 | .metadata 61 | bin/** 62 | tmp/** 63 | tmp/**/* 64 | *.tmp 65 | *.bak 66 | *.swp 67 | *~.nib 68 | local.properties 69 | .classpath 70 | .settings/ 71 | .loadpath 72 | /src/main/resources/rebel.xml 73 | # External tool builders 74 | .externalToolBuilders/ 75 | # Locally stored "Eclipse launch configurations" 76 | *.launch 77 | # CDT-specific 78 | .cproject 79 | # PDT-specific 80 | .buildpath 81 | 82 | # ****************** JET BRAINS IDEA *********************** 83 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion 84 | *.iml 85 | ## Directory-based project format: 86 | .idea/ 87 | # if you remove the above rule, at least ignore the following: 88 | # User-specific stuff: 89 | # .idea/workspace.xml 90 | # .idea/tasks.xml 91 | # .idea/dictionaries 92 | # Sensitive or high-churn files: 93 | # .idea/dataSources.ids 94 | # .idea/dataSources.xml 95 | # .idea/sqlDataSources.xml 96 | # .idea/dynamic.xml 97 | # .idea/uiDesigner.xml 98 | # Gradle: 99 | # .idea/gradle.xml 100 | # .idea/libraries 101 | # Mongo Explorer plugin: 102 | # .idea/mongoSettings.xml 103 | ## File-based project format: 104 | *.ipr 105 | *.iws 106 | ## Plugin-specific files: 107 | # IntelliJ 108 | /out/ 109 | # mpeltonen/sbt-idea plugin 110 | .idea_modules/ 111 | # JIRA plugin 112 | atlassian-ide-plugin.xml 113 | # Crashlytics plugin (for Android Studio and IntelliJ) 114 | com_crashlytics_export_strings.xml 115 | crashlytics.properties 116 | crashlytics-build.properties 117 | 118 | # ****************** JAVA *********************** 119 | *.class 120 | 121 | # Mobile Tools for Java (J2ME) 122 | .mtj.tmp/ 123 | 124 | # Package Files # 125 | *.jar 126 | *.war 127 | *.ear 128 | 129 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 130 | hs_err_pid* 131 | 132 | 133 | 134 | # ****************** SCALA ********************* 135 | *.class 136 | *.log 137 | # sbt specific 138 | .cache 
139 | .history 140 | .lib/ 141 | dist/* 142 | target/ 143 | lib_managed/ 144 | src_managed/ 145 | project/boot/ 146 | project/plugins/project/ 147 | # Scala-IDE specific 148 | .scala_dependencies 149 | .worksheet 150 | /.cache-main 151 | /.cache-tests 152 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/elasticsearch/package.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.elasticsearch.action.bulk.{BulkItemResponse, BulkRequestBuilder} 5 | import org.elasticsearch.action.search.SearchRequestBuilder 6 | import org.elasticsearch.client.Client 7 | import org.elasticsearch.index.VersionType 8 | 9 | package object elasticsearch { 10 | 11 | implicit class SparkExtensions(sparkContext: SparkContext) { 12 | def esRDD( 13 | nodes: Seq[String], 14 | clusterName: String, 15 | indexNames: Seq[String], 16 | typeNames: Seq[String], 17 | configure: SearchRequestBuilder => Unit = _ => ()): ElasticSearchRDD = 18 | new ElasticSearchRDD(sparkContext, nodes, clusterName, indexNames, typeNames, configure) 19 | 20 | def esRDD( 21 | nodes: Seq[String], 22 | clusterName: String, 23 | indexNames: Seq[String], 24 | typeNames: Seq[String], 25 | query: String): ElasticSearchRDD = { 26 | SparkOperations.esRDD(sparkContext, nodes, clusterName, indexNames, typeNames, query) 27 | } 28 | } 29 | 30 | implicit class RDDExtensions[T](rdd: RDD[T]) { 31 | def bulkToES( 32 | nodes: Seq[String], 33 | clusterName: String, 34 | handleDocument: (Client, BulkRequestBuilder, T) => Unit, 35 | handleResponse: ResponseHandler = IgnoreFailure, 36 | batchSize: Int = 20, 37 | refreshIndices: Boolean = true): Unit = 38 | RDDOperations.bulkToES(rdd, nodes, clusterName, handleDocument, handleResponse, batchSize, refreshIndices) 39 | } 40 | 41 | implicit class DocumentRDDExtensions(rdd: RDD[ESDocument]) { 42 | def saveToES( 43 | nodes: Seq[String], 44 | clusterName: String, 45 | options: SaveOptions = SaveOptions()): Unit = 46 | RDDOperations.saveToES(rdd, nodes, clusterName, options) 47 | 48 | def deleteFromES( 49 | nodes: Seq[String], 50 | clusterName: String, 51 | options: DeleteOptions = DeleteOptions()): Unit = 52 | RDDOperations.deleteFromES(rdd.map(_.metadata), nodes, clusterName, options) 53 | } 54 | 55 | implicit class PairRDDExtensions(rdd: RDD[(String, String)]) { 56 | def saveToES( 57 | nodes: Seq[String], 58 | clusterName: String, 59 | indexName: String, 60 | typeName: String, 61 | options: SaveOptions = SaveOptions()): Unit = 62 | RDDOperations.saveToES(rdd, nodes, clusterName, indexName, typeName, options) 63 | } 64 | 65 | implicit class IndicesRDDExtensions(rdd: RDD[String]) { 66 | def deleteFromES( 67 | nodes: Seq[String], 68 | clusterName: String, 69 | indexName: String, 70 | typeName: String, 71 | options: DeleteOptions = DeleteOptions()): Unit = { 72 | RDDOperations.deleteFromES(rdd, nodes, clusterName, indexName, typeName, options) 73 | } 74 | } 75 | 76 | implicit class MetadataRDDExtensions(rdd: RDD[ESMetadata]) { 77 | def deleteFromES( 78 | nodes: Seq[String], 79 | clusterName: String, 80 | options: DeleteOptions = DeleteOptions()): Unit = 81 | RDDOperations.deleteFromES(rdd, nodes, clusterName, options) 82 | } 83 | 84 | case class SaveOptions( 85 | batchSize: Int = 20, 86 | useOptimisticLocking: Boolean = false, 87 | ignoreConflicts: Boolean = false, 88 | saveOperation: SaveOperation.SaveOperation = SaveOperation.Index, 89 | refreshAfterSave: Boolean = 
true, 90 | versionType: Option[VersionType] = None) 91 | 92 | case class DeleteOptions( 93 | batchSize: Int = 20, 94 | useOptimisticLocking: Boolean = false, 95 | ignoreMissing: Boolean = false, 96 | refreshAfterDelete: Boolean = true, 97 | versionType: Option[VersionType] = None) 98 | 99 | case class ElasticSearchBulkFailedException(response: BulkItemResponse) 100 | extends RuntimeException("Failed to process bulk request:\n" + response.getFailureMessage) 101 | 102 | case class ESDocument( 103 | metadata: ESMetadata, 104 | source: String) 105 | 106 | case class ESMetadata( 107 | id: String, 108 | typeName: String, 109 | indexName: String, 110 | routing: Option[String] = None, 111 | version: Option[Long] = None, 112 | parent: Option[String] = None, 113 | timestamp: Option[String] = None) 114 | 115 | case class ElasticSearchResult( 116 | document: ESDocument, 117 | matchedQueries: Seq[String], 118 | innerHits: Map[String, ESDocument], 119 | nodeId: String, 120 | shardId: Int) 121 | 122 | object SaveOperation extends Enumeration { 123 | type SaveOperation = Value 124 | val Create, Index, Update = Value 125 | } 126 | 127 | } 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/elasticsearch/RDDOperations.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.elasticsearch.action.bulk.{BulkItemResponse, BulkRequestBuilder} 5 | import org.elasticsearch.client.Client 6 | import org.elasticsearch.rest.RestStatus 7 | 8 | object RDDOperations { 9 | def deleteFromES( 10 | rdd: RDD[ESMetadata], 11 | nodes: Seq[String], 12 | clusterName: String, 13 | options: DeleteOptions): Unit = { 14 | def handleResponse(response: BulkItemResponse, document: ESMetadata): Unit = { 15 | if (response.isFailed && (response.getFailure.getStatus != RestStatus.NOT_FOUND || !options.ignoreMissing)) 16 | throw new ElasticSearchBulkFailedException(response) 17 | } 18 | 19 | def handleDocument(client: Client, bulk: BulkRequestBuilder, document: ESMetadata): Unit = { 20 | val request = client.prepareDelete( 21 | document.indexName, 22 | document.typeName, 23 | document.id 24 | ) 25 | 26 | document.parent.foreach(request.setParent) 27 | document.routing.foreach(request.setRouting) 28 | 29 | if (options.useOptimisticLocking) { 30 | document.version.foreach(request.setVersion) 31 | options.versionType.foreach(request.setVersionType) 32 | } 33 | 34 | bulk.add(request) 35 | } 36 | 37 | bulkToES[ESMetadata]( 38 | rdd, nodes, clusterName, handleDocument, CustomHandler(handleResponse), options.batchSize, options.refreshAfterDelete 39 | ) 40 | } 41 | 42 | def deleteFromES( 43 | rdd: RDD[String], 44 | nodes: Seq[String], 45 | clusterName: String, 46 | indexName: String, 47 | typeName: String, 48 | options: DeleteOptions): Unit = { 49 | deleteFromES(rdd.map(ESMetadata(_, typeName, indexName)), nodes, clusterName, options) 50 | } 51 | 52 | def saveToES( 53 | rdd: RDD[(String,String)], 54 | nodes: Seq[String], 55 | clusterName: String, 56 | indexName: String, 57 | typeName: String, 58 | options: SaveOptions): Unit = { 59 | val documents = rdd.map { case (id, source) => 60 | ESDocument(ESMetadata(id, typeName, indexName), source) 61 | } 62 | 63 | saveToES(documents, nodes, clusterName, options) 64 | } 65 | 66 | def bulkToES[T]( 67 | rdd: RDD[T], 68 | nodes: Seq[String], 69 | clusterName: String, 70 | handleDocument: (Client, 
BulkRequestBuilder, T) => Unit, 71 | handleResponse: ResponseHandler = IgnoreFailure, 72 | batchSize: Int, 73 | refreshIndices: Boolean): Unit = { 74 | 75 | val indices = rdd.context.accumulableCollection(scala.collection.mutable.TreeSet.empty[String]) 76 | 77 | rdd.foreachPartition { partition => 78 | val client = ElasticSearchRDD.getESClient(nodes, clusterName) 79 | 80 | try { 81 | for (batch <- partition.grouped(batchSize)) { 82 | val bulk = client.prepareBulk() 83 | 84 | for (document <- batch) { 85 | handleDocument(client, bulk, document) 86 | } 87 | 88 | 89 | if (bulk.numberOfActions() > 0) { 90 | val response = bulk.get() 91 | 92 | for ((item, document) <- response.getItems.zip(batch)) { 93 | if (refreshIndices) 94 | indices += item.getIndex 95 | 96 | handleResponse match { 97 | case IgnoreFailure => 98 | case ThrowExceptionOnFailure => throw new ElasticSearchBulkFailedException(item) 99 | case ch: CustomHandler[T] => ch.handler(item, document) 100 | } 101 | } 102 | } 103 | } 104 | } finally { 105 | client.close() 106 | } 107 | } 108 | 109 | if (refreshIndices) 110 | refresh(nodes, clusterName, indices.value.toSeq) 111 | } 112 | 113 | private[elasticsearch] def refresh(nodes: Seq[String], clusterName: String, indexNames: Seq[String]): Unit = { 114 | val client = ElasticSearchRDD.getESClient(nodes, clusterName) 115 | 116 | try { 117 | client.admin().indices().prepareRefresh(indexNames: _*).get() 118 | } finally { 119 | client.close() 120 | } 121 | } 122 | 123 | def saveToES( 124 | rdd: RDD[ESDocument], 125 | nodes: Seq[String], 126 | clusterName: String, 127 | options: SaveOptions): Unit = { 128 | def handleResponse(response: BulkItemResponse, document: ESDocument): Unit = { 129 | if (response.isFailed && (response.getFailure.getStatus != RestStatus.CONFLICT || !options.ignoreConflicts)) 130 | throw new ElasticSearchBulkFailedException(response) 131 | } 132 | 133 | def handleDocument(client: Client, bulk: BulkRequestBuilder, document: ESDocument): Unit = { 134 | options.saveOperation match { 135 | case SaveOperation.Index | SaveOperation.Create => 136 | val request = client.prepareIndex( 137 | document.metadata.indexName, 138 | document.metadata.typeName, 139 | document.metadata.id) 140 | 141 | request.setSource(document.source) 142 | 143 | if (options.saveOperation == SaveOperation.Create) 144 | request.setCreate(true) 145 | 146 | document.metadata.parent.foreach(request.setParent) 147 | document.metadata.routing.foreach(request.setRouting) 148 | document.metadata.timestamp.foreach(request.setTimestamp) 149 | 150 | if (options.useOptimisticLocking) { 151 | document.metadata.version.foreach(request.setVersion) 152 | options.versionType.foreach(request.setVersionType) 153 | } 154 | 155 | bulk.add(request) 156 | 157 | case SaveOperation.Update => 158 | val request = client.prepareUpdate( 159 | document.metadata.indexName, 160 | document.metadata.typeName, 161 | document.metadata.id) 162 | 163 | request.setDoc(document.source) 164 | 165 | document.metadata.parent.foreach(request.setParent) 166 | document.metadata.routing.foreach(request.setRouting) 167 | 168 | bulk.add(request) 169 | } 170 | } 171 | 172 | bulkToES[ESDocument]( 173 | rdd, nodes, clusterName, handleDocument, CustomHandler(handleResponse), options.batchSize, options.refreshAfterSave 174 | ) 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/elasticsearch/ElasticSearchRDD.scala: 
-------------------------------------------------------------------------------- 1 | package org.apache.spark.elasticsearch 2 | 3 | import java.net.{InetSocketAddress, InetAddress} 4 | import java.util.concurrent.TimeUnit 5 | 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.{Partition, SparkContext, TaskContext} 8 | import org.elasticsearch.action.search.{SearchRequestBuilder, SearchType} 9 | import org.elasticsearch.client.Client 10 | import org.elasticsearch.client.transport.TransportClient 11 | import org.elasticsearch.cluster.node.DiscoveryNode 12 | import org.elasticsearch.common.settings.Settings 13 | import org.elasticsearch.common.transport.{InetSocketTransportAddress, LocalTransportAddress, TransportAddress} 14 | import org.elasticsearch.common.unit.TimeValue 15 | import org.elasticsearch.search.SearchHit 16 | 17 | import scala.annotation.meta.param 18 | import scala.collection.JavaConverters._ 19 | import scala.concurrent.duration.Duration 20 | 21 | class ElasticSearchRDD( 22 | @(transient @param) sparkContext: SparkContext, 23 | nodes: Seq[String], 24 | clusterName: String, 25 | indexNames: Seq[String], 26 | typeNames: Seq[String], 27 | configureSearchRequest: SearchRequestBuilder => Unit, 28 | scrollDuration: Duration = Duration(30, TimeUnit.SECONDS)) 29 | extends RDD[ESDocument](sparkContext, Nil) { 30 | 31 | import ElasticSearchRDD._ 32 | 33 | override def compute(split: Partition, context: TaskContext): Iterator[ESDocument] = { 34 | val partition = split.asInstanceOf[ElasticSearchPartition] 35 | 36 | val client = getESClientByAddresses(List(partition.node), clusterName) 37 | 38 | val requestBuilder = client.prepareSearch(indexNames: _*) 39 | .setTypes(typeNames: _*) 40 | .setPreference(s"_shards:${partition.shardId};_local") 41 | .setSearchType(SearchType.SCAN) 42 | .setScroll(TimeValue.timeValueMillis(scrollDuration.toMillis)) 43 | 44 | configureSearchRequest(requestBuilder) 45 | 46 | val scrollId = requestBuilder.get().getScrollId 47 | 48 | TaskContext.get().addTaskCompletionListener { _ => 49 | client.prepareClearScroll().addScrollId(scrollId).get() 50 | client.close() 51 | } 52 | 53 | new DocumentIterator(scrollId, client, TimeValue.timeValueMillis(scrollDuration.toMillis)) 54 | } 55 | 56 | override protected def getPreferredLocations(split: Partition): Seq[String] = { 57 | val endpoint = split.asInstanceOf[ElasticSearchPartition].node 58 | 59 | endpoint match { 60 | case SocketEndpoint(address, _) => Seq(address) 61 | case _ => Seq.empty 62 | } 63 | } 64 | 65 | override protected def getPartitions: Array[Partition] = { 66 | val client = getESClient(nodes, clusterName) 67 | 68 | try { 69 | var partitionsByHost = Map.empty[String, Int] 70 | 71 | def selectNode(nodes: Array[DiscoveryNode]): DiscoveryNode = { 72 | val (selectedNode, assignedPartitionCount) = nodes 73 | .map(node => node -> partitionsByHost.getOrElse(node.getId, 0)) 74 | .sortBy { case (node, count) => count } 75 | .head 76 | 77 | partitionsByHost += selectedNode.getId -> (assignedPartitionCount + 1) 78 | 79 | selectedNode 80 | } 81 | 82 | val metadata = client.admin().cluster().prepareSearchShards(indexNames: _*).get() 83 | 84 | val nodes = metadata.getNodes.map(node => node.getId -> node).toMap 85 | 86 | val partitions = metadata.getGroups 87 | .flatMap(group => group.getShards.map(group.getIndex -> _)) 88 | .map { case (index, shard) => (index, shard.getId) -> nodes(shard.currentNodeId) } 89 | .groupBy { case (indexAndShard, _) => indexAndShard } 90 | .mapValues(_.map(_._2)) 91 | 
.mapValues(selectNode) 92 | .iterator 93 | .zipWithIndex 94 | .map { case (((indexName, shardId), node), index) => 95 | new ElasticSearchPartition(id, index, indexName, transportAddressToEndpoint(node.getAddress), shardId) 96 | } 97 | .map(_.asInstanceOf[Partition]) 98 | .toArray 99 | 100 | if (partitions.isEmpty) { 101 | logWarning("Found no partitions for indices: " + indexNames.mkString(", ")) 102 | } else { 103 | logInfo(s"Found ${partitions.length} partition(s): " + partitions.mkString(", ")) 104 | } 105 | 106 | partitions 107 | } finally { 108 | client.close() 109 | } 110 | } 111 | } 112 | 113 | object ElasticSearchRDD { 114 | def getESClient(nodes: Seq[String], clusterName: String): Client = { 115 | val endpoints = nodes.map(_.split(':')).map { 116 | case Array(host, port) => SocketEndpoint(host, port.toInt) 117 | case Array(host) => SocketEndpoint(host, 9300) 118 | } 119 | 120 | getESClientByAddresses(endpoints, clusterName) 121 | } 122 | 123 | def endpointToTransportAddress(endpoint: Endpoint): TransportAddress = endpoint match { 124 | case LocalEndpoint(id) => new LocalTransportAddress(id) 125 | case SocketEndpoint(address, port) => new InetSocketTransportAddress(InetAddress.getByName(address), port) 126 | } 127 | 128 | def transportAddressToEndpoint(address: TransportAddress): Endpoint = address match { 129 | case socket: InetSocketTransportAddress => 130 | SocketEndpoint(socket.address().getHostName, socket.address().getPort) 131 | 132 | case local: LocalTransportAddress => LocalEndpoint(local.id()) 133 | 134 | case _ => throw new RuntimeException("Unsupported transport address") 135 | } 136 | 137 | def getESClientByAddresses(endpoints: Seq[Endpoint], clusterName: String): TransportClient = { 138 | val settings = Map("cluster.name" -> clusterName) 139 | val esSettings = Settings.settingsBuilder().put(settings.asJava).build() 140 | val client = TransportClient.builder().settings(esSettings).build() 141 | 142 | val addresses = endpoints.map(endpointToTransportAddress) 143 | 144 | client.addTransportAddresses(addresses: _*) 145 | 146 | client 147 | } 148 | 149 | class DocumentIterator(var scrollId: String, client: Client, scrollDuration: TimeValue) extends Iterator[ESDocument] { 150 | private val batch = scala.collection.mutable.Queue.empty[ESDocument] 151 | 152 | def searchHitToDocument(hit: SearchHit): ESDocument = { 153 | ESDocument( 154 | ESMetadata( 155 | hit.getId, 156 | hit.getType, 157 | hit.getIndex, 158 | None, 159 | Some(hit.getVersion), 160 | Option(hit.field("_parent")).map(_.getValue[String]), 161 | Option(hit.field("_timestamp")).map(_.getValue[String]) 162 | ), 163 | hit.getSourceAsString 164 | ) 165 | } 166 | 167 | override def hasNext: Boolean = { 168 | if (batch.nonEmpty) 169 | true 170 | else { 171 | val response = client.prepareSearchScroll(scrollId).setScroll(scrollDuration).get() 172 | val hits = response.getHits.hits() 173 | 174 | scrollId = response.getScrollId 175 | 176 | if (hits.isEmpty) { 177 | false 178 | } else { 179 | hits.iterator.foreach(item => batch.enqueue(searchHitToDocument(item))) 180 | true 181 | } 182 | } 183 | } 184 | 185 | override def next(): ESDocument = batch.dequeue() 186 | } 187 | 188 | sealed abstract class Endpoint 189 | 190 | case class SocketEndpoint(address: String, port: Int) extends Endpoint 191 | 192 | case class LocalEndpoint(id: String) extends Endpoint 193 | 194 | class ElasticSearchPartition( 195 | rddId: Int, 196 | override val index: Int, 197 | val indexName: String, 198 | val node: Endpoint, 199 | val shardId: Int) 
extends Partition { 200 | override def hashCode(): Int = 41 * (41 + rddId) + index 201 | 202 | override def toString = s"ElasticSearchPartition($index, $indexName, $node, $shardId)" 203 | } 204 | 205 | } 206 | 207 | 208 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | --------------------------------------------------------------------------------