├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── insightedge-cli ├── pom.xml └── src │ └── main │ └── java │ └── org │ └── insightedge │ └── cli │ └── commands │ ├── I9ECommandFactory.java │ ├── I9EDemoCommand.java │ └── I9EMainCommand.java ├── insightedge-core ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── insightedge │ │ │ ├── internal │ │ │ └── utils │ │ │ │ ├── ClassLoaderUtils.java │ │ │ │ └── SparkSessionUtils.java │ │ │ └── spark │ │ │ ├── SparkSessionProvider.java │ │ │ ├── SparkSessionProviderFactoryBean.java │ │ │ └── japi │ │ │ └── JBucketedGridModel.java │ └── scala │ │ └── org │ │ ├── apache │ │ └── spark │ │ │ └── sql │ │ │ └── insightedge │ │ │ ├── DataFrameImplicits.scala │ │ │ ├── DataFrameSchema.scala │ │ │ ├── DefaultSource.scala │ │ │ ├── GeospatialImplicits.scala │ │ │ ├── expression │ │ │ ├── GeoContains.scala │ │ │ ├── GeoExpression.scala │ │ │ ├── GeoIntersects.scala │ │ │ └── GeoWithin.scala │ │ │ ├── filter │ │ │ ├── GeoContains.scala │ │ │ ├── GeoIntersects.scala │ │ │ └── GeoWithin.scala │ │ │ ├── relation │ │ │ ├── InsightEdgeAbstractRelation.scala │ │ │ ├── InsightEdgeClassRelation.scala │ │ │ ├── InsightEdgeDocumentRelation.scala │ │ │ └── SchemaInference.scala │ │ │ └── udt │ │ │ ├── CircleUDT.scala │ │ │ ├── GeoUDTRegistration.scala │ │ │ ├── GeoUtils.scala │ │ │ ├── LineStringUDT.scala │ │ │ ├── PointUDT.scala │ │ │ ├── PolygonUDT.scala │ │ │ └── RectangleUDT.scala │ │ └── insightedge │ │ ├── scala │ │ └── annotation.scala │ │ └── spark │ │ ├── context │ │ ├── InsightEdgeConfig.scala │ │ └── InsightEdgeSparkContext.scala │ │ ├── impl │ │ ├── InsightEdgePartition.scala │ │ ├── InsightEdgeQueryIterator.scala │ │ └── ProfilingIterator.scala │ │ ├── implicits.scala │ │ ├── ml │ │ └── MLImplicits.scala │ │ ├── mllib │ │ ├── MLInstance.scala │ │ └── MLlibImplicits.scala │ │ ├── model │ │ └── BucketedGridModel.scala │ │ ├── rdd │ │ ├── InsightEdgeAbstractRDD.scala │ │ ├── InsightEdgeDocumentRDD.scala │ │ ├── InsightEdgeRDD.scala │ │ ├── InsightEdgeRDDFunctions.scala │ │ └── InsightEdgeSqlRDD.scala │ │ ├── streaming │ │ ├── SaveDStreamToGridExtension.scala │ │ └── StreamingImplicits.scala │ │ └── utils │ │ ├── BucketIdSeq.scala │ │ ├── GridProxyFactory.scala │ │ ├── GridProxyUtils.scala │ │ ├── GridTopologyAllocator.scala │ │ ├── InsightEdgeConstants.scala │ │ ├── LocalCache.scala │ │ ├── Logging.scala │ │ ├── LookupPartitionTask.java │ │ ├── Profiler.scala │ │ └── StringCompiler.scala │ └── test │ ├── java │ └── org │ │ ├── apache │ │ └── spark │ │ │ └── sql │ │ │ └── insightedge │ │ │ ├── JAddress.java │ │ │ ├── JPerson.java │ │ │ └── JSpatialData.java │ │ └── insightedge │ │ ├── TestCluster.java │ │ └── spark │ │ └── rdd │ │ ├── JBucketedData.java │ │ └── JData.java │ ├── resources │ ├── cluster-member-config.xml │ ├── cluster-test-config.xml │ └── data │ │ └── sample_libsvm_data.txt │ └── scala │ └── org │ ├── apache │ └── spark │ │ └── sql │ │ └── insightedge │ │ ├── dataframe │ │ ├── DataFrameCreateSpec.scala │ │ ├── DataFrameGetWithStringSpec.scala │ │ ├── DataFrameNestedQuerySpec.scala │ │ ├── DataFramePersistSpec.scala │ │ ├── DataFrameQuerySpec.scala │ │ ├── DataFrameRelationQuerySpec.scala │ │ └── DataFrameSpatialSpec.scala │ │ ├── dataset │ │ ├── DataSetCreateSpec.scala │ │ ├── DataSetNestedQuerySpec.scala │ │ ├── DataSetPersistSpec.scala │ │ ├── DataSetQuerySpec.scala │ │ └── DataSetSpatialSpec.scala │ │ └── model │ │ ├── Address.scala │ │ ├── AllClassesSupport.scala │ │ ├── DummyPerson.scala │ │ ├── 
NotGridModel.scala │ │ ├── Person.scala │ │ ├── SpatialData.scala │ │ └── SpatialEmbeddedData.scala │ └── insightedge │ └── spark │ ├── fixture │ ├── InsightEdge.scala │ └── InsightEdgeStreaming.scala │ ├── ml │ └── InsightEdgeMlSpec.scala │ ├── mllib │ └── InsightEdgeMLlibSpec.scala │ ├── rdd │ ├── BucketedData.scala │ ├── BucketedGridString.scala │ ├── Data.scala │ ├── GridString.scala │ ├── InsightEdgeRDDSpec.scala │ └── InsightEdgeSqlRDDSpec.scala │ ├── streaming │ └── InsightEdgeStreamingSpec.scala │ └── utils │ ├── GridTopologyAllocatorSpec.scala │ ├── InsightEdgeUtilsSpec.scala │ ├── StringCompilerSpec.scala │ └── Tags.scala ├── insightedge-examples ├── Jenkinsfile ├── LICENSE.md ├── README.md ├── build.sbt ├── doc │ └── images │ │ ├── idea-configuration.png │ │ └── idea-configuration_1.png ├── pom.xml ├── project │ └── assembly.sbt ├── python │ └── sf_salaries.py └── src │ ├── main │ └── scala │ │ └── org │ │ └── insightedge │ │ └── examples │ │ ├── basic │ │ ├── LoadDataFrame.scala │ │ ├── LoadDataset.scala │ │ ├── LoadRdd.scala │ │ ├── LoadRddWithSql.scala │ │ ├── PersistDataFrame.scala │ │ ├── PersistDataset.scala │ │ ├── Product.scala │ │ ├── SaveRdd.scala │ │ ├── SaveRddNewContextInitApi.scala │ │ └── SaveRddNewSessionInitApi.scala │ │ ├── geospatial │ │ ├── GasStation.scala │ │ ├── LoadDataFrameWithGeospatial.scala │ │ └── LoadRddWithGeospatial.scala │ │ ├── mllib │ │ └── SaveAndLoadMLModel.scala │ │ ├── offheap │ │ └── OffHeapPersistence.scala │ │ └── streaming │ │ ├── HashTag.scala │ │ ├── TopTags.scala │ │ └── TwitterPopularTags.scala │ └── test │ └── scala │ └── org │ └── insightedge │ └── examples │ └── InsightedgeExamplesSpec.scala ├── insightedge-integration-tests ├── jobs │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── org │ │ │ └── insightedge │ │ │ └── spark │ │ │ └── jobs │ │ │ ├── Country.java │ │ │ └── Person.java │ │ └── scala │ │ └── org │ │ └── insightedge │ │ └── spark │ │ └── jobs │ │ ├── ContinuosLoadRdd.scala │ │ ├── Data.scala │ │ ├── LoadDataFrame.scala │ │ ├── LoadRdd.scala │ │ ├── Product.scala │ │ ├── SaveRdd.scala │ │ └── StreamExample.scala ├── pom.xml └── tests │ ├── pom.xml │ └── src │ └── test │ ├── resources │ ├── docker │ │ ├── demo-mode │ │ │ ├── Dockerfile │ │ │ └── bootstrap.sh │ │ └── failover │ │ │ └── Dockerfile │ └── log4j.properties │ └── scala │ └── org │ └── insightedge │ └── spark │ ├── examples │ └── ExamplesSubmitSpec.scala │ ├── failover │ ├── DatagridNodeFailOverLoadRddSpec.scala │ ├── DatagridNodeFailOverSaveRddSpec.scala │ ├── MachineFailOverLoadRddSpec.scala │ ├── MachineFailOverSaveRddSpec.scala │ └── MachineFailOverStreamingSpec.scala │ ├── fixture │ └── InsightedgeDemoModeDocker.scala │ ├── jobs │ └── LoadDataFrameSpec.scala │ ├── utils │ ├── BuildUtils.scala │ ├── DockerUtils.scala │ ├── FsUtils.scala │ ├── InsightEdgeAdminUtils.scala │ ├── ProcessUtils.scala │ ├── RestUtils.scala │ ├── Tags.scala │ └── TestUtils.scala │ └── zeppelin │ └── ZeppelinNotebooksSpec.scala ├── insightedge-packager ├── pom.xml ├── src │ └── main │ │ ├── assembly │ │ └── tests │ │ │ └── integration-tests.xml │ │ ├── resources │ │ └── insightedge │ │ │ ├── bin │ │ │ ├── insightedge-pyspark │ │ │ ├── insightedge-pyspark.cmd │ │ │ ├── insightedge-shell │ │ │ ├── insightedge-shell.cmd │ │ │ ├── insightedge-submit │ │ │ ├── insightedge-submit.cmd │ │ │ ├── shell-init.py │ │ │ └── shell-init.scala │ │ │ ├── conf │ │ │ ├── insightedge-env.cmd │ │ │ ├── insightedge-env.sh │ │ │ └── spark_log4j.properties │ │ │ ├── data │ │ │ ├── 
montgomery_schools.json │ │ │ └── sf_salaries_sample.json │ │ │ ├── spark │ │ │ └── conf │ │ │ │ ├── spark-env.cmd │ │ │ │ └── spark-env.sh │ │ │ ├── winutils │ │ │ └── hadoop-winutils-2.6.0.zip │ │ │ └── zeppelin │ │ │ ├── bin │ │ │ ├── common.cmd │ │ │ └── interpreter.cmd │ │ │ ├── conf │ │ │ ├── zeppelin-env.cmd │ │ │ ├── zeppelin-env.sh │ │ │ └── zeppelin-site.xml │ │ │ ├── interpreter │ │ │ └── spark │ │ │ │ └── interpreter-setting.json │ │ │ └── notebook │ │ │ ├── INSIGHTEDGE-BASIC │ │ │ └── note.json │ │ │ ├── INSIGHTEDGE-GEOSPATIAL │ │ │ └── note.json │ │ │ └── INSIGHTEDGE-PYTHON │ │ │ └── note.json │ │ └── scala │ │ └── org │ │ └── insightedge │ │ └── spark │ │ └── packager │ │ ├── Launcher.scala │ │ └── Utils.scala ├── updateGitInfo.bat └── updateGitInfo.sh ├── insightedge-zeppelin ├── pom.xml └── src │ └── main │ └── java │ └── org │ └── apache │ └── zeppelin │ └── insightedge │ └── CompilingInterpreter.java └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | #*.jar 8 | *.war 9 | *.ear 10 | 11 | # IDE garbage 12 | *.iml 13 | *.ipr 14 | *.iws 15 | *.project 16 | *.classpath 17 | 18 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 19 | hs_err_pid* 20 | 21 | # Ignore idea settings 22 | .idea/ 23 | **/.idea/* 24 | **/*/*.iml 25 | **/.idea/* 26 | out/ 27 | bin/* 28 | 29 | # eclipse specific git ignore 30 | *.pydevproject 31 | .project 32 | .metadata 33 | bin/** 34 | tmp/** 35 | tmp/**/* 36 | *.tmp 37 | *.bak 38 | *.swp 39 | *~.nib 40 | local.properties 41 | .classpath 42 | .settings/ 43 | .loadpath 44 | 45 | # Ignore target directories 46 | target/ 47 | **/target/* 48 | 49 | #gigaspaces 50 | gigaspaces/* 51 | transaction.log 52 | */xap/* 53 | 54 | ### Scala template 55 | *.class 56 | *.log 57 | 58 | # sbt specific 59 | .cache 60 | .history 61 | .lib/ 62 | lib/ 63 | dist/* 64 | target/ 65 | lib_managed/ 66 | src_managed/ 67 | project/boot/ 68 | project/plugins/project/ 69 | 70 | # Scala-IDE specific 71 | .scala_dependencies 72 | .worksheet 73 | 74 | 75 | # Created by .ignore support plugin (hsz.mobi) 76 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to InsightEdge 2 | 3 | ## Issue Reports 4 | 5 | Issues are reported and tracked via JIRA - https://xap-issues.atlassian.net 6 | 7 | ## Pull Requests 8 | 9 | Make sure you submit a [Contributor License Agreement](https://xap.github.io/XAP_CLA.pdf) before you submit your first pull request. 10 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # InsightEdge 2 | 3 | **Documentation:** [User Guide](http://insightedge.io/docs/010/index.html)
4 | **Community:** [Slack Channel](http://insightedge-slack.herokuapp.com/), [StackOverflow tag](http://stackoverflow.com/questions/tagged/insightedge), [Email](mailto:hello@insightedge.io)
5 | **Contributing:** [Contribution Guide](https://github.com/InsightEdge/insightedge/blob/branch-1.0/CONTRIBUTING.md)
6 | **Issue Tracker:** [Jira](https://insightedge.atlassian.net)
7 | **License:** [Apache 2.0](https://github.com/InsightEdge/insightedge/blob/master/LICENSE.md) 8 | 9 | 10 | **InsightEdge** is a Spark distribution built on top of the in-memory [Data Grid](https://github.com/InsightEdge/insightedge-datagrid), providing a single platform for analytical and transactional workloads. 11 | 12 | ## Features 13 | * Exposes Data Grid as Spark RDDs 14 | * Saves Spark RDDs to Data Grid 15 | * Full DataFrames and Dataset API support with persistence 16 | * Geospatial API for RDDs and DataFrames, with geospatial indexes 17 | * Transparent integration with SparkContext using Scala implicits 18 | * Data Grid-side filtering with the ability to apply indexes 19 | * Running SQL queries in Spark over Data Grid 20 | * Data locality between Spark and Data Grid nodes 21 | * Storing MLlib models in Data Grid 22 | * Continuously saving Spark Streaming computations to Data Grid 23 | * Off-Heap persistence 24 | * Interactive Web Notebook 25 | * Python support 26 | 27 | ## Building InsightEdge 28 | 29 | InsightEdge is built using [Apache Maven](https://maven.apache.org/). 30 | 31 | First, compile and install the InsightEdge Core libraries: 32 | 33 | ```bash 34 | # without unit tests 35 | mvn clean install -DskipTests=true 36 | 37 | # with unit tests 38 | mvn clean install 39 | ``` 40 | 41 | To build the InsightEdge zip distribution, you need the following binary dependencies: 42 | 43 | * [insightedge-datagrid 12.3.0](https://xap.github.io/): download a copy of the XAP 12.x Open Source Edition 44 | * [insightedge-examples](https://github.com/InsightEdge/insightedge-examples): use the same branch as in this repo; build instructions are in the repository readme 45 | * [insightedge-zeppelin](https://github.com/InsightEdge/insightedge-zeppelin): use the same branch as in this repo, run `./dev/change_scala_version.sh 2.11`, then build with `mvn clean install -DskipTests -P spark-2.1 -P scala-2.11 -P build-distr -Dspark.version=2.1.1` 46 | * [Apache Spark 2.3.0](http://spark.apache.org/downloads.html): download zip 47 | 48 | Package the InsightEdge distribution: 49 | 50 | ```bash 51 | mvn clean package -P package-open -DskipTests=true -Ddist.spark= -Ddist.xap=file:/// -Ddist.zeppelin= -Ddist.examples.target= 52 | ``` 53 | 54 | The archive is generated under the `insightedge-packager/target/open` directory. The archive content is under `insightedge-packager/target/contents-community`. 55 | 56 | To run integration tests, refer to the [wiki page](https://github.com/InsightEdge/insightedge/wiki/Integration-tests). 57 | 58 | ## Quick Start 59 | 60 | Build the project and start InsightEdge in demo mode with: 61 | ```bash 62 | cd insightedge-packager/target/contents-community 63 | ./bin/insightedge -demo 64 | ``` 65 | 66 | This starts Zeppelin at http://127.0.0.1:9090 with the InsightEdge tutorial and example notebooks you can play with. The full documentation is available on the [website](http://insightedge.io/docs/010/index.html). A minimal usage sketch of the core API follows below.
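A minimal usage sketch of the implicits-based API (RDD save/load and the `grid` DataFrame reader). This is illustrative only: the `Product` model fields, the `local[2]` master, and the space/lookup values follow the patterns in the `insightedge-examples` module, and the exact `InsightEdgeConfig` and session-builder signatures may differ between versions.

```scala
import org.apache.spark.sql.SparkSession
import org.insightedge.scala.annotation._
import org.insightedge.spark.context.InsightEdgeConfig
import org.insightedge.spark.implicits.all._

import scala.beans.BeanProperty

// Illustrative grid model, modeled after insightedge-examples; the fields are assumptions.
case class Product(@BeanProperty @SpaceId var id: Long,
                   @BeanProperty var description: String,
                   @BeanProperty var quantity: Int) {
  def this() = this(-1, null, -1)
}

object QuickStartSketch extends App {
  // Space name, lookup group and locator are illustrative; adjust to your grid deployment.
  val ieConfig = InsightEdgeConfig("insightedge-space", Some("insightedge"), Some("127.0.0.1:4174"))

  val spark = SparkSession.builder
    .appName("insightedge-quickstart")
    .master("local[2]")
    .insightEdgeConfig(ieConfig) // session-init extension assumed to come from implicits.all._
    .getOrCreate()
  val sc = spark.sparkContext

  // Save an RDD to the Data Grid via the saveToGrid extension method...
  sc.parallelize((1 to 1000).map(i => Product(i, s"product $i", i % 10))).saveToGrid()

  // ...read it back as an RDD backed by the grid partitions...
  println(s"products in grid: ${sc.gridRdd[Product]().count()}")

  // ...or read it as a DataFrame via the grid reader from DataFrameImplicits (shown below).
  spark.read.grid[Product].filter("quantity < 5").show()

  spark.stop()
}
```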
67 | -------------------------------------------------------------------------------- /insightedge-cli/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | insightedge-package 7 | org.gigaspaces.insightedge 8 | 16.4.0-SNAPSHOT 9 | ../pom.xml 10 | 11 | 4.0.0 12 | 13 | insightedge-cli 14 | 15 | 16 | 2.2.1 17 | 18 | 19 | 20 | 21 | org.gigaspaces 22 | xap-cli 23 | ${project.version} 24 | 25 | 26 | 27 | 28 | insightedge-cli 29 | 30 | 31 | maven-compiler-plugin 32 | 33 | 1.8 34 | 1.8 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /insightedge-cli/src/main/java/org/insightedge/cli/commands/I9ECommandFactory.java: -------------------------------------------------------------------------------- 1 | package org.insightedge.cli.commands; 2 | 3 | import com.gigaspaces.start.GsCommandFactory; 4 | import com.gigaspaces.start.JavaCommandBuilder; 5 | 6 | public class I9ECommandFactory extends GsCommandFactory { 7 | public static void main(String[] args) { 8 | execute(args, new I9ECommandFactory()); 9 | } 10 | 11 | protected JavaCommandBuilder cli() { 12 | super.cli(); 13 | command.mainClass("org.insightedge.cli.commands.I9EMainCommand"); 14 | // Class path: 15 | command.classpathFromEnv("INSIGHTEDGE_CLASSPATH"); 16 | appendOshiClassPath(); 17 | 18 | return command; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /insightedge-cli/src/main/java/org/insightedge/cli/commands/I9EMainCommand.java: -------------------------------------------------------------------------------- 1 | package org.insightedge.cli.commands; 2 | 3 | import org.gigaspaces.cli.CliExecutor; 4 | import org.gigaspaces.cli.CommandsSet; 5 | import org.gigaspaces.cli.commands.*; 6 | import picocli.CommandLine.*; 7 | 8 | @Command(headerHeading = I9EMainCommand.HEADER) 9 | public class I9EMainCommand extends XapMainCommand { 10 | public static final String HEADER = 11 | "@|green _____ _ _ _ ______ _ |@%n"+ 12 | "@|green |_ _| (_) | | | | | ____| | | |@%n"+ 13 | "@|green | | _ __ ___ _ __ _| |__ | |_| |__ __| | __ _ ___ |@%n"+ 14 | "@|green | | | '_ \\/ __| |/ _` | '_ \\| __| __| / _` |/ _` |/ _ \\|@%n"+ 15 | "@|green _| |_| | | \\__ \\ | (_| | | | | |_| |___| (_| | (_| | __/|@%n"+ 16 | "@|green |_____|_| |_|___/_|\\__, |_| |_|\\__|______\\__,_|\\__, |\\___||@%n"+ 17 | "@|green __/ | __/ | |@%n"+ 18 | "@|green |___/ |___/ |@%n" + 19 | "%n"; 20 | 21 | public static void main(String[] args) { 22 | CliExecutor.execute(new I9EMainCommand(), args); 23 | } 24 | 25 | @Override 26 | public CommandsSet getSubCommands() { 27 | CommandsSet result = new CommandsSet(super.getSubCommands()); 28 | result.add(new I9EDemoCommand()); 29 | return result; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /insightedge-core/src/main/java/org/insightedge/internal/utils/ClassLoaderUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.insightedge.internal.utils; 17 | 18 | import com.gigaspaces.api.InternalApi; 19 | import com.gigaspaces.start.ClasspathBuilder; 20 | import com.gigaspaces.start.SystemLocations; 21 | import org.jini.rio.boot.ServiceClassLoader; 22 | 23 | import java.net.MalformedURLException; 24 | import java.nio.file.Path; 25 | import java.util.function.Predicate; 26 | 27 | /** 28 | * @author Niv Ingberg 29 | * @since 14.2 30 | */ 31 | @InternalApi 32 | public class ClassLoaderUtils { 33 | public static ClasspathBuilder getSparkClassPath() { 34 | return getSparkClassPath(ClassLoaderUtils::sparkJarsFilter); 35 | } 36 | 37 | public static ClasspathBuilder getSparkClassPath(Predicate<Path> sparkJarsFilter) { 38 | return new ClasspathBuilder() 39 | //.appendPlatformJars("scala") 40 | .appendJars(SystemLocations.singleton().sparkHome().resolve("jars"), sparkJarsFilter); 41 | } 42 | 43 | private static boolean sparkJarsFilter(Path path) { 44 | String jarName = path.getFileName().toString(); 45 | return !jarName.startsWith("xerces"); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /insightedge-core/src/main/java/org/insightedge/internal/utils/SparkSessionUtils.java: 1 | package org.insightedge.internal.utils; 2 | 3 | import com.gigaspaces.api.InternalApi; 4 | import org.apache.spark.TaskContext; 5 | import org.apache.spark.sql.SparkSession; 6 | import scala.Option; 7 | 8 | import java.util.concurrent.ExecutionException; 9 | import java.util.concurrent.Executors; 10 | import java.util.concurrent.Future; 11 | 12 | /** 13 | * @author Niv Ingberg 14 | * @since 14.2 15 | */ 16 | @InternalApi 17 | public class SparkSessionUtils { 18 | public static Option<SparkSession> getDefaultSparkSession() { 19 | Option<SparkSession> session = SparkSession.getDefaultSession(); 20 | if (session.isDefined()) 21 | return session; 22 | if (TaskContext.get() != null) { 23 | Future<Option<SparkSession>> future = Executors.newSingleThreadExecutor().submit(SparkSession::getDefaultSession); 24 | try { 25 | return future.get(); 26 | } catch (InterruptedException e) { 27 | Thread.currentThread().interrupt(); 28 | throw new RuntimeException("Interrupted while getting default spark session", e); 29 | } catch (ExecutionException e) { 30 | throw new RuntimeException("Failed to get default spark session", e.getCause()); 31 | } 32 | } 33 | return Option.empty(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /insightedge-core/src/main/java/org/insightedge/spark/SparkSessionProviderFactoryBean.java: 1 | /* 2 | * Copyright (c) 2019, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.insightedge.spark; 17 | 18 | import org.springframework.beans.factory.FactoryBean; 19 | 20 | import java.util.Map; 21 | 22 | /** 23 | * @author Niv Ingberg 24 | * @since 14.2 25 | */ 26 | public class SparkSessionProviderFactoryBean implements FactoryBean<SparkSessionProvider> { 27 | 28 | private final SparkSessionProvider.Builder builder = new SparkSessionProvider.Builder(); 29 | private SparkSessionProvider instance; 30 | 31 | @Override 32 | public SparkSessionProvider getObject() throws Exception { 33 | if (instance == null) 34 | instance = builder.create(); 35 | return instance; 36 | } 37 | 38 | @Override 39 | public Class<?> getObjectType() { 40 | return SparkSessionProvider.class; 41 | } 42 | 43 | @Override 44 | public boolean isSingleton() { 45 | return true; 46 | } 47 | 48 | public void setMaster(String master) { 49 | builder.master(master); 50 | } 51 | 52 | public void setConfigOptions(Map configOptions) { 53 | builder.configOptions(configOptions); 54 | } 55 | 56 | public void setEnableHiveSupport(boolean enableHiveSupport) { 57 | builder.enableHiveSupport(enableHiveSupport); 58 | } 59 | 60 | public void setLogLevel(String logLevel) { 61 | builder.logLevel(logLevel); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /insightedge-core/src/main/java/org/insightedge/spark/japi/JBucketedGridModel.java: 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.japi; 18 | 19 | import com.gigaspaces.annotation.pojo.SpaceIndex; 20 | import com.gigaspaces.metadata.index.SpaceIndexType; 21 | import org.insightedge.spark.model.BucketedGridModel; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * A port of {@link BucketedGridModel} for Java API.
27 | * 28 | * @author Oleksiy_Dyagilev 29 | */ 30 | public class JBucketedGridModel implements BucketedGridModel, Serializable { 31 | 32 | private Integer metaBucketId; 33 | 34 | public Integer metaBucketId() { 35 | return metaBucketId; 36 | } 37 | 38 | @SpaceIndex(type = SpaceIndexType.EXTENDED) 39 | public Integer getMetaBucketId() { 40 | return metaBucketId; 41 | } 42 | 43 | public void setMetaBucketId(Integer metaBucketId) { 44 | this.metaBucketId = metaBucketId; 45 | } 46 | 47 | public void metaBucketId_$eq(Integer metaBucketId) { 48 | this.metaBucketId = metaBucketId; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/DataFrameImplicits.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge 18 | 19 | import org.apache.spark.sql.types.{Metadata, MetadataBuilder} 20 | import org.apache.spark.sql._ 21 | import org.insightedge.internal.utils.SparkSessionUtils 22 | import org.insightedge.spark.implicits.basic._ 23 | 24 | import scala.reflect._ 25 | 26 | /** 27 | * @author Danylo_Hurin. 28 | */ 29 | trait DataFrameImplicits { 30 | 31 | val InsightEdgeFormat = "org.apache.spark.sql.insightedge" 32 | 33 | def nestedClass[R: ClassTag]: Metadata = { 34 | nestedClassName(classTag[R].runtimeClass.getName) 35 | } 36 | 37 | def nestedClassName(clazz: String): Metadata = { 38 | new MetadataBuilder().putString("class", clazz).build() 39 | } 40 | 41 | implicit class DataFrameReaderWrapper(val reader: DataFrameReader) { 42 | 43 | def grid(collection: String): DataFrame = { 44 | reader.format(InsightEdgeFormat).load(collection) 45 | } 46 | 47 | def grid[R: ClassTag]: DataFrame = { 48 | reader.format(InsightEdgeFormat).option("class", classTag[R].runtimeClass.getName).load() 49 | } 50 | } 51 | 52 | implicit class DataFrameWriterWrapper(val writer: DataFrameWriter[_]) { 53 | 54 | def grid(collection: String) = { 55 | writer.format(InsightEdgeFormat).save(collection) 56 | } 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/DataFrameSchema.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge 18 | 19 | import com.gigaspaces.metadata.StorageType 20 | import org.insightedge.scala.annotation 21 | import annotation._ 22 | import org.apache.spark.sql.types.StructType 23 | 24 | import scala.beans.BeanProperty 25 | 26 | /** 27 | * Stores the dataframe schema in the space when a dataframe is persisted. 28 | * It is required to read the dataframe back into Spark with its schema. 29 | * 30 | * @param collection the type name of the space documents in the space 31 | * @param schema the schema of the dataframe being persisted 32 | */ 33 | class DataFrameSchema( 34 | @BeanProperty 35 | @SpaceId(autoGenerate = false) 36 | var collection: String, 37 | 38 | @BeanProperty 39 | @SpaceStorageType(storageType = StorageType.BINARY) 40 | var schema: StructType 41 | ) { 42 | 43 | def this() = this(null, null) 44 | 45 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/GeospatialImplicits.scala: 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge 18 | 19 | import org.apache.spark.sql.catalyst.expressions.Literal 20 | import org.apache.spark.sql.{ColumnName, Column} 21 | import org.apache.spark.sql.functions._ 22 | import org.apache.spark.sql.insightedge.expression.{GeoWithin, GeoContains, GeoIntersects} 23 | import org.apache.spark.sql.types.{DataType, ObjectType} 24 | import org.openspaces.spatial.shapes.Shape 25 | 26 | /** 27 | * @author Danylo_Hurin.
28 | */ 29 | trait GeospatialImplicits { 30 | 31 | implicit class ColumnWrapper(val column: Column) { 32 | def geoIntersects(other: Column): Column = new Column(GeoIntersects(column.expr, lit(other).expr)) 33 | 34 | def geoIntersects(shape: Shape): Column = this.geoIntersects(typedLit(shape, new ObjectType(classOf[Shape]))) 35 | 36 | def geoContains(other: Column): Column = new Column(GeoContains(column.expr, lit(other).expr)) 37 | 38 | def geoContains(shape: Shape): Column = this.geoContains(typedLit(shape, new ObjectType(classOf[Shape]))) 39 | 40 | def geoWithin(other: Column): Column = new Column(GeoWithin(column.expr, lit(other).expr)) 41 | 42 | def geoWithin(shape: Shape): Column = this.geoWithin(typedLit(shape, new ObjectType(classOf[Shape]))) 43 | 44 | def typedLit(value: Any, dataType: DataType): Column = { 45 | value match { 46 | case c: Column => return c 47 | case s: Symbol => return new ColumnName(value.asInstanceOf[Symbol].name) 48 | case _ => // continue 49 | } 50 | 51 | val literalExpr = Literal.create(value, dataType) 52 | Column(literalExpr) 53 | } 54 | } 55 | 56 | 57 | } 58 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/expression/GeoContains.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.expression 18 | 19 | import org.apache.lucene.spatial.query.SpatialOperation 20 | import org.apache.spark.sql.catalyst.expressions._ 21 | 22 | /** 23 | * @author Leonid_Poliakov 24 | */ 25 | case class GeoContains(left: Expression, right: Expression) extends GeoExpression(left, right) { 26 | 27 | override val operation: SpatialOperation = SpatialOperation.Contains 28 | 29 | override val operationName: String = "Contains" 30 | 31 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/expression/GeoExpression.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.expression 18 | 19 | import org.apache.lucene.spatial.query.SpatialOperation 20 | import org.apache.spark.sql.catalyst.expressions._ 21 | import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} 22 | import org.apache.spark.sql.insightedge.udt.GeoUtils 23 | import org.locationtech.spatial4j.context.SpatialContext 24 | import org.locationtech.spatial4j.shape.{Shape => SpatialShape} 25 | 26 | /** 27 | * @author Leonid_Poliakov 28 | */ 29 | abstract class GeoExpression(left: Expression, right: Expression) extends BinaryExpression with Predicate with Serializable { 30 | 31 | val operation: SpatialOperation 32 | 33 | val operationName: String 34 | 35 | override def nullSafeEval(first: Any, second: Any): Any = { 36 | val spatialContext = GeoUtils.defaultContext 37 | val firstShape = GeoUtils.unpackSpatialShape(first, spatialContext) 38 | val secondShape = GeoUtils.unpackSpatialShape(second, spatialContext) 39 | 40 | operation.evaluate(firstShape, secondShape) 41 | } 42 | 43 | override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { 44 | val utilClass = GeoUtils.getClass.getName 45 | val contextClass = classOf[SpatialContext].getName 46 | val shapeClass = classOf[SpatialShape].getName 47 | val operationClass = classOf[SpatialOperation].getName 48 | 49 | val context = ctx.freshName("context") 50 | val firstShape = ctx.freshName("firstShape") 51 | val secondShape = ctx.freshName("secondShape") 52 | 53 | val utils = utilClass + ".MODULE$" 54 | 55 | nullSafeCodeGen(ctx, ev, (firstVar, secondVar) => 56 | s""" 57 | $contextClass $context = $utils.defaultContext(); 58 | $shapeClass $firstShape = $utils.unpackSpatialShape($firstVar, $context); 59 | $shapeClass $secondShape = $utils.unpackSpatialShape($secondVar, $context); 60 | 61 | ${ev.value} = $operationClass.$operationName.evaluate($firstShape, $secondShape); 62 | """) 63 | } 64 | 65 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/expression/GeoIntersects.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.expression 18 | 19 | import org.apache.lucene.spatial.query.SpatialOperation 20 | import org.apache.spark.sql.catalyst.expressions._ 21 | 22 | /** 23 | * @author Leonid_Poliakov 24 | */ 25 | case class GeoIntersects(left: Expression, right: Expression) extends GeoExpression(left, right) { 26 | 27 | override val operation: SpatialOperation = SpatialOperation.Intersects 28 | 29 | override val operationName: String = "Intersects" 30 | 31 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/expression/GeoWithin.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.expression 18 | 19 | import org.apache.lucene.spatial.query.SpatialOperation 20 | import org.apache.spark.sql.catalyst.expressions._ 21 | 22 | /** 23 | * @author Leonid_Poliakov 24 | */ 25 | case class GeoWithin(left: Expression, right: Expression) extends GeoExpression(left, right) { 26 | 27 | override val operation: SpatialOperation = SpatialOperation.IsWithin 28 | 29 | override val operationName: String = "IsWithin" 30 | 31 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/filter/GeoContains.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.filter 18 | 19 | import org.apache.spark.sql.sources.Filter 20 | import org.openspaces.spatial.shapes.Shape 21 | 22 | /** 23 | * @author Leonid_Poliakov 24 | */ 25 | case class GeoContains(attribute: String, value: Shape) extends Filter { 26 | override def references: Array[String] = Array(attribute) ++ findReferences(value) 27 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/filter/GeoIntersects.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.filter 18 | 19 | import org.apache.spark.sql.sources.Filter 20 | import org.openspaces.spatial.shapes.Shape 21 | 22 | /** 23 | * @author Leonid_Poliakov 24 | */ 25 | case class GeoIntersects(attribute: String, value: Shape) extends Filter { 26 | override def references: Array[String] = Array(attribute) ++ findReferences(value) 27 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/filter/GeoWithin.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.filter 18 | 19 | import org.apache.spark.sql.sources.Filter 20 | import org.openspaces.spatial.shapes.Shape 21 | 22 | /** 23 | * @author Leonid_Poliakov 24 | */ 25 | case class GeoWithin(attribute: String, value: Shape) extends Filter { 26 | override def references: Array[String] = Array(attribute) ++ findReferences(value) 27 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/relation/InsightEdgeClassRelation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.relation 18 | 19 | import org.apache.spark.rdd.RDD 20 | import org.apache.spark.sql._ 21 | import org.apache.spark.sql.insightedge.InsightEdgeSourceOptions 22 | import org.apache.spark.sql.types._ 23 | import org.insightedge.spark.rdd.InsightEdgeSqlRDD 24 | 25 | import scala.reflect.ClassTag 26 | 27 | private[insightedge] case class InsightEdgeClassRelation( 28 | context: SQLContext, 29 | clazz: ClassTag[AnyRef], 30 | options: InsightEdgeSourceOptions 31 | ) 32 | extends InsightEdgeAbstractRelation(context, options) with Serializable { 33 | 34 | override lazy val inferredSchema: StructType = { 35 | val schema = SchemaInference.schemaFor(clazz.runtimeClass) 36 | schema.dataType.asInstanceOf[StructType] 37 | } 38 | 39 | override def insert(data: DataFrame, overwrite: Boolean): Unit = throw new UnsupportedOperationException("saving classes is unsupported") 40 | 41 | override def insert(data: DataFrame, mode: SaveMode): Unit = throw new UnsupportedOperationException("saving classes is unsupported") 42 | 43 | override def buildScan(query: String, params: Seq[Any], fields: Seq[String]): RDD[Row] = { 44 | val clazzName = clazz.runtimeClass.getName 45 | 46 | val rdd = new InsightEdgeSqlRDD(ieConfig, sc, query, params, fields, options.splitCount, options.readBufferSize)(clazz) 47 | 48 | rdd.mapPartitions { data => InsightEdgeAbstractRelation.beansToRows(data, clazzName, schema, fields) } 49 | } 50 | 51 | } 52 | 53 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/udt/CircleUDT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.udt 18 | 19 | import org.apache.spark.sql.catalyst.util.ArrayData 20 | import org.apache.spark.sql.types._ 21 | import org.openspaces.spatial.shapes.Circle 22 | 23 | class CircleUDT extends UserDefinedType[Circle] with Serializable { 24 | 25 | override def sqlType: DataType = ArrayType(DoubleType, containsNull = false) 26 | 27 | override def userClass: Class[Circle] = classOf[Circle] 28 | 29 | override def serialize(obj: Circle): ArrayData = GeoUtils.pack(obj) 30 | 31 | override def deserialize(datum: Any): Circle = GeoUtils.unpackXapCircle(datum.asInstanceOf[ArrayData]) 32 | 33 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/udt/GeoUDTRegistration.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.insightedge.udt 2 | 3 | import org.apache.spark.sql.types.UDTRegistration 4 | 5 | /** 6 | * Registers Geo Shape UDTs 7 | * 8 | * @author Oleksiy_Dyagilev 9 | */ 10 | object GeoUDTRegistration { 11 | 12 | /** 13 | * register Geo Shape UDTs if not already registered 14 | */ 15 | def registerIfNotAlready(): Unit = { 16 | registerIfNotAlready("org.openspaces.spatial.shapes.Circle", "org.apache.spark.sql.insightedge.udt.CircleUDT") 17 | registerIfNotAlready("org.openspaces.spatial.shapes.Point", "org.apache.spark.sql.insightedge.udt.PointUDT") 18 | registerIfNotAlready("org.openspaces.spatial.shapes.Polygon", "org.apache.spark.sql.insightedge.udt.PolygonUDT") 19 | registerIfNotAlready("org.openspaces.spatial.shapes.Rectangle", "org.apache.spark.sql.insightedge.udt.RectangleUDT") 20 | } 21 | 22 | private def registerIfNotAlready(userClass: String, udtClass: String) = { 23 | if (!UDTRegistration.exists(userClass)) { 24 | UDTRegistration.register(userClass, udtClass) 25 | } 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/udt/LineStringUDT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.udt 18 | 19 | import org.apache.spark.sql.catalyst.util.ArrayData 20 | import org.apache.spark.sql.types._ 21 | import org.openspaces.spatial.shapes.LineString 22 | 23 | class LineStringUDT extends UserDefinedType[LineString] with Serializable { 24 | 25 | override def sqlType: DataType = ArrayType(DoubleType, containsNull = false) 26 | 27 | override def userClass: Class[LineString] = classOf[LineString] 28 | 29 | override def serialize(obj: LineString): ArrayData = GeoUtils.pack(obj) 30 | 31 | override def deserialize(datum: Any): LineString = GeoUtils.unpackXapLineString(datum.asInstanceOf[ArrayData]) 32 | 33 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/udt/PointUDT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.udt 18 | 19 | import org.apache.spark.sql.catalyst.util.ArrayData 20 | import org.apache.spark.sql.types._ 21 | import org.openspaces.spatial.shapes.Point 22 | 23 | class PointUDT extends UserDefinedType[Point] with Serializable { 24 | 25 | override def sqlType: DataType = ArrayType(DoubleType, containsNull = false) 26 | 27 | override def userClass: Class[Point] = classOf[Point] 28 | 29 | override def serialize(obj: Point): ArrayData = GeoUtils.pack(obj) 30 | 31 | override def deserialize(datum: Any): Point = GeoUtils.unpackXapPoint(datum.asInstanceOf[ArrayData]) 32 | 33 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/udt/PolygonUDT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.udt 18 | 19 | import org.apache.spark.sql.catalyst.util.ArrayData 20 | import org.apache.spark.sql.types._ 21 | import org.openspaces.spatial.shapes.Polygon 22 | 23 | class PolygonUDT extends UserDefinedType[Polygon] with Serializable { 24 | 25 | override def sqlType: DataType = ArrayType(DoubleType, containsNull = false) 26 | 27 | override def userClass: Class[Polygon] = classOf[Polygon] 28 | 29 | override def serialize(obj: Polygon): ArrayData = GeoUtils.pack(obj) 30 | 31 | override def deserialize(datum: Any): Polygon = GeoUtils.unpackXapPolygon(datum.asInstanceOf[ArrayData]) 32 | 33 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/apache/spark/sql/insightedge/udt/RectangleUDT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.udt 18 | 19 | import org.apache.spark.sql.catalyst.util.ArrayData 20 | import org.apache.spark.sql.types._ 21 | import org.openspaces.spatial.shapes.Rectangle 22 | 23 | class RectangleUDT extends UserDefinedType[Rectangle] with Serializable { 24 | 25 | override def sqlType: DataType = ArrayType(DoubleType, containsNull = false) 26 | 27 | override def userClass: Class[Rectangle] = classOf[Rectangle] 28 | 29 | override def serialize(obj: Rectangle): ArrayData = GeoUtils.pack(obj) 30 | 31 | override def deserialize(datum: Any): Rectangle = GeoUtils.unpackXapRectangle(datum.asInstanceOf[ArrayData]) 32 | 33 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/scala/annotation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.insightedge.scala 18 | 19 | object annotation { 20 | 21 | import com.gigaspaces.annotation.pojo 22 | import org.openspaces.spatial 23 | 24 | import scala.annotation.meta.beanGetter 25 | 26 | type SpaceClass = pojo.SpaceClass 27 | type SpaceClassConstructor = pojo.SpaceClassConstructor 28 | 29 | // Enhance space annotations with @beanGetter property 30 | type SpaceDynamicProperties = pojo.SpaceDynamicProperties@beanGetter 31 | type SpaceExclude = pojo.SpaceExclude@beanGetter 32 | type SpaceFifoGroupingIndex = pojo.SpaceFifoGroupingIndex@beanGetter 33 | type SpaceFifoGroupingProperty = pojo.SpaceFifoGroupingProperty@beanGetter 34 | type SpaceId = pojo.SpaceId@beanGetter 35 | type SpaceIndex = pojo.SpaceIndex@beanGetter 36 | type SpaceIndexes = pojo.SpaceIndexes@beanGetter 37 | type SpaceLeaseExpiration = pojo.SpaceLeaseExpiration@beanGetter 38 | type SpacePersist = pojo.SpacePersist@beanGetter 39 | type SpaceProperty = pojo.SpaceProperty@beanGetter 40 | type SpaceRouting = pojo.SpaceRouting@beanGetter 41 | type SpaceStorageType = pojo.SpaceStorageType@beanGetter 42 | type SpaceVersion = pojo.SpaceVersion@beanGetter 43 | type SpaceSpatialIndex = spatial.SpaceSpatialIndex@beanGetter 44 | type SpaceSpatialIndexes = spatial.SpaceSpatialIndexes@beanGetter 45 | 46 | } 47 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/impl/InsightEdgePartition.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.impl 18 | 19 | import org.apache.spark.Partition 20 | 21 | private[spark] case class InsightEdgePartition( 22 | id: Int, 23 | hostName: String, 24 | gridContainerName: String, 25 | bucketRangeBottom: Option[Int] = None, 26 | bucketRangeTop: Option[Int] = None 27 | ) extends Partition { 28 | override def index: Int = id 29 | 30 | /** 31 | * override equals and hashcode since it's already overridden in Partition and case class doesn't regenerate own implementation 32 | */ 33 | override def equals(o: Any): Boolean = o match { 34 | case that: InsightEdgePartition => 35 | that.id == id && 36 | that.hostName == hostName && 37 | that.gridContainerName == gridContainerName && 38 | that.bucketRangeBottom == bucketRangeBottom && 39 | that.bucketRangeTop == bucketRangeTop 40 | case _ => false 41 | } 42 | 43 | override def hashCode(): Int = id 44 | } 45 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/impl/InsightEdgeQueryIterator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.impl 18 | 19 | import com.gigaspaces.client.iterator.SpaceIterator 20 | 21 | private[spark] class InsightEdgeQueryIterator[T](cur: SpaceIterator[T]) extends Iterator[T] { 22 | 23 | override def hasNext: Boolean = cur.hasNext 24 | 25 | override def next(): T = cur.next() 26 | 27 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/impl/ProfilingIterator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.impl 18 | 19 | import org.insightedge.spark.utils.Logging 20 | 21 | private[spark] class ProfilingIterator[T](delegate: Iterator[T]) extends Iterator[T] with Logging { 22 | private var time: Long = 0 23 | private var finished: Boolean = false 24 | private var _count = 0 25 | 26 | def count() = _count 27 | 28 | 29 | override def hasNext: Boolean = { 30 | val start = System.nanoTime() 31 | val result = delegate.hasNext 32 | time += System.nanoTime() - start 33 | 34 | if (!result) { 35 | if (finished) { 36 | logInfo("iterator hasNext called after finished") 37 | } else { 38 | finished = true 39 | val shortTime = (BigDecimal(time) / 1000000000).setScale(5, BigDecimal.RoundingMode.HALF_UP) 40 | logInfo("iterator accumulated " + shortTime + " seconds") 41 | } 42 | } 43 | 44 | result 45 | } 46 | 47 | override def next(): T = { 48 | val start = System.nanoTime() 49 | val result = delegate.next() 50 | time += System.nanoTime() - start 51 | _count +=1 52 | result 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/ml/MLImplicits.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
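A quick note on what ProfilingIterator measures, with a hypothetical stand-alone usage (ignoring its private[spark] visibility, which confines real use to this project): it accumulates only the time spent inside the delegate's hasNext/next calls and logs the total once the delegate reports exhaustion.

val it = new ProfilingIterator(Iterator.fill(1000)(scala.util.Random.nextInt()))
val total = it.sum // drains the iterator; logs "iterator accumulated N seconds"
println(s"read ${it.count()} elements, sum $total")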
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.ml 18 | 19 | import org.apache.spark.ml.util.MLWritable 20 | import org.apache.spark.mllib.util.Saveable 21 | import org.insightedge.spark.context.InsightEdgeSparkContext 22 | import org.insightedge.spark.mllib.MLInstance 23 | 24 | trait MLImplicits { 25 | 26 | implicit class SaveToGridExtension(model: MLWritable) { 27 | 28 | /** 29 | * Save ML instance to the grid. Limited to non-distributed models, i.e. those that can be serialized with the Java serialization mechanism. 30 | * 31 | * @param sc spark context 32 | * @param name unique name of the ML instance 33 | */ 34 | def saveToGrid(sc: InsightEdgeSparkContext, name: String): Unit = { 35 | sc.grid.write(MLInstance(name, model)) 36 | } 37 | } 38 | 39 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/mllib/MLInstance.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.mllib 18 | 19 | import com.gigaspaces.metadata.StorageType 20 | import org.insightedge.scala.annotation 21 | import annotation._ 22 | 23 | import scala.beans.BeanProperty 24 | 25 | /** 26 | * A holder for MLlib and ML instances (models, pipelines, etc) 27 | */ 28 | case class MLInstance( 29 | @BeanProperty 30 | @SpaceId 31 | var id: String, 32 | 33 | @BeanProperty 34 | @SpaceStorageType(storageType = StorageType.BINARY) 35 | var instance: AnyRef 36 | ) { 37 | def this() = this(null, null) 38 | 39 | } 40 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/mllib/MLlibImplicits.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
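A hedged usage sketch of the ML flavor above; the import path and the fitted model are assumptions for illustration (the project surfaces its implicits through org.insightedge.spark.implicits, as the streaming spec later in this dump shows). With the implicit class in scope, any MLWritable gains saveToGrid, which wraps it in an MLInstance keyed by name and writes it to the space.

import org.insightedge.spark.implicits.mllib._ // assumed entry point for these implicits

val model = pipeline.fit(trainingDf)   // hypothetical fitted PipelineModel (an MLWritable)
model.saveToGrid(sc, "fraud-model-v1") // stored as MLInstance("fraud-model-v1", model)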
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.mllib 18 | 19 | import org.apache.spark.mllib.util.Saveable 20 | import org.insightedge.spark.context.InsightEdgeSparkContext 21 | 22 | trait MLlibImplicits { 23 | 24 | implicit class SaveToGridExtension(model: Saveable) { 25 | /** 26 | * Save ML instance to the grid. Limited to non-distributed models, i.e. those that can be serialized with the Java serialization mechanism. 27 | * 28 | * @param sc spark context 29 | * @param name unique name of the ML instance 30 | */ 31 | def saveToGrid(sc: InsightEdgeSparkContext, name: String): Unit = { 32 | sc.grid.write(MLInstance(name, model)) 33 | } 34 | } 35 | 36 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/model/BucketedGridModel.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.model 18 | 19 | import com.gigaspaces.metadata.index.SpaceIndexType 20 | import org.insightedge.scala.annotation 21 | import annotation._ 22 | 23 | import scala.annotation.meta.{beanGetter, getter} 24 | import scala.beans.BeanProperty 25 | 26 | /** 27 | * Trait used to define bucketed space classes. Bucketing makes it possible to have more Spark partitions than Data Grid partitions, i.e. 28 | * each Data Grid partition is split into several buckets and a bucket range is assigned to each Spark partition. 29 | * 30 | * @author Leonid_Poliakov 31 | */ 32 | 33 | trait BucketedGridModel { 34 | 35 | // TODO: check if index is applied 36 | // compilation warning is due to https://issues.scala-lang.org/browse/SI-8813 37 | @BeanProperty 38 | @SpaceIndex(`type` = SpaceIndexType.EXTENDED) 39 | var metaBucketId: Integer = _ 40 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/rdd/InsightEdgeDocumentRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
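To make the bucketing arithmetic concrete, a small illustration (numbers only; the real range assignment lives in the RDD and partition code): with BucketsCount = 128 and, say, 4 Spark splits per Data Grid partition, each Spark partition gets a contiguous range of 32 bucket ids within one grid partition.

val bucketsCount = 128          // InsightEdgeConstants.BucketsCount
val splitsPerGridPartition = 4  // e.g. InsightEdgeConstants.DefaultSplitCount
val step = bucketsCount / splitsPerGridPartition
val ranges = (0 until splitsPerGridPartition).map(i => (i * step, (i + 1) * step))
// Vector((0,32), (32,64), (64,96), (96,128)) — one (bucketRangeBottom, bucketRangeTop) pair per Spark partition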
15 | */ 16 | 17 | package org.insightedge.spark.rdd 18 | 19 | import com.gigaspaces.document.SpaceDocument 20 | import org.apache.spark.annotation.DeveloperApi 21 | import org.apache.spark.{Partition, SparkContext, TaskContext} 22 | import org.insightedge.spark.context.InsightEdgeConfig 23 | 24 | class InsightEdgeDocumentRDD( 25 | ieConfig: InsightEdgeConfig, 26 | sc: SparkContext, 27 | typeName: String, 28 | query: String, 29 | queryParams: Seq[Any], 30 | queryFields: Seq[String], 31 | readRddBufferSize: Int 32 | ) extends InsightEdgeAbstractRDD[SpaceDocument](ieConfig, sc, None, readRddBufferSize) { 33 | 34 | @DeveloperApi 35 | override def compute(split: Partition, context: TaskContext): Iterator[SpaceDocument] = { 36 | val gsQuery = createDocumentInsightEdgeQuery(typeName, split, query, queryParams, queryFields) 37 | computeInternal[SpaceDocument](split, gsQuery, context) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/rdd/InsightEdgeRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd 18 | 19 | import org.insightedge.spark.context.InsightEdgeConfig 20 | import org.apache.spark.annotation.DeveloperApi 21 | import org.apache.spark.{Partition, SparkContext, TaskContext} 22 | 23 | import scala.reflect.ClassTag 24 | 25 | class InsightEdgeRDD[R: ClassTag]( 26 | ieConfig: InsightEdgeConfig, 27 | sc: SparkContext, 28 | splitCount: Option[Int], 29 | readRddBufferSize: Int 30 | ) extends InsightEdgeAbstractRDD[R](ieConfig, sc, splitCount, readRddBufferSize) { 31 | 32 | @DeveloperApi 33 | override def compute(partition: Partition, context: TaskContext): Iterator[R] = { 34 | val sqlQuery = if (supportsBuckets()) bucketQuery(partition) else "" 35 | val gsQuery = createInsightEdgeQuery[R](sqlQuery, partition) 36 | computeInternal[R](partition, gsQuery, context) 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/rdd/InsightEdgeSqlRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
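For context, a hedged sketch of how these RDDs are usually obtained — not constructed directly, but through the enriched SparkContext (assuming the gridRdd helper that InsightEdgeSparkContext provides; Data is the test model defined later in this dump):

import org.insightedge.spark.implicits.basic._

val rdd = sc.gridRdd[Data]() // an InsightEdgeRDD[Data] spanning all grid partitions
println(rdd.count())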
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd 18 | 19 | import org.apache.spark.annotation.DeveloperApi 20 | import org.apache.spark.{Partition, SparkContext, TaskContext} 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | 23 | import scala.reflect.ClassTag 24 | 25 | class InsightEdgeSqlRDD[R: ClassTag]( 26 | ieConfig: InsightEdgeConfig, 27 | sc: SparkContext, 28 | query: String, 29 | queryParams: Seq[Any], 30 | queryFields: Seq[String], 31 | splitCount: Option[Int], 32 | readRddBufferSize: Int 33 | ) extends InsightEdgeAbstractRDD[R](ieConfig, sc, splitCount, readRddBufferSize) { 34 | 35 | @DeveloperApi 36 | override def compute(partition: Partition, context: TaskContext): Iterator[R] = { 37 | val sqlQuery = if (supportsBuckets()) bucketize(query, partition) else query 38 | val gsQuery = createInsightEdgeQuery[R](sqlQuery, partition, queryParams, queryFields) 39 | computeInternal[R](partition, gsQuery, context) 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/streaming/SaveDStreamToGridExtension.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.streaming 18 | 19 | import org.apache.spark.streaming.dstream.DStream 20 | import org.insightedge.spark.context.InsightEdgeConfig 21 | import org.insightedge.spark.utils.GridProxyFactory 22 | import org.insightedge.spark.implicits.basic._ 23 | import scala.annotation.meta.param 24 | import scala.reflect.ClassTag 25 | 26 | /** 27 | * Extra functions available on DStream through an implicit conversion. 
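The SQL variant pushes filtering down to the grid, and bucketize(query, partition) additionally narrows the query to the partition's bucket range when the target class mixes in BucketedGridModel. A hedged usage sketch (assuming the gridSql helper on the enriched context):

import org.insightedge.spark.implicits.basic._

val recent = sc.gridSql[Data]("routing > ?", Seq(500L)) // only matching objects leave the grid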
28 | * 29 | * @author Oleksiy_Dyagilev 30 | */ 31 | // compilation warning is due to https://issues.scala-lang.org/browse/SI-8813 32 | class SaveDStreamToGridExtension[T: ClassTag](@transient dStream: DStream[T]) extends Serializable { 33 | 34 | /** 35 | * Saves DStream to Data Grid 36 | * 37 | * @param writeBatchSize batch size for grid write operations 38 | */ 39 | def saveToGrid(writeBatchSize: Int = 1000) = { 40 | val ieConfig = dStream.context.sparkContext.ieConfig 41 | 42 | dStream.foreachRDD { rdd => 43 | rdd.foreachPartition { partitionOfRecords => 44 | val gridProxy = GridProxyFactory.getOrCreateClustered(ieConfig) 45 | val batches = partitionOfRecords.grouped(writeBatchSize) 46 | 47 | batches.foreach { batch => 48 | val arr = batch.asInstanceOf[Iterable[Object]].toArray 49 | gridProxy.writeMultiple(arr) 50 | } 51 | } 52 | } 53 | } 54 | 55 | 56 | } 57 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/streaming/StreamingImplicits.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.streaming 18 | 19 | import org.apache.spark.streaming.dstream.DStream 20 | 21 | import scala.reflect.ClassTag 22 | 23 | /** 24 | * Enables InsightEdge Streaming API 25 | * 26 | * @author Oleksiy_Dyagilev 27 | */ 28 | trait StreamingImplicits { 29 | 30 | implicit def saveDStreamToGridExtension[T: ClassTag](dStream: DStream[T]): SaveDStreamToGridExtension[T] = { 31 | new SaveDStreamToGridExtension(dStream) 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/utils/BucketIdSeq.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | /** 20 | * Bucket id sequence. Generates uniform distribution of bucket ids. 
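With the streaming implicits in scope the extension above is applied automatically; a hedged sketch (the input stream is invented, and GridString is the test model defined later in this dump):

import org.insightedge.spark.implicits.streaming._

val words = lines.flatMap(_.split(" ")).map(new GridString(_)) // lines: a hypothetical DStream[String]
words.saveToGrid(writeBatchSize = 500) // each partition writes via writeMultiple in batches of 500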
21 | * 22 | * The sequence is not thread safe 23 | * 24 | * @author Oleksiy_Dyagilev 25 | */ 26 | class BucketIdSeq extends Serializable { 27 | 28 | private var bucketId = -1 29 | 30 | def next(): Int = { 31 | bucketId += 1 32 | if (bucketId >= InsightEdgeConstants.BucketsCount) { 33 | bucketId = 0 34 | } 35 | bucketId 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/utils/GridProxyFactory.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import org.insightedge.spark.context.InsightEdgeConfig 20 | import org.openspaces.core.{GigaSpace, GigaSpaceConfigurer} 21 | import org.openspaces.core.space.SpaceProxyConfigurer 22 | 23 | /** 24 | * Ensures single GigaSpaces instance per JVM (Spark worker) 25 | * 26 | * @author Oleksiy_Dyagilev 27 | */ 28 | object GridProxyFactory extends Logging { 29 | 30 | System.setProperty("com.gs.protectiveMode.ambiguousQueryRoutingUsage", "false") 31 | 32 | private val clusterProxyCache = new LocalCache[InsightEdgeConfig, GigaSpace]() 33 | 34 | def getOrCreateClustered(ieConfig: InsightEdgeConfig): GigaSpace = { 35 | clusterProxyCache.getOrElseUpdate(ieConfig, createSpaceProxy(ieConfig)) 36 | } 37 | 38 | def clusteredCacheSize(): Int = clusterProxyCache.size() 39 | 40 | private def createSpaceProxy(ieConfig: InsightEdgeConfig) : GigaSpace = { 41 | profileWithInfo("createSpaceProxy") { 42 | val spaceConfigurer = new SpaceProxyConfigurer(ieConfig.spaceName) 43 | ieConfig.lookupGroups.foreach(spaceConfigurer.lookupGroups) 44 | ieConfig.lookupLocators.foreach(spaceConfigurer.lookupLocators) 45 | new GigaSpaceConfigurer(spaceConfigurer).create() 46 | } 47 | } 48 | 49 | private def profileWithInfo[T](message: String)(block: => T): T = Profiler.profile(message)(logInfo(_))(block) 50 | } -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/utils/InsightEdgeConstants.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
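BucketIdSeq simply cycles through 0 until InsightEdgeConstants.BucketsCount, so successive writes are spread evenly across buckets; a quick illustration:

val seq = new BucketIdSeq()
val ids = (1 to 130).map(_ => seq.next())
// 0, 1, 2, ..., 127, 0, 1 — wraps around after BucketsCount ids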
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | /** 20 | * @author Oleksiy_Dyagilev 21 | */ 22 | object InsightEdgeConstants { 23 | 24 | val BucketsCount = 128 25 | val DefaultSplitCount = 4 26 | val DefaultReadBufferSize = 1000 27 | val DefaultDriverWriteBatchSize = 1000 28 | 29 | } 30 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/utils/LocalCache.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import java.util.concurrent.ConcurrentHashMap 20 | 21 | import scala.collection.convert.decorateAsScala._ 22 | 23 | /** 24 | * @author Oleksiy_Dyagilev 25 | */ 26 | private[spark] class LocalCache[K, V] { 27 | 28 | protected[utils] val map = new ConcurrentHashMap[K, V]().asScala 29 | 30 | def getOrElseUpdate(k: K, op: => V): V = { 31 | map.getOrElse(k, updateIfRequired(k, op)) 32 | } 33 | 34 | def get(k: K): Option[V] = { 35 | map.get(k) 36 | } 37 | 38 | private def updateIfRequired(k: K, op: => V): V = { 39 | this.synchronized { 40 | map.get(k) match { 41 | case Some(v) => v 42 | case None => 43 | val v = op 44 | map.put(k, v) 45 | v 46 | } 47 | } 48 | } 49 | 50 | def size(): Int = map.size 51 | 52 | } 53 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/utils/Logging.scala: -------------------------------------------------------------------------------- 1 | package org.insightedge.spark.utils 2 | 3 | import org.slf4j.{Logger, LoggerFactory} 4 | 5 | /** 6 | * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows 7 | * logging messages at different levels using methods that only evaluate parameters lazily if the 8 | * log level is enabled. 9 | * 10 | * 11 | * This is a copy of org.apache.spark.Logging that is Spark private since 2.0. 
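Worth noting how LocalCache behaves (illustration only; the class is private[spark]): the getOrElse fast path is lock-free for hits, while updateIfRequired re-checks under the lock, so the by-name op is evaluated at most once per key even with concurrent callers.

val cache = new LocalCache[String, Int]()
cache.getOrElseUpdate("a", { println("computed"); 1 }) // prints "computed", returns 1
cache.getOrElseUpdate("a", { println("computed"); 2 }) // hit: returns 1, op never evaluated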
12 | * 13 | */ 14 | trait Logging { 15 | // Make the log field transient so that objects with Logging can 16 | // be serialized and used on another machine 17 | @transient private var _log: Logger = _ 18 | 19 | // Method to get the logger name for this object 20 | protected def logName = { 21 | // Ignore trailing $'s in the class names for Scala objects 22 | this.getClass.getName.stripSuffix("$") 23 | } 24 | 25 | // Method to get or create the logger for this object 26 | protected def log: Logger = { 27 | if (_log == null) { 28 | _log = LoggerFactory.getLogger(logName) 29 | } 30 | _log 31 | } 32 | 33 | // Log methods that take only a String 34 | protected def logInfo(msg: => String) { 35 | if (log.isInfoEnabled) log.info(msg) 36 | } 37 | 38 | protected def logDebug(msg: => String) { 39 | if (log.isDebugEnabled) log.debug(msg) 40 | } 41 | 42 | protected def logTrace(msg: => String) { 43 | if (log.isTraceEnabled) log.trace(msg) 44 | } 45 | 46 | protected def logWarning(msg: => String) { 47 | if (log.isWarnEnabled) log.warn(msg) 48 | } 49 | 50 | protected def logError(msg: => String) { 51 | if (log.isErrorEnabled) log.error(msg) 52 | } 53 | 54 | // Log methods that take Throwables (Exceptions/Errors) too 55 | protected def logInfo(msg: => String, throwable: Throwable) { 56 | if (log.isInfoEnabled) log.info(msg, throwable) 57 | } 58 | 59 | protected def logDebug(msg: => String, throwable: Throwable) { 60 | if (log.isDebugEnabled) log.debug(msg, throwable) 61 | } 62 | 63 | protected def logTrace(msg: => String, throwable: Throwable) { 64 | if (log.isTraceEnabled) log.trace(msg, throwable) 65 | } 66 | 67 | protected def logWarning(msg: => String, throwable: Throwable) { 68 | if (log.isWarnEnabled) log.warn(msg, throwable) 69 | } 70 | 71 | protected def logError(msg: => String, throwable: Throwable) { 72 | if (log.isErrorEnabled) log.error(msg, throwable) 73 | } 74 | 75 | protected def isTraceEnabled(): Boolean = { 76 | log.isTraceEnabled 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/utils/LookupPartitionTask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
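Since every log method takes msg: => String, the message is only built when the level is enabled — the reason this Spark-private trait was copied. A minimal sketch:

class PartitionLoader extends Logging {
  def load(): Unit = {
    // the interpolation below is skipped entirely unless DEBUG is enabled
    logDebug(s"state dump: ${(1 to 1000000).sum}")
    logInfo("partition loaded")
  }
}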
15 | */ 16 | 17 | package org.insightedge.spark.utils; 18 | 19 | import com.gigaspaces.async.AsyncResult; 20 | 21 | import org.openspaces.core.GigaSpace; 22 | import org.openspaces.core.executor.DistributedTask; 23 | import org.openspaces.core.executor.TaskGigaSpace; 24 | 25 | import java.net.InetAddress; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | 29 | /** 30 | * Returns a list of [hostName, containerName, partitionId] 31 | * 32 | * @author Oleksiy_Dyagilev 33 | */ 34 | public class LookupPartitionTask implements DistributedTask<ArrayList<String>, List<List<String>>> { 35 | 36 | @TaskGigaSpace 37 | private transient GigaSpace gridProxy; 38 | 39 | public ArrayList<String> execute() throws Exception { 40 | String hostName = InetAddress.getLocalHost().getHostAddress(); 41 | String containerName = gridProxy.getSpace().getContainerName(); 42 | 43 | ArrayList<String> res = new ArrayList<String>(); 44 | res.add(hostName); 45 | res.add(containerName); 46 | return res; 47 | } 48 | 49 | 50 | public List<List<String>> reduce(List<AsyncResult<ArrayList<String>>> mapAsyncResults) throws Exception { 51 | List<List<String>> reduceResult = new ArrayList<List<String>>(); 52 | 53 | int id = 0; 54 | for (AsyncResult<ArrayList<String>> mapAsyncResult : mapAsyncResults) { 55 | if (mapAsyncResult.getException() != null) { 56 | throw mapAsyncResult.getException(); 57 | } 58 | 59 | ArrayList<String> mapResult = mapAsyncResult.getResult(); 60 | mapResult.add(String.valueOf(id)); 61 | id++; 62 | reduceResult.add(mapResult); 63 | } 64 | 65 | return reduceResult; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /insightedge-core/src/main/scala/org/insightedge/spark/utils/Profiler.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | object Profiler { 20 | 21 | 22 | /** 23 | * Measures and logs the execution time of a code block. 24 | */ 25 | def profile[R](message: String)(logFunction: String => Unit)(block: => R): R = { 26 | val start = System.nanoTime() 27 | val result = block 28 | val stop = System.nanoTime() 29 | val time = (BigDecimal(stop - start) / 1000000000).setScale(5, BigDecimal.RoundingMode.HALF_UP) 30 | logFunction(s"$message took " + time + " seconds") 31 | result 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /insightedge-core/src/test/java/org/apache/spark/sql/insightedge/JAddress.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
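Profiler.profile is curried so a log function can be supplied separately from the profiled block; a small sketch using the code above:

val log: String => Unit = println
val answer = Profiler.profile("lookup partitions")(log) {
  Thread.sleep(50) // stand-in for real work
  42
}
// prints something like: lookup partitions took 0.05012 seconds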
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge; 18 | 19 | import java.io.Serializable; 20 | 21 | public class JAddress implements Serializable { 22 | private String city; 23 | private String state; 24 | 25 | public JAddress() { 26 | } 27 | 28 | public JAddress(String city, String state) { 29 | this.city = city; 30 | this.state = state; 31 | } 32 | 33 | public String getCity() { 34 | return city; 35 | } 36 | 37 | public void setCity(String city) { 38 | this.city = city; 39 | } 40 | 41 | public String getState() { 42 | return state; 43 | } 44 | 45 | public void setState(String state) { 46 | this.state = state; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /insightedge-core/src/test/java/org/apache/spark/sql/insightedge/JPerson.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge; 18 | 19 | import com.gigaspaces.annotation.pojo.SpaceId; 20 | 21 | /** 22 | * @author Oleksiy_Dyagilev 23 | */ 24 | public class JPerson { 25 | 26 | private String id; 27 | private String name; 28 | private Integer age; 29 | private JAddress address; 30 | 31 | public JPerson() { 32 | } 33 | 34 | public JPerson(String id, String name, Integer age, JAddress address) { 35 | this.id = id; 36 | this.name = name; 37 | this.age = age; 38 | this.address = address; 39 | } 40 | 41 | @SpaceId(autoGenerate = true) 42 | public String getId() { 43 | return id; 44 | } 45 | 46 | public void setId(String id) { 47 | this.id = id; 48 | } 49 | 50 | public String getName() { 51 | return name; 52 | } 53 | 54 | public void setName(String name) { 55 | this.name = name; 56 | } 57 | 58 | public Integer getAge() { 59 | return age; 60 | } 61 | 62 | public void setAge(Integer age) { 63 | this.age = age; 64 | } 65 | 66 | public JAddress getAddress() { 67 | return address; 68 | } 69 | 70 | public void setAddress(JAddress address) { 71 | this.address = address; 72 | } 73 | } 74 | 75 | -------------------------------------------------------------------------------- /insightedge-core/src/test/java/org/apache/spark/sql/insightedge/JSpatialData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge; 18 | 19 | import com.gigaspaces.annotation.pojo.SpaceClass; 20 | import com.gigaspaces.annotation.pojo.SpaceId; 21 | import com.gigaspaces.annotation.pojo.SpaceRouting; 22 | import org.openspaces.spatial.shapes.Point; 23 | 24 | /** 25 | * Space class for tests 26 | */ 27 | @SpaceClass 28 | public class JSpatialData { 29 | private String id; 30 | private Long routing; 31 | private Point point; 32 | 33 | public JSpatialData() { 34 | } 35 | 36 | public JSpatialData(Long routing, Point point) { 37 | this.routing = routing; 38 | this.point = point; 39 | } 40 | 41 | @SpaceId(autoGenerate = true) 42 | public String getId() { 43 | return id; 44 | } 45 | 46 | public void setId(String id) { 47 | this.id = id; 48 | } 49 | 50 | @SpaceRouting 51 | public Long getRouting() { 52 | return routing; 53 | } 54 | 55 | public void setRouting(Long routing) { 56 | this.routing = routing; 57 | } 58 | 59 | public Point getPoint() { 60 | return point; 61 | } 62 | 63 | public void setPoint(Point point) { 64 | this.point = point; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /insightedge-core/src/test/java/org/insightedge/spark/rdd/JBucketedData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd; 18 | 19 | import com.gigaspaces.annotation.pojo.SpaceClass; 20 | import com.gigaspaces.annotation.pojo.SpaceId; 21 | import com.gigaspaces.annotation.pojo.SpaceRouting; 22 | import org.insightedge.spark.japi.JBucketedGridModel; 23 | 24 | /** 25 | * Space class for the tests that work with Java POJOs. 
Mirror of Data.scala 26 | * 27 | * @author Oleksiy_Dyagilev 28 | */ 29 | @SpaceClass 30 | public class JBucketedData extends JBucketedGridModel { 31 | 32 | private String id; 33 | private Long routing; 34 | private String data; 35 | private Boolean flag; 36 | 37 | public JBucketedData() { 38 | } 39 | 40 | public JBucketedData(Long routing, String data) { 41 | this.routing = routing; 42 | this.data = data; 43 | } 44 | 45 | @SpaceId(autoGenerate = true) 46 | public String getId() { 47 | return id; 48 | } 49 | 50 | public void setId(String id) { 51 | this.id = id; 52 | } 53 | 54 | @SpaceRouting 55 | public Long getRouting() { 56 | return routing; 57 | } 58 | 59 | public void setRouting(Long routing) { 60 | this.routing = routing; 61 | } 62 | 63 | public String getData() { 64 | return data; 65 | } 66 | 67 | public void setData(String data) { 68 | this.data = data; 69 | } 70 | 71 | public Boolean getFlag() { 72 | return flag; 73 | } 74 | 75 | public void setFlag(Boolean flag) { 76 | this.flag = flag; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /insightedge-core/src/test/java/org/insightedge/spark/rdd/JData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd; 18 | 19 | import com.gigaspaces.annotation.pojo.SpaceClass; 20 | import com.gigaspaces.annotation.pojo.SpaceId; 21 | import com.gigaspaces.annotation.pojo.SpaceRouting; 22 | 23 | /** 24 | * Space class for the tests that work with Java POJOs. 
Mirror of Data.scala 25 | * 26 | * @author Oleksiy_Dyagilev 27 | */ 28 | @SpaceClass 29 | public class JData { 30 | 31 | private String id; 32 | private Long routing; 33 | private String data; 34 | private Boolean flag; 35 | 36 | public JData() { 37 | } 38 | 39 | public JData(Long routing, String data) { 40 | this.routing = routing; 41 | this.data = data; 42 | } 43 | 44 | @SpaceId(autoGenerate = true) 45 | public String getId() { 46 | return id; 47 | } 48 | 49 | public void setId(String id) { 50 | this.id = id; 51 | } 52 | 53 | @SpaceRouting 54 | public Long getRouting() { 55 | return routing; 56 | } 57 | 58 | public void setRouting(Long routing) { 59 | this.routing = routing; 60 | } 61 | 62 | public String getData() { 63 | return data; 64 | } 65 | 66 | public void setData(String data) { 67 | this.data = data; 68 | } 69 | 70 | public Boolean getFlag() { 71 | return flag; 72 | } 73 | 74 | public void setFlag(Boolean flag) { 75 | this.flag = flag; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /insightedge-core/src/test/resources/cluster-member-config.xml: -------------------------------------------------------------------------------- (XML markup not preserved in this dump) -------------------------------------------------------------------------------- /insightedge-core/src/test/resources/cluster-test-config.xml: -------------------------------------------------------------------------------- (XML markup not preserved in this dump) -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/dataframe/DataFrameGetWithStringSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.dataframe 18 | 19 | import org.insightedge.spark.fixture.InsightEdge 20 | import org.insightedge.spark.implicits.all._ 21 | import org.insightedge.spark.rdd.Data 22 | import org.insightedge.spark.utils.ScalaSpaceClass 23 | import org.scalatest.fixture 24 | 25 | import scala.reflect.classTag 26 | 27 | class DataFrameGetWithStringSpec extends fixture.FlatSpec with InsightEdge { 28 | 29 | it should "Read data frame as String which was written as POJO with a valid struct Type " taggedAs ScalaSpaceClass in { ie => 30 | 31 | writeDataSeqToDataGrid(1000) 32 | val spark = ie.spark 33 | val df = spark.read.grid("org.insightedge.spark.rdd.Data") 34 | val fields = classTag[Data].runtimeClass.getDeclaredFields 35 | 36 | // Make sure all the original fields exist.
37 | assert(fields.size == df.schema.fields.length, "Dataframe should have exactly the same number of fields as the class it represents") 38 | for (field <- fields) { 39 | assert(df.schema.fieldNames.contains(field.getName)) 40 | } 41 | } 42 | 43 | it should "Read data frame as Class which was written as POJO with a valid struct Type " taggedAs ScalaSpaceClass in { ie => 44 | 45 | writeDataSeqToDataGrid(1000) 46 | val spark = ie.spark 47 | val df = spark.read.grid[Data] 48 | val fields = classTag[Data].runtimeClass.getDeclaredFields 49 | 50 | // Make sure all the original fields exist. 51 | assert(fields.size == df.schema.fields.length, "Dataframe should have exactly the same number of fields as the class it represents") 52 | for (field <- fields) { 53 | assert(df.schema.fieldNames.contains(field.getName)) 54 | } 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/model/Address.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.model 18 | 19 | /** 20 | * Embedded class for tests 21 | */ 22 | case class Address(city: String, state: String) -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/model/AllClassesSupport.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.model 18 | 19 | import java.sql.{Date, Timestamp} 20 | 21 | import org.insightedge.scala.annotation 22 | import annotation._ 23 | import org.apache.spark.sql.types.{DateType, MapType, TimestampType} 24 | import org.apache.spark.unsafe.types.CalendarInterval 25 | 26 | import scala.beans.BeanProperty 27 | 28 | /** 29 | * Space class for tests 30 | */ 31 | case class AllClassesSupport( 32 | 33 | @BeanProperty @SpaceId(autoGenerate = true) var id: String 34 | ,@BeanProperty @SpaceRouting var routing: Long 35 | ,@BeanProperty var decimal1: java.math.BigDecimal 36 | ,@BeanProperty var byte1: Array[Byte] 37 | ,@BeanProperty var timeStamp1: java.sql.Timestamp 38 | ,@BeanProperty var date1: java.sql.Date 39 | ,@BeanProperty var arrInt: Array[Int] 40 | ,@BeanProperty var list1: List[Int] 41 | ,@BeanProperty var list2: java.util.List[Integer] 42 | ,@BeanProperty var listString: java.util.List[String] 43 | ) { 44 | 45 | def this(routing: Long) = this(null, routing, null, null, null, null, null, null, null, null) 46 | 47 | def this() = this(-1) 48 | } -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/model/DummyPerson.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.model 18 | import org.apache.spark.sql.insightedge.JAddress 19 | 20 | /** 21 | * class for tests 22 | */ 23 | case class DummyPerson( 24 | var personId: String, 25 | 26 | var name: String, 27 | 28 | var surname: String, 29 | 30 | var age: Int, 31 | 32 | var address: Address 33 | 34 | // var jaddress: JAddress 35 | ) { 36 | 37 | def this() = this(null, null, null, -1, null ) 38 | 39 | } -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/model/NotGridModel.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.model 18 | 19 | case class NotGridModel() 20 | -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/model/Person.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.model 18 | 19 | import org.insightedge.scala.annotation 20 | import annotation._ 21 | 22 | import scala.beans.BeanProperty 23 | 24 | /** 25 | * Space class for tests 26 | */ 27 | case class Person( 28 | @BeanProperty 29 | @SpaceId(autoGenerate = true) 30 | var id: String, 31 | 32 | @BeanProperty 33 | var name: String, 34 | 35 | @BeanProperty 36 | var age: Int, 37 | 38 | @BeanProperty 39 | var address: Address 40 | ) { 41 | 42 | def this() = this(null, null, -1, null) 43 | 44 | } 45 | -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/model/SpatialData.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.model 18 | 19 | import org.insightedge.scala.annotation 20 | import annotation._ 21 | import org.openspaces.spatial.shapes.{Circle, Point, Rectangle} 22 | 23 | import scala.beans.BeanProperty 24 | 25 | /** 26 | * Space class for tests 27 | */ 28 | case class SpatialData( 29 | @BeanProperty 30 | @SpaceId(autoGenerate = true) 31 | var id: String, 32 | 33 | @BeanProperty 34 | @SpaceRouting 35 | var routing: Long, 36 | 37 | @BeanProperty 38 | @SpaceSpatialIndex 39 | var circle: Circle, 40 | 41 | @BeanProperty 42 | var rect: Rectangle, 43 | 44 | @BeanProperty 45 | var point: Point 46 | ) { 47 | 48 | def this(routing: Long) = this(null, routing, null, null, null) 49 | 50 | def this() = this(-1) 51 | 52 | } -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/apache/spark/sql/insightedge/model/SpatialEmbeddedData.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.spark.sql.insightedge.model 18 | 19 | import org.insightedge.scala.annotation 20 | import annotation._ 21 | import org.openspaces.spatial.shapes.Point 22 | 23 | import scala.beans.BeanProperty 24 | 25 | /** 26 | * Space class for tests 27 | */ 28 | case class SpatialEmbeddedData( 29 | @BeanProperty 30 | @SpaceId(autoGenerate = true) 31 | var id: String, 32 | 33 | @BeanProperty 34 | @SpaceSpatialIndex(path = "point") 35 | var location: Location 36 | ) { 37 | 38 | def this() = this(null, null) 39 | 40 | } 41 | 42 | case class Location(@BeanProperty point: Point) -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/insightedge/spark/fixture/InsightEdgeStreaming.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.insightedge.spark.fixture 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.streaming.{Seconds, StreamingContext} 21 | import org.insightedge.spark.implicits.basic._ 22 | import org.scalatest.concurrent.Eventually 23 | import org.scalatest.time.Span 24 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Suite, time} 25 | 26 | /** 27 | * @author Oleksiy_Dyagilev 28 | */ 29 | trait InsightEdgeStreaming extends InsightEdge with BeforeAndAfterAll with BeforeAndAfterEach { 30 | self: Suite => 31 | 32 | var ssc: StreamingContext = _ 33 | 34 | override protected def beforeEach() = { 35 | super.beforeEach() 36 | 37 | val sparkConf = new SparkConf() 38 | .setAppName("insightedge-streaming-test") 39 | .setMaster("local[2]") 40 | .setInsightEdgeConfig(ieConfig) 41 | 42 | ssc = new StreamingContext(sparkConf, Seconds(1)) 43 | } 44 | 45 | override protected def afterEach() = { 46 | ssc.stop() 47 | ssc.sparkContext.stopInsightEdgeContext() 48 | super.afterEach() 49 | } 50 | 51 | def timeout(sec: Int) = Eventually.PatienceConfig(Span(sec, time.Seconds)) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/insightedge/spark/rdd/BucketedData.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd 18 | 19 | import org.insightedge.scala.annotation 20 | import annotation._ 21 | import org.insightedge.spark.model.BucketedGridModel 22 | 23 | import scala.beans.{BeanProperty, BooleanBeanProperty} 24 | 25 | /** 26 | * Space class for tests 27 | */ 28 | case class BucketedData( 29 | @BeanProperty 30 | @SpaceId(autoGenerate = true) 31 | var id: String, 32 | 33 | @BeanProperty 34 | @SpaceRouting 35 | var routing: Long, 36 | 37 | @BeanProperty 38 | var data: String, 39 | 40 | @BooleanBeanProperty 41 | var flag: Boolean 42 | ) extends BucketedGridModel { 43 | 44 | def this(routing: Long, data: String) = this(null, routing, data, false) 45 | 46 | def this() = this(-1, null) 47 | 48 | def this(routing: Long) = this(routing, null) 49 | } -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/insightedge/spark/rdd/BucketedGridString.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd 18 | 19 | import org.insightedge.scala.annotation 20 | import annotation._ 21 | import org.insightedge.spark.model.BucketedGridModel 22 | 23 | import scala.beans.BeanProperty 24 | 25 | /** 26 | * Grid class for test purposes 27 | * 28 | * @author Oleksiy_Dyagilev 29 | */ 30 | case class BucketedGridString( 31 | @BeanProperty 32 | @SpaceId(autoGenerate = true) 33 | var id: String, 34 | 35 | @BeanProperty 36 | var string: String 37 | ) extends BucketedGridModel { 38 | def this() = this(null, null) 39 | 40 | def this(string: String) = this(null, string) 41 | } 42 | -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/insightedge/spark/rdd/Data.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd 18 | 19 | import org.insightedge.scala.annotation 20 | import annotation._ 21 | import org.insightedge.spark.model.BucketedGridModel 22 | 23 | import scala.beans.{BeanProperty, BooleanBeanProperty} 24 | 25 | /** 26 | * Space class for tests 27 | */ 28 | case class Data( 29 | @BeanProperty 30 | @SpaceId(autoGenerate = true) 31 | var id: String, 32 | 33 | @BeanProperty 34 | @SpaceRouting 35 | var routing: Long, 36 | 37 | @BeanProperty 38 | var data: String, 39 | 40 | @BooleanBeanProperty 41 | var flag: java.lang.Boolean 42 | ) { 43 | def this(routing: Long, data: String) = this(null, routing, data, null) 44 | 45 | def this() = this(-1, null) 46 | 47 | def this(routing: Long) = this(routing, null) 48 | } -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/insightedge/spark/rdd/GridString.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.rdd 18 | 19 | import org.insightedge.scala.annotation 20 | import annotation._ 21 | 22 | import scala.beans.BeanProperty 23 | 24 | /** 25 | * Grid class for test purposes 26 | * 27 | * @author Oleksiy_Dyagilev 28 | */ 29 | case class GridString( 30 | @BeanProperty 31 | @SpaceId(autoGenerate = true) 32 | var id: String, 33 | 34 | @BeanProperty 35 | var string: String 36 | ) { 37 | def this() = this(null, null) 38 | 39 | def this(string: String) = this(null, string) 40 | } 41 | -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/insightedge/spark/streaming/InsightEdgeStreamingSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.streaming 18 | 19 | import org.insightedge.spark.implicits 20 | import implicits.basic._ 21 | import implicits.streaming._ 22 | import org.insightedge.spark.rdd.Data 23 | import org.apache.spark.SparkContext 24 | import org.apache.spark.rdd.RDD 25 | import org.apache.spark.streaming.dstream.InputDStream 26 | import org.insightedge.spark.fixture.InsightEdgeStreaming 27 | import org.scalatest.fixture 28 | import org.scalatest.concurrent.Eventually._ 29 | 30 | import scala.collection.mutable 31 | import scala.util.Random 32 | 33 | /** 34 | * @author Oleksiy_Dyagilev 35 | */ 36 | class InsightEdgeStreamingSpec extends fixture.FlatSpec with InsightEdgeStreaming { 37 | 38 | it should "save single object from Spark driver" in { ie => 39 | val sc = ssc.sparkContext 40 | 41 | val stream: InputDStream[String] = ssc.queueStream(stringQueue(sc)) 42 | 43 | stream.foreachRDD { rdd => 44 | val str = rdd.first() 45 | val data = new Data(Random.nextLong(), str) 46 | sc.saveToGrid(data) 47 | } 48 | 49 | ssc.start() 50 | 51 | eventually { 52 | val savedData = ie.spaceProxy.readMultiple(dataQuery()) 53 | assert(savedData.length == 1) 54 | }(timeout(2)) 55 | } 56 | 57 | it should "save multiple objects from Spark driver" in { ie => 58 | val sc = ssc.sparkContext 59 | 60 | val stream: InputDStream[String] = ssc.queueStream(stringQueue(sc)) 61 | 62 | stream.foreachRDD { rdd => 63 | val stringsArray = rdd.take(2) 64 | val datas = stringsArray.map(str => new Data(Random.nextLong(), str)) 65 | sc.saveMultipleToGrid(datas) 66 | } 67 | 68 | ssc.start() 69 | 70 | eventually { 71 | val savedData = ie.spaceProxy.readMultiple(dataQuery()) 72 | assert(savedData.nonEmpty) 73 | }(timeout(2)) 74 | } 75 | 76 | it should "save DStream" in { ie => 77 | val sc = ssc.sparkContext 78 | 79 | val stream: InputDStream[String] = ssc.queueStream(stringQueue(sc)) 80 | 81 | stream 82 | .map(str => new
Data(Random.nextLong(), str)) 83 | .saveToGrid() 84 | 85 | ssc.start() 86 | 87 | eventually { 88 | val savedData = ie.spaceProxy.readMultiple(dataQuery()) 89 | assert(savedData.nonEmpty) 90 | }(timeout(2)) 91 | } 92 | 93 | def stringQueue(sc: SparkContext) = { 94 | val q = mutable.Queue[RDD[String]]() 95 | q += sc.makeRDD(Seq("aa", "bb", "cc")) 96 | q += sc.makeRDD(Seq("dd", "ee", "ff")) 97 | q 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /insightedge-core/src/test/scala/org/insightedge/spark/utils/Tags.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import org.scalatest.Tag 20 | 21 | /** 22 | * Test tags 23 | * 24 | * @author Oleksiy_Dyagilev 25 | */ 26 | object ScalaSpaceClass extends Tag("org.insightedge.spark.utils.ScalaSpaceClass") 27 | object JavaSpaceClass extends Tag("org.insightedge.spark.utils.JavaSpaceClass") 28 | -------------------------------------------------------------------------------- /insightedge-examples/Jenkinsfile: -------------------------------------------------------------------------------- 1 | node { 2 | stage 'Checkout' 3 | checkout scm 4 | load 'tools/jenkins.groovy' 5 | } -------------------------------------------------------------------------------- /insightedge-examples/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
-------------------------------------------------------------------------------- /insightedge-examples/build.sbt: -------------------------------------------------------------------------------- 1 | name := "insightedge-examples" 2 | 3 | val insightEdgeVersion = sys.props.getOrElse("insightEdgeVersion", "16.4.0-SNAPSHOT") 4 | 5 | version := insightEdgeVersion 6 | 7 | scalaVersion := "2.11.8" 8 | 9 | 10 | resolvers += Resolver.mavenLocal 11 | resolvers += "Openspaces Maven Repository" at "http://maven-repository.openspaces.org" 12 | 13 | libraryDependencies ++= Seq( 14 | "org.gigaspaces.insightedge" % "insightedge-core" % insightEdgeVersion % "provided", 15 | "org.apache.bahir" %% "spark-streaming-twitter" % "2.4.0", 16 | "org.scalatest" % "scalatest_2.11" % "3.0.3" % "test" 17 | ) 18 | 19 | test in assembly := {} 20 | 21 | assemblyOutputPath in assembly := new File("target/insightedge-examples.jar") 22 | 23 | assemblyMergeStrategy in assembly := { 24 | case PathList("org", "apache", "spark", "unused", "UnusedStubClass.class") => MergeStrategy.first 25 | case x => (assemblyMergeStrategy in assembly).value(x) 26 | } 27 | 28 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) -------------------------------------------------------------------------------- /insightedge-examples/doc/images/idea-configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightEdge/insightedge/36a3ee9f1d84cf2d185be5aeae55627efa1fea4c/insightedge-examples/doc/images/idea-configuration.png -------------------------------------------------------------------------------- /insightedge-examples/doc/images/idea-configuration_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightEdge/insightedge/36a3ee9f1d84cf2d185be5aeae55627efa1fea4c/insightedge-examples/doc/images/idea-configuration_1.png -------------------------------------------------------------------------------- /insightedge-examples/project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.2") -------------------------------------------------------------------------------- /insightedge-examples/python/sf_salaries.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from pyspark.sql import SparkSession 4 | 5 | # InsightEdge config 6 | if len(sys.argv) == 1: 7 | spaceName = os.environ['INSIGHTEDGE_SPACE_NAME'] 8 | else: 9 | spaceName = sys.argv[1] 10 | 11 | print("InsightEdge config: %s" % spaceName) 12 | 13 | spark = SparkSession \ 14 | .builder \ 15 | .appName("SF Salaries Example") \ 16 | .config("spark.insightedge.space.name", spaceName) \ 17 | .getOrCreate() 18 | 19 | 20 | # load SF salaries dataset from file 21 | gsHome = os.getenv("XAP_HOME") 22 | if gsHome is None: 23 | gsHome = os.environ["GS_HOME"] 24 | 25 | jsonFilePath = os.path.join(gsHome, "insightedge/data/sf_salaries_sample.json") 26 | jsonDf = spark.read.json(jsonFilePath) 27 | 28 | # save DataFrame to the grid 29 | jsonDf.write.format("org.apache.spark.sql.insightedge").mode("overwrite").save("salaries") 30 | 31 | # load DataFrame from the grid 32 | gridDf = spark.read.format("org.apache.spark.sql.insightedge").option("collection", "salaries").load() 33 | gridDf.printSchema() 34 | 35 | # register this DataFrame as a table 36 | 
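# (Note: registerTempTable, used below, is the Spark 1.x-era API name this example was written against;
# on Spark 2.0+ the equivalent, non-deprecated call would be gridDf.createOrReplaceTempView("salaries").)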
gridDf.registerTempTable("salaries") 37 | 38 | # run SQL query 39 | averagePay = spark.sql( 40 | """SELECT JobTitle, AVG(TotalPay) as AveragePay 41 | FROM salaries 42 | WHERE Year = 2012 43 | GROUP BY JobTitle 44 | ORDER BY AVG(TotalPay) DESC 45 | LIMIT 15""") 46 | 47 | for each in averagePay.collect(): 48 | print("%s: %s" % (each[0], each[1])) 49 | 50 | spark.stop() -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/LoadDataFrame.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.all._ 23 | 24 | /** 25 | * Loads Products from Data Grid as DataFrame and runs filtering. 26 | */ 27 | object LoadDataFrame { 28 | 29 | def main(args: Array[String]): Unit = { 30 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 31 | 32 | // args: <spark-master-url> <space-name> 33 | val settings = if (args.length > 0) args 34 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | initConfig.spaceName) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: LoadDataFrame <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | val Array(master, space) = settings 42 | val ieConfig = initConfig.copy(spaceName = space) 43 | val spark = SparkSession.builder 44 | .appName("example-load-dataframe") 45 | .master(master) 46 | .insightEdgeConfig(ieConfig) 47 | .getOrCreate() 48 | 49 | val df = spark.read.grid[Product] 50 | df.printSchema() 51 | val count = df.filter(df("quantity") < 5).count() 52 | println(s"Number of products with quantity < 5: $count") 53 | spark.stopInsightEdgeContext() 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/LoadDataset.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.all._ 23 | 24 | /** 25 | * Loads Products from Data Grid as a Dataset and runs filtering. 26 | */ 27 | object LoadDataset { 28 | 29 | def main(args: Array[String]): Unit = { 30 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 31 | 32 | // args: <spark-master-url> <space-name> 33 | val settings = if (args.length > 0) args 34 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | initConfig.spaceName) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: LoadDataset <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | val Array(master, space) = settings 42 | val ieConfig = initConfig.copy(spaceName = space) 43 | val spark = SparkSession.builder 44 | .appName("example-load-dataset") 45 | .master(master) 46 | .insightEdgeConfig(ieConfig) 47 | .getOrCreate() 48 | 49 | import spark.implicits._ 50 | val ds = spark.read.grid[Product].as[Product] 51 | ds.printSchema() 52 | val count = ds.filter( o => o.quantity < 5).count() 53 | println(s"Number of products with quantity < 5: $count") 54 | spark.stopInsightEdgeContext() 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/LoadRdd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.basic._ 23 | 24 | /** 25 | * Loads Product RDD from Data Grid and prints objects count.
26 | */ 27 | object LoadRdd { 28 | 29 | def main(args: Array[String]): Unit = { 30 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 31 | 32 | // args: <spark-master-url> <space-name> 33 | val settings = if (args.length > 0) args 34 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | initConfig.spaceName) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: LoadRdd <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | val Array(master, space) = settings 42 | val ieConfig = initConfig.copy(spaceName = space) 43 | val spark = SparkSession.builder 44 | .appName("example-load-rdd") 45 | .master(master) 46 | .insightEdgeConfig(ieConfig) 47 | .getOrCreate() 48 | val sc = spark.sparkContext 49 | 50 | val rdd = sc.gridRdd[Product]() 51 | println(s"Products RDD count: ${rdd.count()}") 52 | spark.stopInsightEdgeContext() 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/LoadRddWithSql.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.basic._ 23 | 24 | /** 25 | * Partially loads Product RDD from Data Grid using SQL query (filter on Data Grid side) and prints objects count. 26 | */ 27 | object LoadRddWithSql { 28 | 29 | def main(args: Array[String]): Unit = { 30 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 31 | 32 | // args: <spark-master-url> <space-name> 33 | val settings = if (args.length > 0) args 34 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | initConfig.spaceName) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: LoadRddWithSql <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | val Array(master, space) = settings 42 | val ieConfig = initConfig.copy(spaceName = space) 43 | val spark = SparkSession.builder 44 | .appName("example-load-rdd-sql") 45 | .master(master) 46 | .insightEdgeConfig(ieConfig) 47 | .getOrCreate() 48 | val sc = spark.sparkContext 49 | 50 | val rdd = sc.gridSql[Product]("quantity < 5") 51 | println(s"Number of products with quantity < 5: ${rdd.count()}") 52 | spark.stopInsightEdgeContext() 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/PersistDataFrame.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.{SaveMode, SparkSession} 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.all._ 23 | 24 | /** 25 | * Persists a selection of Products to Data Grid, then loads it as a new DataFrame. 26 | */ 27 | object PersistDataFrame { 28 | 29 | def main(args: Array[String]): Unit = { 30 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 31 | 32 | // args: <spark-master-url> <space-name> 33 | val settings = if (args.length > 0) args 34 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | initConfig.spaceName) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: PersistDataFrame <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | val Array(master, space) = settings 42 | val ieConfig = initConfig.copy(spaceName = space) 43 | val spark = SparkSession.builder 44 | .appName("example-persist-dataframe") 45 | .master(master) 46 | .insightEdgeConfig(ieConfig) 47 | .getOrCreate() 48 | 49 | val df = spark.read.grid[Product] 50 | println("Product schema:") 51 | df.printSchema() 52 | 53 | df.select("id", "quantity").filter(df("quantity") < 5).write.mode(SaveMode.Overwrite).grid("smallStock") 54 | val persistedDf = spark.read.grid("smallStock") 55 | 56 | val count = persistedDf.count() 57 | 58 | println(s"Number of products with quantity < 5: $count") 59 | spark.stopInsightEdgeContext() 60 | } 61 | 62 | } -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/PersistDataset.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.{SaveMode, SparkSession} 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.all._ 23 | 24 | /** 25 | * Persists a selection of Products to Data Grid, then loads it as a new Dataset.
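 * (A typed counterpart to PersistDataFrame: since the Dataset keeps the Product case-class typing, the filter below runs on typed objects rather than on untyped Row values.)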
26 | */ 27 | object PersistDataset { 28 | 29 | def main(args: Array[String]): Unit = { 30 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 31 | 32 | // args: <spark-master-url> <space-name> 33 | val settings = if (args.length > 0) args 34 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | initConfig.spaceName) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: PersistDataset <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | val Array(master, space) = settings 42 | val ieConfig = initConfig.copy(spaceName = space) 43 | val spark = SparkSession.builder 44 | .appName("example-persist-dataset") 45 | .master(master) 46 | .insightEdgeConfig(ieConfig) 47 | .getOrCreate() 48 | 49 | import spark.implicits._ 50 | 51 | val ds = spark.read.grid[Product].as[Product] 52 | println("Product schema:") 53 | ds.printSchema() 54 | 55 | ds.filter( o => o.quantity < 5).write.mode(SaveMode.Overwrite).grid("smallStock") 56 | val persistedDS = spark.read.grid("smallStock").as[Product] 57 | 58 | val count = persistedDS.count() 59 | 60 | println(s"Number of products with quantity < 5: $count") 61 | spark.stopInsightEdgeContext() 62 | } 63 | 64 | } -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/Product.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.insightedge.scala.annotation._ 20 | 21 | import scala.beans.{BeanProperty, BooleanBeanProperty} 22 | 23 | case class Product( 24 | @BeanProperty 25 | @SpaceId 26 | @SpaceProperty(nullValue = "-1") 27 | var id: Long, 28 | 29 | @BeanProperty 30 | var description: String, 31 | 32 | @BeanProperty 33 | @SpaceProperty(nullValue = "-1") 34 | var quantity: Int, 35 | 36 | @BooleanBeanProperty 37 | @SpaceProperty(nullValue = "false") 38 | var featuredProduct: Boolean 39 | ) { 40 | 41 | def this() = this(-1, null, -1, false) 42 | 43 | } -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/SaveRdd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.basic._ 23 | 24 | import scala.util.Random 25 | 26 | /** 27 | * Generates 100000 Products, converts to Spark RDD and saves to Data Grid. Products have fixed IDs. 28 | */ 29 | object SaveRdd { 30 | 31 | def main(args: Array[String]): Unit = { 32 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 33 | 34 | // args: <spark-master-url> <space-name> 35 | val settings = if (args.length > 0) args 36 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 37 | initConfig.spaceName) 38 | 39 | if (settings.length != 2) { 40 | System.err.println("Usage: SaveRdd <spark-master-url> <space-name>") 41 | System.exit(1) 42 | } 43 | 44 | val Array(master, space) = settings 45 | val ieConfig = initConfig.copy(spaceName = space) 46 | val spark = SparkSession.builder 47 | .appName("example-save-rdd") 48 | .master(master) 49 | .insightEdgeConfig(ieConfig) 50 | .getOrCreate() 51 | val sc = spark.sparkContext 52 | 53 | val productsNum = 100000 54 | println(s"Saving $productsNum products RDD to the space") 55 | val rdd = sc.parallelize(1 to productsNum).map { i => 56 | Product(i, "Description of product " + i, Random.nextInt(10), Random.nextBoolean()) 57 | } 58 | rdd.saveToGrid() 59 | sc.stopInsightEdgeContext() 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/SaveRddNewContextInitApi.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.basic._ 23 | import org.openspaces.core.GigaSpaceConfigurer 24 | import org.openspaces.core.space.EmbeddedSpaceConfigurer 25 | 26 | import scala.util.Random 27 | 28 | /** 29 | * Generates 100 Products, converts to Spark RDD and saves to Data Grid. Products have fixed IDs.
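 * (Functionally the same as SaveRdd; the difference is initialization style: the InsightEdge context is attached to an already-created SparkContext via initializeInsightEdgeContext instead of being passed through the SparkSession builder.)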
30 | */ 31 | object SaveRddNewContextInitApi { 32 | 33 | def main(args: Array[String]): Unit = { 34 | val settings = if (args.length > 0) args else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | sys.env.getOrElse(InsightEdgeConfig.INSIGHTEDGE_SPACE_NAME, InsightEdgeConfig.INSIGHTEDGE_SPACE_NAME_DEFAULT)) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: SaveRddNewContextInitApi <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | 42 | val Array(master, space) = settings 43 | val config = InsightEdgeConfig(space) 44 | val spark = SparkSession.builder 45 | .appName("example-save-rdd-new-context-init-api") 46 | .master(master) 47 | .getOrCreate() 48 | 49 | // initializing the InsightEdge context via the Spark context 50 | spark.sparkContext.initializeInsightEdgeContext(config) 51 | 52 | val sc = spark.sparkContext 53 | 54 | val productsNum = 100 55 | println(s"Saving $productsNum products RDD to the space") 56 | val rdd = sc.parallelize(1 to productsNum).map { i => 57 | Product(i, "Description of product " + i, Random.nextInt(10), Random.nextBoolean()) 58 | } 59 | 60 | rdd.saveToGrid() 61 | 62 | sc.stopInsightEdgeContext() 63 | } 64 | 65 | } 66 | 67 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/basic/SaveRddNewSessionInitApi.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.basic 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.basic._ 23 | import org.openspaces.core.GigaSpaceConfigurer 24 | import org.openspaces.core.space.EmbeddedSpaceConfigurer 25 | 26 | import scala.util.Random 27 | 28 | /** 29 | * Generates 100 Products, converts to Spark RDD and saves to Data Grid. Products have fixed IDs.
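 * (Same flow as SaveRddNewContextInitApi, except that initializeInsightEdgeContext is invoked on the SparkSession itself rather than on its underlying SparkContext.)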
30 | */ 31 | object SaveRddNewSessionInitApi { 32 | 33 | def main(args: Array[String]): Unit = { 34 | val settings = if (args.length > 0) args else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 35 | sys.env.getOrElse(InsightEdgeConfig.INSIGHTEDGE_SPACE_NAME, InsightEdgeConfig.INSIGHTEDGE_SPACE_NAME_DEFAULT)) 36 | 37 | if (settings.length != 2) { 38 | System.err.println("Usage: SaveRddNewSessionInitApi <spark-master-url> <space-name>") 39 | System.exit(1) 40 | } 41 | 42 | val Array(master, space) = settings 43 | val config = InsightEdgeConfig(space) 44 | val spark = SparkSession.builder 45 | .appName("example-save-rdd-new-session-init-api") 46 | .master(master) 47 | .getOrCreate() 48 | 49 | // initializing the InsightEdge context via the Spark session 50 | spark.initializeInsightEdgeContext(config) 51 | 52 | val sc = spark.sparkContext 53 | 54 | val productsNum = 100 55 | println(s"Saving $productsNum products RDD to the space") 56 | val rdd = sc.parallelize(1 to productsNum).map { i => 57 | Product(i, "Description of product " + i, Random.nextInt(10), Random.nextBoolean()) 58 | } 59 | 60 | rdd.saveToGrid() 61 | 62 | sc.stopInsightEdgeContext() 63 | } 64 | 65 | } 66 | 67 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/geospatial/GasStation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.geospatial 18 | 19 | import org.insightedge.scala.annotation._ 20 | import org.openspaces.spatial.shapes.Point 21 | 22 | import scala.beans.BeanProperty 23 | 24 | case class GasStation( 25 | 26 | @BeanProperty 27 | @SpaceId 28 | var id: Long, 29 | 30 | @BeanProperty 31 | var name: String, 32 | 33 | @BeanProperty 34 | @SpaceSpatialIndex 35 | var location: Point 36 | 37 | ) { 38 | 39 | def this() = this(-1, null, null) 40 | 41 | } -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/geospatial/LoadDataFrameWithGeospatial.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.geospatial 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.all._ 23 | import org.openspaces.spatial.ShapeFactory 24 | import org.openspaces.spatial.shapes.Point 25 | 26 | import scala.util.Random 27 | 28 | /** 29 | * Saves Gas Stations with an indexed location field to Data Grid, then loads them with the DataFrames API. See all operations at http://insightedge.io/docs 30 | */ 31 | object LoadDataFrameWithGeospatial { 32 | 33 | def main(args: Array[String]): Unit = { 34 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 35 | 36 | // args: <spark-master-url> <space-name> 37 | val settings = if (args.length > 0) args 38 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 39 | initConfig.spaceName) 40 | 41 | if (settings.length != 2) { 42 | System.err.println("Usage: LoadDataFrameWithGeospatial <spark-master-url> <space-name>") 43 | System.exit(1) 44 | } 45 | val Array(master, space) = settings 46 | val ieConfig = initConfig.copy(spaceName = space) 47 | val spark = SparkSession.builder 48 | .appName("example-load-dataframe-geospatial") 49 | .master(master) 50 | .insightEdgeConfig(ieConfig) 51 | .getOrCreate() 52 | val sc = spark.sparkContext 53 | 54 | val stations = (1 to 100000).map { i => GasStation(i, "Station" + i, randomPoint(-50, 50)) } 55 | println(s"Saving ${stations.size} gas stations RDD to the space") 56 | sc.parallelize(stations).saveToGrid() 57 | 58 | val userLocation = ShapeFactory.point(10, 10) 59 | val searchArea = ShapeFactory.circle(userLocation, 10) 60 | val df = spark.read.grid[GasStation] 61 | val countNearby = df.filter(df("location") geoWithin searchArea).count() 62 | println(s"Number of stations within a radius of 10 around the user: $countNearby") 63 | 64 | spark.stopInsightEdgeContext() 65 | } 66 | 67 | def randomPoint(min: Double, max: Double): Point = { 68 | ShapeFactory.point(randomInRange(min, max), randomInRange(min, max)) 69 | } 70 | 71 | def randomInRange(min: Double, max: Double): Double = { 72 | Random.nextDouble() * (max - min) + min 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/geospatial/LoadRddWithGeospatial.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package org.insightedge.examples.geospatial 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.basic._ 23 | import org.openspaces.spatial.ShapeFactory 24 | import org.openspaces.spatial.shapes.Point 25 | 26 | import scala.util.Random 27 | 28 | /** 29 | * Saves Gas Stations with an indexed location field to Data Grid, then loads them with a SQL query. See all operations at http://insightedge.io/docs 30 | */ 31 | object LoadRddWithGeospatial { 32 | 33 | def main(args: Array[String]): Unit = { 34 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 35 | 36 | // args: <spark-master-url> <space-name> 37 | val settings = if (args.length > 0) args 38 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 39 | initConfig.spaceName) 40 | 41 | if (settings.length != 2) { 42 | System.err.println("Usage: LoadRddWithGeospatial <spark-master-url> <space-name>") 43 | System.exit(1) 44 | } 45 | val Array(master, space) = settings 46 | val ieConfig = initConfig.copy(spaceName = space) 47 | val spark = SparkSession.builder 48 | .appName("example-load-rdd-geospatial") 49 | .master(master) 50 | .insightEdgeConfig(ieConfig) 51 | .getOrCreate() 52 | val sc = spark.sparkContext 53 | 54 | val stations = (1 to 100000).map { i => GasStation(i, "Station" + i, randomPoint(-50, 50)) } 55 | println(s"Saving ${stations.size} gas stations RDD to the space") 56 | sc.parallelize(stations).saveToGrid() 57 | 58 | val userLocation = ShapeFactory.point(10, 10) 59 | val searchArea = ShapeFactory.circle(userLocation, 10) 60 | val stationsNearby = sc.gridSql[GasStation]("location spatial:within ?", Seq(searchArea)) 61 | println(s"Number of stations within a radius of 10 around the user: ${stationsNearby.count()}") 62 | 63 | spark.stopInsightEdgeContext() 64 | } 65 | 66 | def randomPoint(min: Double, max: Double): Point = { 67 | ShapeFactory.point(randomInRange(min, max), randomInRange(min, max)) 68 | } 69 | 70 | def randomInRange(min: Double, max: Double): Double = { 71 | Random.nextDouble() * (max - min) + min 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/mllib/SaveAndLoadMLModel.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package org.insightedge.examples.mllib 18 | 19 | import org.apache.spark.mllib.clustering.{KMeans, KMeansModel} 20 | import org.apache.spark.mllib.linalg.Vectors 21 | import org.apache.spark.sql.SparkSession 22 | import org.apache.spark.{SparkConf, SparkContext} 23 | import org.insightedge.spark.context.InsightEdgeConfig 24 | import org.insightedge.spark.implicits.all._ 25 | 26 | /** 27 | * Saves/reloads the ML model to/from Data Grid. 28 | */ 29 | object SaveAndLoadMLModel { 30 | 31 | def main(args: Array[String]): Unit = { 32 | val initConfig = InsightEdgeConfig.fromSparkConf(new SparkConf()) 33 | 34 | // args: <spark-master-url> <space-name> 35 | val settings = if (args.length > 0) args 36 | else Array( new SparkConf().get("spark.master", InsightEdgeConfig.SPARK_MASTER_LOCAL_URL_DEFAULT), 37 | initConfig.spaceName) 38 | 39 | if (settings.length != 2) { 40 | System.err.println("Usage: SaveAndLoadMLModel <spark-master-url> <space-name>") 41 | System.exit(1) 42 | } 43 | val Array(master, space) = settings 44 | val ieConfig = initConfig.copy(spaceName = space) 45 | val spark = SparkSession.builder 46 | .appName("example-mllib") 47 | .master(master) 48 | .insightEdgeConfig(ieConfig) 49 | .getOrCreate() 50 | val sc = spark.sparkContext 51 | 52 | val modelName = "kMeansModel" 53 | val model = createModel(sc) 54 | println(s"Saving ${model.getClass.getSimpleName} to the datagrid") 55 | model.saveToGrid(sc, modelName) 56 | println(s"Loading $modelName from the datagrid") 57 | val loadedModel = sc.loadMLInstance[KMeansModel](modelName).get 58 | println(s"Model ${loadedModel.getClass.getSimpleName} is loaded") 59 | spark.stopInsightEdgeContext() 60 | } 61 | 62 | private def createModel(sc: SparkContext) = { 63 | val vectors = List(Vectors.dense(1.0, 1.0, 3.0), Vectors.dense(2.0, 0.0, 1.0), Vectors.dense(2.0, 1.0, 0.0)) 64 | val rdd = sc.parallelize(vectors) 65 | val k = 2 66 | val maxIterations = 100 67 | KMeans.train(rdd, k, maxIterations) 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/offheap/OffHeapPersistence.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightEdge/insightedge/36a3ee9f1d84cf2d185be5aeae55627efa1fea4c/insightedge-examples/src/main/scala/org/insightedge/examples/offheap/OffHeapPersistence.scala -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/streaming/HashTag.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package org.insightedge.examples.streaming 18 | 19 | import org.insightedge.scala.annotation._ 20 | 21 | import scala.beans.BeanProperty 22 | 23 | /** 24 | * @author Oleksiy_Dyagilev 25 | */ 26 | case class HashTag( 27 | 28 | @SpaceId(autoGenerate = true) 29 | @BeanProperty 30 | var id: String, 31 | 32 | @BeanProperty 33 | var tag: String 34 | 35 | ) { 36 | 37 | def this(tag: String) = this(null, tag) 38 | 39 | def this() = this(null, null) 40 | 41 | } 42 | -------------------------------------------------------------------------------- /insightedge-examples/src/main/scala/org/insightedge/examples/streaming/TopTags.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.examples.streaming 18 | 19 | import org.insightedge.scala.annotation._ 20 | 21 | import scala.beans.BeanProperty 22 | 23 | /** 24 | * @author Oleksiy_Dyagilev 25 | */ 26 | case class TopTags( 27 | @SpaceId(autoGenerate = true) 28 | @BeanProperty 29 | var id: String, 30 | 31 | @BeanProperty 32 | var tagsCount: java.util.Map[Int, String], 33 | 34 | @BeanProperty 35 | var batchTime: Long 36 | ) { 37 | 38 | def this(tagsCount: java.util.Map[Int, String]) = this(null, tagsCount, System.currentTimeMillis) 39 | 40 | def this() = this(null) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /insightedge-examples/src/test/scala/org/insightedge/examples/InsightedgeExamplesSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.insightedge.examples 18 | 19 | import org.insightedge.examples.basic._ 20 | import org.insightedge.examples.geospatial.{LoadDataFrameWithGeospatial, LoadRddWithGeospatial} 21 | import org.insightedge.examples.mllib.SaveAndLoadMLModel 22 | import org.openspaces.core.space.EmbeddedSpaceConfigurer 23 | import org.openspaces.core.{GigaSpace, GigaSpaceConfigurer} 24 | import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSpec} 25 | 26 | class InsightedgeExamplesSpec extends FunSpec with BeforeAndAfterAll with BeforeAndAfterEach { 27 | val spaceName = "insightedge-examples-space" 28 | val args = Array("local[2]", spaceName) 29 | 30 | var datagrid: GigaSpace = _ 31 | 32 | it("should successfully save RDD to Data Grid") { 33 | SaveRdd.main(args) 34 | } 35 | 36 | it("should successfully load RDD from Data Grid") { 37 | LoadRdd.main(args) 38 | } 39 | 40 | it("should successfully load RDD from Data Grid with SQL") { 41 | LoadRddWithSql.main(args) 42 | } 43 | 44 | it("should successfully load DataFrame from Data Grid") { 45 | LoadDataFrame.main(args) 46 | } 47 | 48 | it("should successfully persist DataFrame to Data Grid") { 49 | PersistDataFrame.main(args) 50 | } 51 | 52 | it("should successfully save and load MLModel to/from Data Grid") { 53 | SaveAndLoadMLModel.main(args) 54 | } 55 | 56 | it("should successfully load rdd with geospatial SQL") { 57 | LoadRddWithGeospatial.main(args) 58 | } 59 | 60 | it("should successfully load dataframe with geospatial SQL") { 61 | LoadDataFrameWithGeospatial.main(args) 62 | } 63 | 64 | override protected def beforeAll() = { 65 | datagrid = new GigaSpaceConfigurer(new EmbeddedSpaceConfigurer(spaceName)).create() 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" 3 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 4 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 5 | <parent> 6 | <artifactId>insightedge-integration-tests</artifactId> 7 | <groupId>org.gigaspaces.insightedge</groupId> 8 | <version>16.4.0-SNAPSHOT</version> 9 | </parent> 10 | <modelVersion>4.0.0</modelVersion> 11 | 12 | <artifactId>jobs</artifactId> 13 | 14 | <properties> 15 | <distribution.dependencies.scope>provided</distribution.dependencies.scope> 16 | </properties> 17 | 18 | <dependencies> 19 | <dependency> 20 | <groupId>org.gigaspaces.insightedge</groupId> 21 | <artifactId>insightedge-core</artifactId> 22 | <scope>${distribution.dependencies.scope}</scope> 23 | <version>16.4.0-SNAPSHOT</version> 24 | </dependency> 25 | </dependencies> 26 | 27 | <build> 28 | <finalName>${project.artifactId}</finalName> 29 | <plugins> 30 | <plugin> 31 | <groupId>com.gigaspaces</groupId> 32 | <artifactId>xap-build-plugin</artifactId> 33 | <version>${xap.version}</version> 34 | <configuration> 35 | <deploy-directories>none</deploy-directories> 36 | </configuration> 37 | </plugin> 38 | <plugin> 39 | <groupId>net.alchim31.maven</groupId> 40 | <artifactId>scala-maven-plugin</artifactId> 41 | <version>3.2.1</version> 42 | <executions> 43 | <execution> 44 | <id>scala-compile-first</id> 45 | <phase>process-resources</phase> 46 | <goals> 47 | <goal>add-source</goal> 48 | <goal>compile</goal> 49 | </goals> 50 | </execution> 51 | </executions> 52 | </plugin> 53 | <plugin> 54 | <groupId>org.apache.maven.plugins</groupId> 55 | <artifactId>maven-deploy-plugin</artifactId> 56 | <configuration> 57 | <skip>true</skip> 58 | </configuration> 59 | </plugin> 60 | </plugins> 61 | </build> 62 | </project> -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/java/org/insightedge/spark/jobs/Country.java: -------------------------------------------------------------------------------- 1 | package org.insightedge.spark.jobs; 2 | 3 | import java.io.Serializable; 4 | 5 | public enum Country implements Serializable { 6 | SWEDEN, ISRAEL, FRANCE; 7 | } -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/java/org/insightedge/spark/jobs/Person.java: -------------------------------------------------------------------------------- 1 | package org.insightedge.spark.jobs; 2 | 3 | import com.gigaspaces.annotation.pojo.SpaceId; 4 | import java.io.Serializable; 5 | 6 | public class Person implements Serializable { 7 | private
String id; 8 | private String name; 9 | private Country country; 10 | 11 | public Person() {} 12 | 13 | public Person(String id, String name, Country country) { 14 | this.id = id; 15 | this.name = name; 16 | this.country = country; 17 | } 18 | 19 | @SpaceId 20 | public String getId() { 21 | return id; 22 | } 23 | 24 | public void setId(String id) { 25 | this.id = id; 26 | } 27 | 28 | public String getName() { 29 | return name; 30 | } 31 | 32 | public void setName(String name) { 33 | this.name = name; 34 | } 35 | 36 | public Country getCountry() { 37 | return country; 38 | } 39 | 40 | public void setCountry(Country country) { 41 | this.country = country; 42 | } 43 | } -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/scala/org/insightedge/spark/jobs/ContinuosLoadRdd.scala: -------------------------------------------------------------------------------- 1 | package org.insightedge.spark.jobs 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | import org.insightedge.spark.context.InsightEdgeConfig 5 | import org.insightedge.spark.implicits.basic._ 6 | 7 | object ContinuosLoadRdd { 8 | 9 | def main(args: Array[String]): Unit = { 10 | val settings = if (args.length > 0) args else Array("spark://127.0.0.1:7077", sys.env("INSIGHTEDGE_SPACE_NAME")) 11 | if (settings.length != 2) { 12 | System.err.println("Usage: ContinuosLoadRdd <spark-master-url> <space-name>") 13 | System.exit(1) 14 | } 15 | val Array(master, space) = settings 16 | val config = InsightEdgeConfig(space) 17 | val sc = new SparkContext(new SparkConf().setAppName("example-load-rdd").setMaster(master).setInsightEdgeConfig(config)) 18 | 19 | val rdd = sc.gridRdd[Product]() 20 | println(s"Products RDD count: ${rdd.countByValue()(QuantityOrdering).values.sum}") 21 | sc.stopInsightEdgeContext() 22 | } 23 | 24 | object QuantityOrdering extends Ordering[Product] { 25 | override def compare(x: Product, y: Product): Int = x.quantity compare y.quantity 26 | } 27 | 28 | } 29 | 30 | -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/scala/org/insightedge/spark/jobs/Data.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package org.insightedge.spark.jobs 18 | 19 | import org.insightedge.scala.annotation._ 20 | 21 | import scala.beans.{BeanProperty, BooleanBeanProperty} 22 | 23 | /** 24 | * Space class for tests 25 | */ 26 | case class Data( 27 | @BeanProperty 28 | @SpaceId(autoGenerate = true) 29 | var id: String, 30 | 31 | @BeanProperty 32 | @SpaceRouting 33 | var routing: Long, 34 | 35 | @BeanProperty 36 | var data: String, 37 | 38 | @BooleanBeanProperty 39 | var flag: Boolean 40 | ) { 41 | def this(routing: Long, data: String) = this(null, routing, data, false) 42 | 43 | def this() = this(-1, null) 44 | 45 | def this(routing: Long) = this(routing, null) 46 | } -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/scala/org/insightedge/spark/jobs/LoadDataFrame.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.jobs 18 | 19 | import org.apache.spark.{SparkConf, SparkContext} 20 | import org.apache.spark.sql.SparkSession 21 | import org.insightedge.spark.context.InsightEdgeConfig 22 | import org.insightedge.spark.implicits.all._ 23 | 24 | 25 | /** 26 | * Loads DataFrame from Data Grid and prints objects count. 
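For context on space classes such as `Data` above: the annotations drive how InsightEdge maps fields to grid properties — `@SpaceId(autoGenerate = true)` lets the grid assign the id on write, `@SpaceRouting` picks the partition, and the no-arg auxiliary constructor is what the grid uses to instantiate the class on read. An illustrative sibling class following the same pattern (the `Event` name and fields are hypothetical):

```scala
import org.insightedge.scala.annotation._

import scala.beans.BeanProperty

// Hypothetical space class modeled on Data: grid-assigned id, routing
// field for partitioning, no-arg constructor for grid-side deserialization.
case class Event(
  @BeanProperty @SpaceId(autoGenerate = true) var id: String,
  @BeanProperty @SpaceRouting var deviceId: Long,
  @BeanProperty var payload: String
) {
  def this() = this(null, -1L, null)
}
```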
27 | * Converts a POJO with an enum field to a DataFrame. 28 | * @since 14.5 29 | */ 30 | object LoadDataFrame { 31 | 32 | def main(args: Array[String]): Unit = { 33 | val settings = if (args.length > 0) args else Array("spark://127.0.0.1:7077", sys.env("INSIGHTEDGE_SPACE_NAME")) 34 | if (settings.length != 2) { 35 | System.err.println("Usage: LoadDataFrame <spark master url> <space name>") 36 | System.exit(1) 37 | } 38 | val Array(master, space) = settings 39 | val config = InsightEdgeConfig(space) 40 | val sc = new SparkContext(new SparkConf().setAppName("example-load-df").setMaster(master).setInsightEdgeConfig(config)) 41 | 42 | val spark = SparkSession.builder 43 | .appName("example-load-df") 44 | .master(master) 45 | .insightEdgeConfig(config) 46 | .getOrCreate() 47 | 48 | writeInitialDataToSpace(spark) 49 | 50 | readDataFromSpace(spark) 51 | 52 | sc.stopInsightEdgeContext() 53 | spark.stopInsightEdgeContext() 54 | } 55 | 56 | private def readDataFromSpace(spark: SparkSession): Unit = { 57 | val df = spark.read.grid[Person] 58 | df.printSchema() 59 | df.show() 60 | 61 | println(s"Person DF count: ${df.count()}") 62 | val c1 = df.count() 63 | assert(c1 == 3, "count should equal 3") 64 | 65 | val filteredDf = df.filter(df("country.name").equalTo("ISRAEL")) 66 | filteredDf.show() 67 | 68 | println(s"Filtered Person DF count: ${filteredDf.count()}") 69 | val c2 = filteredDf.count() 70 | assert(c2 == 1, "count should equal 1") 71 | } 72 | 73 | private def writeInitialDataToSpace(spark: SparkSession) = { 74 | val p1 = new Person("1", "foo", Country.ISRAEL) 75 | val p2 = new Person("2", "bar", Country.SWEDEN) 76 | val p3 = new Person("3", "zoo", Country.FRANCE) 77 | 78 | spark.sparkContext.grid.write(p1) 79 | spark.sparkContext.grid.write(p2) 80 | spark.sparkContext.grid.write(p3) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/scala/org/insightedge/spark/jobs/LoadRdd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.jobs 18 | 19 | import org.apache.spark.{SparkConf, SparkContext} 20 | import org.insightedge.spark.context.InsightEdgeConfig 21 | import org.insightedge.spark.implicits.basic._ 22 | 23 | /** 24 | * Loads Product RDD from Data Grid and prints objects count.
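The `country.name` filter in LoadDataFrame works because the Java enum field is inferred as a nested struct with a `name` column; the same predicate can be phrased through Spark SQL on a temp view. A sketch under that assumption (the view name and helper are illustrative):

```scala
import org.apache.spark.sql.SparkSession
import org.insightedge.spark.implicits.all._

// Sketch: query the grid-backed DataFrame with SQL instead of the
// DataFrame DSL; `spark` is assumed to be insightEdgeConfig-initialized.
def countIsraelis(spark: SparkSession): Long = {
  val df = spark.read.grid[Person]        // same schema inference as above
  df.createOrReplaceTempView("people")    // illustrative view name
  spark.sql("SELECT * FROM people WHERE country.name = 'ISRAEL'").count()
}
```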
25 | */ 26 | object LoadRdd { 27 | 28 | def main(args: Array[String]): Unit = { 29 | val settings = if (args.length > 0) args else Array("spark://127.0.0.1:7077", sys.env("INSIGHTEDGE_SPACE_NAME")) 30 | if (settings.length != 2) { 31 | System.err.println("Usage: LoadRdd ") 32 | System.exit(1) 33 | } 34 | val Array(master, space) = settings 35 | val config = InsightEdgeConfig(space) 36 | val sc = new SparkContext(new SparkConf().setAppName("example-load-rdd").setMaster(master).setInsightEdgeConfig(config)) 37 | 38 | val rdd = sc.gridRdd[Product]() 39 | println(s"Products RDD count: ${rdd.count()}") 40 | sc.stopInsightEdgeContext() 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/scala/org/insightedge/spark/jobs/Product.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.jobs 18 | 19 | import org.insightedge.scala.annotation._ 20 | 21 | import scala.beans.{BeanProperty, BooleanBeanProperty} 22 | 23 | case class Product( 24 | @BeanProperty 25 | @SpaceId 26 | var id: Long, 27 | 28 | @BeanProperty 29 | var description: String, 30 | 31 | @BeanProperty 32 | var quantity: Int, 33 | 34 | @BooleanBeanProperty 35 | var featuredProduct: Boolean 36 | ) { 37 | 38 | def this() = this(-1, null, -1, false) 39 | 40 | } -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/scala/org/insightedge/spark/jobs/SaveRdd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.jobs 18 | 19 | import org.apache.spark.{SparkConf, SparkContext} 20 | import org.insightedge.spark.context.InsightEdgeConfig 21 | import org.insightedge.spark.implicits.basic._ 22 | 23 | import scala.util.Random 24 | 25 | /** 26 | * Generates 100000 Products, converts to Spark RDD and saves to Data Grid. Products have fixed IDs. 
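LoadRdd above pulls the whole `Product` class into Spark; the repository tree also carries a `LoadRddWithSql` example for pushing a predicate down to the grid instead. A minimal sketch of that variant, assuming the `gridSql` method from `implicits.basic` with positional `?` parameters:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.insightedge.spark.context.InsightEdgeConfig
import org.insightedge.spark.implicits.basic._

// Sketch: load only Products matching a grid-side SQL predicate,
// rather than filtering after a full scan in Spark.
object LoadCheapProducts {
  def main(args: Array[String]): Unit = {
    val config = InsightEdgeConfig("insightedge-space")  // illustrative space name
    val sc = new SparkContext(
      new SparkConf().setAppName("sql-rdd-sketch").setMaster("local[2]").setInsightEdgeConfig(config))
    val cheap = sc.gridSql[Product]("quantity < ?", Seq(10))
    println(s"Products with quantity < 10: ${cheap.count()}")
    sc.stopInsightEdgeContext()
  }
}
```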
27 | */ 28 | object SaveRdd { 29 | 30 | def main(args: Array[String]): Unit = { 31 | val settings = if (args.length > 0) args else Array("spark://127.0.0.1:7077", sys.env("INSIGHTEDGE_SPACE_NAME")) 32 | if (settings.length != 2) { 33 | System.err.println("Usage: SaveRdd ") 34 | System.exit(1) 35 | } 36 | val Array(master, space) = settings 37 | val config = InsightEdgeConfig(space) 38 | val sc = new SparkContext(new SparkConf().setAppName("example-save-rdd").setMaster(master).setInsightEdgeConfig(config)) 39 | 40 | val products = (1 to 100000).map { i => Product(i, "Description of product " + i, Random.nextInt(100), Random.nextBoolean()) } 41 | println(s"Saving ${products.size} products RDD to the space") 42 | val rdd = sc.parallelize(products, 9) 43 | rdd.saveToGrid() 44 | sc.stopInsightEdgeContext() 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /insightedge-integration-tests/jobs/src/main/scala/org/insightedge/spark/jobs/StreamExample.scala: -------------------------------------------------------------------------------- 1 | package org.insightedge.spark.jobs 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.sql.SQLContext 5 | import org.apache.spark.streaming.dstream.InputDStream 6 | import org.apache.spark.streaming.{Seconds, StreamingContext} 7 | import org.apache.spark.{SparkConf, SparkContext} 8 | import org.insightedge.spark.context.InsightEdgeConfig 9 | import org.insightedge.spark.implicits.all._ 10 | 11 | import scala.collection.mutable 12 | import scala.util.Random 13 | 14 | /** 15 | * Created by kobikis on 23/11/16. 16 | * 17 | * @since 12.0.1 18 | */ 19 | object StreamExample { 20 | 21 | def main(args: Array[String]): Unit = { 22 | val settings = if (args.length > 0) args else Array("spark://127.0.0.1:7077", sys.env("INSIGHTEDGE_SPACE_NAME")) 23 | if (settings.length != 2) { 24 | System.err.println("Usage: SaveRdd ") 25 | System.exit(1) 26 | } 27 | 28 | val Array(master, space) = settings 29 | val ieConfig = InsightEdgeConfig(space) 30 | val sparkConf = new SparkConf().setAppName("StreamExample").setMaster(master).setInsightEdgeConfig(ieConfig) 31 | 32 | val ssc = new StreamingContext(sparkConf, Seconds(1)) 33 | 34 | val sc = ssc.sparkContext 35 | val sqlContext = new SQLContext(sc) 36 | val df = sqlContext.read.grid[Data] 37 | df.printSchema() 38 | 39 | 40 | val stream: InputDStream[String] = ssc.queueStream(stringQueue(sc)) 41 | 42 | stream.foreachRDD { rdd => 43 | val str = rdd.first() 44 | val data = new Data(Random.nextLong(), str) 45 | sc.saveToGrid(data) 46 | val count = df.count() 47 | } 48 | 49 | ssc.start() 50 | 51 | Thread.sleep(120000) 52 | 53 | sc.stopInsightEdgeContext() 54 | } 55 | 56 | 57 | def stringQueue(sc: SparkContext) = { 58 | val q = mutable.Queue[RDD[String]]() 59 | implicit class Rep(n: Int) { 60 | def times[A](f: => A) { 1 to n foreach(_ => f) } 61 | } 62 | 63 | 100000.times { 64 | q += sc.makeRDD(Seq("aa", "bb", "cc")) 65 | } 66 | q 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /insightedge-integration-tests/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 4.0.0 5 | 6 | insightedge-integration-tests 7 | pom 8 | InsightEdge Integration Tests 9 | 10 | 11 | org.gigaspaces.insightedge 12 | insightedge-package 13 | 16.4.0-SNAPSHOT 14 | 15 | 16 | 17 | jobs 18 | tests 19 | 20 | 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-deploy-plugin 26 | 27 | true 28 | 29 | 
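Back on StreamExample above: `ssc.queueStream` feeds a queue of pre-built RDDs into the streaming engine, one per batch interval, which is how that job generates test traffic without an external source. A condensed, self-contained sketch of the same technique:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.mutable

// Sketch: drive a DStream from an in-memory queue of RDDs — the
// technique StreamExample uses — one queued RDD consumed per batch.
object QueueStreamSketch {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(
      new SparkConf().setAppName("queue-stream-sketch").setMaster("local[2]"), Seconds(1))
    val queue = mutable.Queue[RDD[String]]()
    (1 to 5).foreach(_ => queue += ssc.sparkContext.makeRDD(Seq("aa", "bb", "cc")))
    ssc.queueStream(queue).foreachRDD(rdd => println(s"batch size: ${rdd.count()}"))
    ssc.start()
    ssc.awaitTerminationOrTimeout(10000)  // let a few batches run, then exit
    ssc.stop()
  }
}
```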
30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/resources/docker/demo-mode/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Runs InsightEdge in a demo mode 3 | # 4 | 5 | FROM centos:7.2.1511 6 | MAINTAINER Oleksiy Dyagilev oleksiy.dyagilev@gigaspaces.com 7 | 8 | ENV STORAGE_SERVER="gs-storage-server.s3.amazonaws.com" 9 | RUN echo "Using STORAGE_SERVER=${STORAGE_SERVER}" 10 | RUN if [[ "${STORAGE_SERVER}" == "" ]]; then echo "STORAGE_SERVER can't be empty"; exit 1; fi 11 | 12 | # upgrade system 13 | RUN yum -y update 14 | RUN yum clean all 15 | RUN yum -y install curl wget unzip 16 | 17 | # java 18 | ENV ZIPPED_JDK=jdk-8u45-linux-x64.tar.gz 19 | ENV TAR_JDK=jdk-8u45-linux-x64.tar 20 | 21 | ENV JAVA_TARGET=jdk1.8.0_45 22 | ENV JAVA_HOME /usr/$JAVA_TARGET 23 | ENV PATH $PATH:$JAVA_HOME/bin 24 | 25 | RUN wget http://${STORAGE_SERVER}/jdk/$ZIPPED_JDK 26 | RUN gunzip $ZIPPED_JDK && tar -xvf $TAR_JDK -C /usr/ 27 | RUN ln -s $JAVA_HOME /usr/java && rm -rf $JAVA_HOME/man 28 | 29 | # add InsightEdge distr 30 | RUN mkdir -p /opt/gigaspaces-insightedge 31 | RUN mkdir -p /tmp/spark-events 32 | 33 | ADD bootstrap.sh /etc/bootstrap.sh 34 | RUN chown root:root /etc/bootstrap.sh 35 | RUN chmod 700 /etc/bootstrap.sh 36 | 37 | # start InsightEdge 38 | CMD ["/etc/bootstrap.sh", "-d"] 39 | 40 | EXPOSE 8090 41 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/resources/docker/demo-mode/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | /opt/gigaspaces-insightedge/bin/gs.sh demo > /opt/gigaspaces-insightedge/logs/bootstrap.log 2>&1 4 | 5 | 6 | if [[ $1 == "-d" ]]; then 7 | while true; do sleep 1000; done 8 | fi 9 | 10 | if [[ $1 == "-bash" ]]; then 11 | /bin/bash 12 | fi -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/resources/docker/failover/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Runs InsightEdge in a demo mode 3 | # 4 | 5 | FROM centos:7.2.1511 6 | MAINTAINER Kobi Kisos kobi@gigaspaces.com 7 | 8 | ENV STORAGE_SERVER="gs-storage-server.s3.amazonaws.com" 9 | RUN echo "Using STORAGE_SERVER=${STORAGE_SERVER}" 10 | RUN if [[ "${STORAGE_SERVER}" == "" ]]; then echo "STORAGE_SERVER can't be empty"; exit 1; fi 11 | 12 | # upgrade system 13 | RUN yum -y update 14 | RUN yum clean all 15 | RUN yum -y install curl wget unzip 16 | RUN yum -y install net-tools 17 | 18 | # java 19 | ENV ZIPPED_JDK=jdk-8u45-linux-x64.tar.gz 20 | ENV TAR_JDK=jdk-8u45-linux-x64.tar 21 | 22 | ENV JAVA_TARGET=jdk1.8.0_45 23 | ENV JAVA_HOME /usr/$JAVA_TARGET 24 | ENV PATH $PATH:$JAVA_HOME/bin 25 | 26 | RUN wget http://${STORAGE_SERVER}/jdk/$ZIPPED_JDK 27 | RUN gunzip $ZIPPED_JDK && tar -xvf $TAR_JDK -C /usr/ 28 | RUN ln -s $JAVA_HOME /usr/java && rm -rf $JAVA_HOME/man 29 | 30 | # add InsightEdge distr 31 | RUN mkdir -p /opt/insightedge 32 | 33 | RUN mkdir -p /tmp/spark-events 34 | ENV EXT_JAVA_OPTIONS "-Dcom.gs.transport_protocol.lrmi.bind-port=10000-10100 -Dcom.gigaspaces.start.httpPort=9104 -Dcom.gigaspaces.system.registryPort=7102 -Dcom.gs.deploy=/deploy -Dcom.gs.work=/work" 35 | 36 | # ssh 37 | EXPOSE 22 38 | # spark 39 | EXPOSE 8090 8080 7077 18080 40 | # datagrid (some might be redundant, not 
sure) 41 | EXPOSE 9104 42 | EXPOSE 7102 43 | EXPOSE 4174 44 | EXPOSE 7000-7010 45 | EXPOSE 10000-10100 46 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=INFO, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/jobs/LoadDataFrameSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.jobs 18 | 19 | import org.insightedge.spark.fixture.InsightedgeDemoModeDocker 20 | import org.insightedge.spark.utils.DockerUtils.dockerExec 21 | import org.insightedge.spark.utils.TestUtils.printLnWithTimestamp 22 | import org.scalatest.{FlatSpec, Suite} 23 | 24 | 25 | /** 26 | * Test load DataFrame of Pojo, which also contains enum 27 | * 28 | * @Since 14.5 Support for Enum 29 | * @author Moran 30 | */ 31 | class LoadDataFrameSpec extends FlatSpec with InsightedgeDemoModeDocker { 32 | self: Suite => 33 | 34 | override protected def beforeAll(): Unit = { 35 | printLnWithTimestamp("beforeAll - LoadDataFrameSpec") 36 | super.beforeAll() 37 | } 38 | 39 | "insightedge-submit " should "submit LoadDataFrame job" in { 40 | val fullClassName = s"org.insightedge.spark.jobs.LoadDataFrame" 41 | 42 | val command = 43 | s"""/opt/gigaspaces-insightedge/insightedge/bin/insightedge-submit 44 | |--class $fullClassName 45 | |--master spark://127.0.0.1:7077 46 | |/opt/gigaspaces-insightedge/insightedge/examples/jars/jobs.jar""".stripMargin 47 | 48 | printLnWithTimestamp( "command:" + command ) 49 | val exitCode = dockerExec(containerId, command) 50 | printLnWithTimestamp( "exitCode:" + exitCode ) 51 | assert(exitCode == 0) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/utils/BuildUtils.scala: -------------------------------------------------------------------------------- 1 | package org.insightedge.spark.utils 2 | 3 | /** 4 | * @author Oleksiy_Dyagilev 5 | */ 6 | object BuildUtils { 7 | val BuildVersion = Option(System.getProperty("dist.version")).getOrElse(throw new Exception("System property 'dist.version' is not set")) 8 | val BuildEdition = Option(System.getProperty("dist.edition")).getOrElse(throw new 
Exception("System property 'dist.edition' is not set")) 9 | val GitBranch = Option(System.getProperty("git.branch")).getOrElse(throw new Exception("System property 'git.branch' is not set")) 10 | val IEHome = Option(System.getProperty("dist.dir")).getOrElse(throw new Exception("System property 'dist.dir' is not set")) 11 | val TestFolder = Option(System.getProperty("test.folder")).getOrElse(throw new Exception("System property 'test.folder' is not set")) 12 | } -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/utils/DockerUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import scala.language.postfixOps 20 | import sys.process._ 21 | import org.insightedge.spark.utils.TestUtils.printLnWithTimestamp 22 | 23 | /** 24 | * @author Oleksiy_Dyagilev 25 | */ 26 | object DockerUtils { 27 | 28 | /** 29 | * runs command in container, blocks until process is finished and returns the exit code 30 | */ 31 | def dockerExec(containerId: String, command: String): Int = { 32 | val processCommand = s"docker exec $containerId $command" 33 | printLnWithTimestamp(s"running command: $processCommand") 34 | processCommand ! 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/utils/FsUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import java.io.File 20 | import org.insightedge.spark.utils.TestUtils.printLnWithTimestamp 21 | 22 | /** 23 | * @author Danylo_Hurin. 24 | */ 25 | object FsUtils { 26 | 27 | val PackagerDirName = "insightedge-packager" 28 | 29 | /** 30 | * Looks for `packager` directory no matter where this test executed from ... command line, IDE, etc 31 | */ 32 | def findPackagerDir(findFrom: File): Option[File] = { 33 | def log(s: File) = printLnWithTimestamp(s"Looking for $PackagerDirName ... 
checking $s") 34 | log(findFrom) 35 | 36 | findFrom.getName match { 37 | case "" => None 38 | case PackagerDirName => Some(findFrom) 39 | case _ => 40 | val parent = new File(findFrom.getAbsoluteFile.getParent) 41 | parent 42 | .listFiles() 43 | .filter(_.isDirectory) 44 | .find(dir => {log(dir); dir.getName == PackagerDirName}) 45 | .orElse(findPackagerDir(parent)) 46 | 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/utils/ProcessUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import sys.process._ 20 | import org.insightedge.spark.utils.TestUtils.printLnWithTimestamp 21 | 22 | /** 23 | * @author Oleksiy_Dyagilev 24 | */ 25 | object ProcessUtils { 26 | 27 | 28 | /** 29 | * Executes given command, blocks until it exits, asserts zero exit code 30 | */ 31 | def execAssertSucc(cmd: String) = { 32 | printLnWithTimestamp(s"Executing: $cmd") 33 | val exitCode = cmd.! 34 | assert(exitCode == 0, s"Non zero exit code executing $cmd") 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/utils/RestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import org.scalatest.Assertions._ 20 | import play.api.libs.json.{JsValue, Json} 21 | import play.api.libs.ws.WSResponse 22 | import play.api.libs.json._ 23 | import play.api.libs.functional.syntax._ 24 | 25 | import scala.concurrent.{Await, Future} 26 | import scala.concurrent.duration._ 27 | 28 | 29 | /** 30 | * @author Oleksiy_Dyagilev 31 | */ 32 | object RestUtils { 33 | 34 | def jsonBody(respFuture: Future[WSResponse], timeout: Duration = 1.second): JsValue = { 35 | val res = Await.result(respFuture, timeout) 36 | assert(res.status == 200, res) 37 | println(res.body) 38 | Json.parse(res.body) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/utils/Tags.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, GigaSpaces Technologies, Inc. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.insightedge.spark.utils 18 | 19 | import org.scalatest.Tag 20 | 21 | /** 22 | * @author Danylo_Hurin. 23 | */ 24 | object LongRunningTestTag extends Tag("org.insightedge.spark.utils.LongRunningTestTag") -------------------------------------------------------------------------------- /insightedge-integration-tests/tests/src/test/scala/org/insightedge/spark/utils/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package org.insightedge.spark.utils 2 | 3 | import java.time.LocalDateTime 4 | import java.time.format.DateTimeFormatter 5 | 6 | object TestUtils{ 7 | 8 | def printLnWithTimestamp(x: Any): Unit = { 9 | printf(s"${LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss,SSS"))} ") 10 | println(x) 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /insightedge-packager/src/main/assembly/tests/integration-tests.xml: -------------------------------------------------------------------------------- 1 | 4 | ie-integration-tests-sources 5 | 6 | zip 7 | 8 | false 9 | 10 | 11 | ${project.basedir}/.. 12 | 13 | pom.xml 14 | 15 | true 16 | 17 | 18 | ${project.basedir}/../insightedge-integration-tests 19 | true 20 | 21 | **/target/** 22 | **/*.iml 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/insightedge-pyspark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 
7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | DIRNAME=$(dirname ${BASH_SOURCE[0]}) 21 | source ${DIRNAME}/../conf/insightedge-env.sh 22 | 23 | source "${SPARK_HOME}"/bin/load-spark-env.sh 24 | export _SPARK_CMD_USAGE="Usage: ./bin/insightedge-pyspark [options]" 25 | 26 | # In Spark 2.0, IPYTHON and IPYTHON_OPTS are removed and pyspark fails to launch if either option 27 | # is set in the user's environment. Instead, users should set PYSPARK_DRIVER_PYTHON=ipython 28 | # to use IPython and set PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver 29 | # (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython 30 | # and executor Python executables. 31 | 32 | # Fail noisily if removed options are set 33 | if [[ -n "$IPYTHON" || -n "$IPYTHON_OPTS" ]]; then 34 | echo "Error in pyspark startup:" 35 | echo "IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead." 36 | exit 1 37 | fi 38 | 39 | # Default to standard python interpreter unless told otherwise 40 | if [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then 41 | PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"python"}" 42 | fi 43 | 44 | WORKS_WITH_IPYTHON=$(python -c 'import sys; print(sys.version_info >= (2, 7, 0))') 45 | 46 | #### PYSPARK_PYTHON is also defined in insightedge-env.sh 47 | # Determine the Python executable to use for the executors: 48 | if [[ -z "$PYSPARK_PYTHON" ]]; then 49 | if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && ! 
$WORKS_WITH_IPYTHON ]]; then 50 | echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2 51 | exit 1 52 | else 53 | PYSPARK_PYTHON=python 54 | fi 55 | fi 56 | export PYSPARK_PYTHON 57 | 58 | #### PYTHONPATH is also defined in insightedge-env.sh 59 | # Add the PySpark classes to the Python path: 60 | export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" 61 | export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH" 62 | 63 | # Load the PySpark shell.py script when ./pyspark is used interactively: 64 | export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" 65 | export PYTHONSTARTUP="${GS_HOME}/insightedge/bin/shell-init.py" 66 | 67 | # For pyspark tests 68 | if [[ -n "$SPARK_TESTING" ]]; then 69 | unset YARN_CONF_DIR 70 | unset HADOOP_CONF_DIR 71 | export PYTHONHASHSEED=0 72 | exec "$PYSPARK_DRIVER_PYTHON" -m "$1" 73 | exit 74 | fi 75 | 76 | export PYSPARK_DRIVER_PYTHON 77 | export PYSPARK_DRIVER_PYTHON_OPTS 78 | exec "${SPARK_HOME}/bin/spark-submit" pyspark-shell-main --name "PySparkShell" "$@" 79 | -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/insightedge-pyspark.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | call %~dp0..\conf\insightedge-env.cmd 4 | 5 | rem Figure out where the Spark framework is installed 6 | 7 | if "x%SPARK_HOME%"=="x" ( 8 | set SPARK_HOME="%GS_HOME%\insightedge\spark" 9 | ) 10 | 11 | call %SPARK_HOME%\bin\load-spark-env.cmd 12 | set _SPARK_CMD_USAGE=Usage: bin\insightedge-pyspark.cmd [options] 13 | 14 | rem Figure out which Python to use. 15 | 16 | rem PYSPARK_PYTHON is also defined in insightedge-env 17 | if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( 18 | set PYSPARK_DRIVER_PYTHON=python 19 | if not [%PYSPARK_PYTHON%] == [] set PYSPARK_DRIVER_PYTHON=%PYSPARK_PYTHON% 20 | ) 21 | 22 | rem PYTHONPATH is also defined in insightedge-env 23 | set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% 24 | set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH% 25 | 26 | rem Load the InsighEdge version of shell.py script: 27 | set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% 28 | set PYTHONSTARTUP=%GS_HOME%\insightedge\bin\shell-init.py 29 | 30 | call "%SPARK_HOME%\bin\spark-submit" pyspark-shell-main --name "PySparkShell" %* 31 | -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/insightedge-shell: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
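The `INSIGHTEDGE_SPACE_NAME` convention used by these launchers mirrors the argument handling in the Scala jobs earlier in this repository: explicit CLI arguments win, and the environment variable (exported by `insightedge-env`) is the fallback. A sketch of that resolution pattern (the helper name is illustrative):

```scala
// Sketch of the jobs' argument fallback: CLI args take precedence,
// otherwise INSIGHTEDGE_SPACE_NAME names the target space.
def resolveSettings(args: Array[String]): (String, String) = args match {
  case Array(master, space) => (master, space)
  case Array()              => ("spark://127.0.0.1:7077", sys.env("INSIGHTEDGE_SPACE_NAME"))
  case _                    => sys.error("Usage: <spark master url> <space name>")
}
```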
18 | # 19 | 20 | # 21 | # Shell script for starting the Spark Shell REPL 22 | 23 | DIRNAME=$(dirname ${BASH_SOURCE[0]}) 24 | source ${DIRNAME}/../conf/insightedge-env.sh 25 | 26 | "${SPARK_HOME}/bin/spark-shell" -i ${GS_HOME}/insightedge/bin/shell-init.scala "$@" -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/insightedge-shell.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | call %~dp0..\conf\insightedge-env.cmd 3 | 4 | "%SPARK_HOME%\bin\spark-shell2.cmd" -i %~dp0\shell-init.scala %* -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/insightedge-submit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | DIRNAME=$(dirname ${BASH_SOURCE[0]}) 6 | source ${DIRNAME}/../conf/insightedge-env.sh 7 | 8 | #Because of SPARK-21642, the driver url is composed from hostname instead of ip. This is a workaround - setting SPARK_LOCAL_HOSTNAME to the submitting machine IP 9 | if [[ -z "${SPARK_LOCAL_HOSTNAME}" ]]; then 10 | export SPARK_LOCAL_HOSTNAME="${GS_NIC_ADDRESS}" 11 | fi 12 | 13 | SUBMIT_ARGS=( "$@" ) 14 | # get length of an array 15 | length=${#SUBMIT_ARGS[@]} 16 | 17 | # loop over arguments to find if in cluster deploy-mode 18 | for (( i=0; i<${length}; i++ )); 19 | do 20 | if [[ ${SUBMIT_ARGS[$i]} == "--deploy-mode" && ${SUBMIT_ARGS[$(($i + 1))]} == "cluster" ]]; then 21 | # In cluster mode, local env variables override remote machine env variables (spark JIRA SPARK-24456) 22 | # To work around this behavior: 23 | 24 | # 1. stop spark submit from sourcing spark-env 25 | export SPARK_ENV_LOADED=1 26 | 27 | # 2. empty i9e local classpaths env variable 28 | unset SPARK_DIST_CLASSPATH 29 | fi 30 | done 31 | 32 | "${SPARK_HOME}"/bin/spark-submit $@ -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/insightedge-submit.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | call %~dp0..\conf\insightedge-env.cmd 3 | 4 | rem Because of SPARK-21642, the driver url is composed from hostname instead of ip. This is a workaround - setting SPARK_LOCAL_HOSTNAME to the submitting machine IP 5 | if not defined SPARK_LOCAL_HOSTNAME set SPARK_LOCAL_HOSTNAME=%GS_NIC_ADDRESS% 6 | 7 | rem loop over arguments to find if in cluster deploy-mode 8 | :loop 9 | 10 | if [%1] == [] goto END_LOOP 11 | if [%1] == [--deploy-mode] ( 12 | 13 | if [%2] == [cluster] ( 14 | rem In cluster mode, local env variables override remote machine env variables (spark JIRA SPARK-24456) 15 | rem To work around this behavior: 16 | rem 1. stop spark submit to source spark-env 17 | set SPARK_ENV_LOADED=1 18 | 19 | rem 2. Unset i9e local classpaths env variable 20 | set SPARK_DIST_CLASSPATH= 21 | 22 | goto END_LOOP 23 | ) 24 | ) 25 | shift 26 | goto loop 27 | 28 | :END_LOOP 29 | call %SPARK_HOME%\bin\spark-submit %* -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/shell-init.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. 
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | """ 19 | An interactive shell. 20 | 21 | This file is designed to be launched as a PYTHONSTARTUP script. 22 | """ 23 | 24 | import atexit 25 | import os 26 | import platform 27 | import sys 28 | 29 | import py4j 30 | 31 | import pyspark 32 | from pyspark.context import SparkContext 33 | from pyspark.sql import SparkSession, SQLContext 34 | from pyspark.storagelevel import StorageLevel 35 | 36 | # InsightEdge config 37 | if len(sys.argv) == 1: 38 | spaceName = os.environ['INSIGHTEDGE_SPACE_NAME'] 39 | else: 40 | spaceName = sys.argv[1] 41 | 42 | print("InsightEdge config: %s" % spaceName) 43 | 44 | if os.environ.get("SPARK_EXECUTOR_URI"): 45 | SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"]) 46 | 47 | SparkContext._ensure_initialized() 48 | 49 | try: 50 | # Try to access HiveConf, it will raise exception if Hive is not added 51 | SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf() 52 | spark = SparkSession.builder\ 53 | .enableHiveSupport()\ 54 | .config("spark.insightedge.space.name", spaceName) \ 55 | .getOrCreate() 56 | except py4j.protocol.Py4JError: 57 | spark = SparkSession.builder.config("spark.insightedge.space.name", spaceName).getOrCreate() 58 | except TypeError: 59 | spark = SparkSession.builder.config("spark.insightedge.space.name", spaceName).getOrCreate() 60 | 61 | sc = spark.sparkContext 62 | sql = spark.sql 63 | atexit.register(lambda: sc.stop()) 64 | 65 | # for compatibility 66 | sqlContext = spark._wrapped 67 | sqlCtx = sqlContext 68 | 69 | print("""Welcome to 70 | ____ __ 71 | / __/__ ___ _____/ /__ 72 | _\ \/ _ \/ _ `/ __/ '_/ 73 | /__ / .__/\_,_/_/ /_/\_\ version %s 74 | /_/ 75 | """ % sc.version) 76 | print("Using Python version %s (%s, %s)" % ( 77 | platform.python_version(), 78 | platform.python_build()[0], 79 | platform.python_build()[1])) 80 | print("SparkSession available as 'spark'.") 81 | 82 | # The ./bin/pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP, 83 | # which allows us to execute the user's PYTHONSTARTUP file: 84 | _pythonstartup = os.environ.get('OLD_PYTHONSTARTUP') 85 | if _pythonstartup and os.path.isfile(_pythonstartup): 86 | with open(_pythonstartup) as f: 87 | code = compile(f.read(), _pythonstartup, 'exec') 88 | exec(code) 89 | -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/bin/shell-init.scala: -------------------------------------------------------------------------------- 1 | import org.insightedge.spark.context.InsightEdgeConfig 2 | import org.insightedge.spark.implicits.all._ 3 | 4 | System.setProperty("net.jini.discovery.LookupLocatorDiscovery.level", "OFF") 5 | 6 | val ieConfig = InsightEdgeConfig(sys.env("INSIGHTEDGE_SPACE_NAME")) 7 | 
sc.initializeInsightEdgeContext(ieConfig) -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/conf/insightedge-env.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | rem *********************************************************************************************************** 3 | rem * This script is used to initialize common environment to GigaSpaces InsightEdge platform. * 4 | rem * It is highly recommended NOT TO MODIFY THIS SCRIPT, to simplify future upgrades. * 5 | rem * If you need to override the defaults, please modify $GS_HOME\bin\setenv-overrides.bat or set * 6 | rem * the GS_SETTINGS_FILE environment variable to your custom script. * 7 | rem * For more information see https://docs.gigaspaces.com/latest/started/common-environment-variables.html * 8 | rem *********************************************************************************************************** 9 | rem Source XAP environment; 10 | call "%~dp0..\..\bin\setenv.bat" 11 | 12 | set JACKSON_CLASSPATH="%GS_HOME%\tools\jdbc\lib\jackson-databind-2.6.3.jar;%GS_HOME%\tools\jdbc\lib\jackson-core-2.6.3.jar;%GS_HOME%\tools\jdbc\lib\jackson-annotations-2.6.3.jar" 13 | rem Set InsightEdge defaults; 14 | set INSIGHTEDGE_CLASSPATH=%GS_HOME%\insightedge\lib\*;%JACKSON_CLASSPATH%;%GS_HOME%\lib\platform\jdbc\*;%GS_HOME%\lib\required\*;%GS_HOME%\lib\optional\spatial\* 15 | 16 | if defined INSIGHTEDGE_CLASSPATH_EXT set INSIGHTEDGE_CLASSPATH=%INSIGHTEDGE_CLASSPATH%;%INSIGHTEDGE_CLASSPATH_EXT% 17 | 18 | if not defined HADOOP_HOME set HADOOP_HOME=%GS_HOME%\insightedge\tools\winutils 19 | if not defined SPARK_HOME set SPARK_HOME=%GS_HOME%\insightedge\spark 20 | 21 | rem InsightEdge dependencies to Spark 22 | if not defined SPARK_DIST_CLASSPATH set SPARK_DIST_CLASSPATH=%INSIGHTEDGE_CLASSPATH% 23 | 24 | 25 | rem Zeppelin 26 | if not defined ZEPPELIN_PORT set ZEPPELIN_PORT=9090 27 | rem Spark jars are added to interpreter classpath because of Analytics Xtreme 28 | if not defined ZEPPELIN_INTP_CLASSPATH_OVERRIDES set ZEPPELIN_INTP_CLASSPATH_OVERRIDES=%INSIGHTEDGE_CLASSPATH% 29 | if not defined ZEPPELIN_LOG_DIR set ZEPPELIN_LOG_DIR=%GS_HOME%\logs 30 | 31 | if not defined INSIGHTEDGE_SPACE_NAME set INSIGHTEDGE_SPACE_NAME=demo 32 | 33 | rem PYSPARK_PYTHON is also defined in insightedge-pyspark 34 | if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( 35 | set PYSPARK_DRIVER_PYTHON=python 36 | if not [%PYSPARK_PYTHON%] == [] set PYSPARK_DRIVER_PYTHON=%PYSPARK_PYTHON% 37 | ) 38 | 39 | rem PYTHONPATH is also defined in insightedge-pyspark 40 | set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% 41 | set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH% 42 | 43 | rem GS-14003 installing new interpreters fails due to Zeppelin accessing maven central via http 44 | if not defined ZEPPELIN_INTERPRETER_DEP_MVNREPO set ZEPPELIN_INTERPRETER_DEP_MVNREPO=https://repo1.maven.org/maven2 -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/conf/insightedge-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # *********************************************************************************************************** 3 | # * This script is used to initialize common environment to GigaSpaces InsightEdge platform. 
* 4 | # * It is highly recommended NOT TO MODIFY THIS SCRIPT, to simplify future upgrades. * 5 | # * If you need to override the defaults, please modify $GS_HOME\bin\setenv-overrides.sh or set * 6 | # * the GS_SETTINGS_FILE environment variable to your custom script. * 7 | # * For more information see https://docs.gigaspaces.com/latest/started/common-environment-variables.html * 8 | # *********************************************************************************************************** 9 | # Source XAP environment: 10 | DIRNAME=$(dirname ${BASH_SOURCE[0]}) 11 | source "${DIRNAME}/../../bin/setenv.sh" 12 | 13 | export JACKSON_CLASSPATH="${GS_HOME}/tools/jdbc/lib/jackson-databind-2.6.3.jar:${GS_HOME}/tools/jdbc/lib/jackson-core-2.6.3.jar:${GS_HOME}/tools/jdbc/lib/jackson-annotations-2.6.3.jar" 14 | # Set InsightEdge defaults: 15 | export INSIGHTEDGE_CLASSPATH="${GS_HOME}/insightedge/lib/*:${JACKSON_CLASSPATH}:${GS_HOME}/lib/platform/jdbc/*:${GS_HOME}/lib/required/*:${GS_HOME}/lib/optional/spatial/*" 16 | 17 | if [ -n "${INSIGHTEDGE_CLASSPATH_EXT}" ]; then 18 | export INSIGHTEDGE_CLASSPATH="${INSIGHTEDGE_CLASSPATH_EXT}:${INSIGHTEDGE_CLASSPATH}" 19 | fi 20 | 21 | # Set SPARK_HOME if not set 22 | if [ -z "${SPARK_HOME}" ]; then 23 | export SPARK_HOME="${GS_HOME}/insightedge/spark" 24 | fi 25 | 26 | #Add InsightEdge dependencies to Spark 27 | if [ -z "${SPARK_DIST_CLASSPATH}" ]; then 28 | export SPARK_DIST_CLASSPATH="${INSIGHTEDGE_CLASSPATH}" 29 | fi 30 | 31 | # Zeppelin 32 | # Spark jars are added to interpreter classpath because of Analytics Xtreme 33 | export ZEPPELIN_INTP_CLASSPATH_OVERRIDES="${INSIGHTEDGE_CLASSPATH}" 34 | 35 | if [ -z "${ZEPPELIN_PORT}" ]; then 36 | export ZEPPELIN_PORT=9090 37 | fi 38 | 39 | if [ -z "${ZEPPELIN_LOG_DIR}" ]; then 40 | export ZEPPELIN_LOG_DIR="${GS_HOME}/logs/" 41 | fi 42 | 43 | if [ -z "${INSIGHTEDGE_SPACE_NAME}" ]; then 44 | export INSIGHTEDGE_SPACE_NAME="demo" 45 | fi 46 | 47 | #### PYSPARK_PYTHON is also defined in insightedge-pyspark 48 | # Determine the Python executable to use for the executors: 49 | if [[ -z "$PYSPARK_PYTHON" ]]; then 50 | if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && ! $WORKS_WITH_IPYTHON ]]; then 51 | echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2 52 | exit 1 53 | else 54 | PYSPARK_PYTHON=python 55 | fi 56 | fi 57 | export PYSPARK_PYTHON 58 | 59 | #### PYTHONPATH is also defined in insightedge-pyspark 60 | # Add the PySpark classes to the Python path: 61 | export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" 62 | export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH" 63 | 64 | # GS-14003 installing new interpreters fails due to Zeppelin accessing maven via http 65 | if [[ -z "$ZEPPELIN_INTERPRETER_DEP_MVNREPO" ]]; then 66 | export ZEPPELIN_INTERPRETER_DEP_MVNREPO="https://repo1.maven.org/maven2" 67 | fi 68 | -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/conf/spark_log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console, file 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %p [%c] - %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | 42 | #Set the file appender 43 | log4j.appender.file=org.apache.log4j.rolling.RollingFileAppender 44 | #log4j.appender.file.File=${xap.home}/logs/DATE~TIME-gigaspaces-ROLE-HOST-PID.log 45 | log4j.appender.file.RollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy 46 | log4j.appender.file.RollingPolicy.FileNamePattern=${xap.home}/logs/%d{yyyy-MM-dd~HH.mm}-gigaspaces-${spark.role}.log 47 | log4j.appender.file.TriggeringPolicy=org.apache.log4j.rolling.SizeBasedTriggeringPolicy 48 | log4j.appender.file.Append=true 49 | log4j.appender.file.TriggeringPolicy.MaxFileSize=2000000 50 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 51 | log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %p [%c] - %m%n 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/spark/conf/spark-env.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | rem Source InsightEdge environment: 3 | call "%~dp0..\..\conf\insightedge-env.cmd" -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/spark/conf/spark-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Source InsightEdge environment: 3 | DIRNAME=$(dirname ${BASH_SOURCE[0]}) 4 | source "${DIRNAME}/../../conf/insightedge-env.sh" -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/winutils/hadoop-winutils-2.6.0.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightEdge/insightedge/36a3ee9f1d84cf2d185be5aeae55627efa1fea4c/insightedge-packager/src/main/resources/insightedge/winutils/hadoop-winutils-2.6.0.zip -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/zeppelin/conf/zeppelin-env.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Source InsightEdge environment: 3 | call "%~dp0..\..\conf\insightedge-env.cmd" -------------------------------------------------------------------------------- /insightedge-packager/src/main/resources/insightedge/zeppelin/conf/zeppelin-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Source InsightEdge environment: 3 | DIRNAME=$(dirname ${BASH_SOURCE[0]}) 4 | source "${DIRNAME}/../../conf/insightedge-env.sh" 5 | -------------------------------------------------------------------------------- /insightedge-packager/updateGitInfo.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | mkdir target 3 | set SHA= 4 | for /f "delims=" %%a in ('git rev-parse HEAD') do @set SHA=%%a 5 | echo InsightEdge:https://github.com/InsightEdge/insightedge/commit/%SHA% > target\metadata.txt -------------------------------------------------------------------------------- /insightedge-packager/updateGitInfo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p target 3 | 4 | echo "TAG_NAME = ${TAG_NAME}" 5 | echo "IE_SHA = ${IE_SHA}" 6 | 7 | 8 | if [ ! -z "${TAG_NAME}" ] 9 | then 10 | echo InsightEdge:https://github.com/InsightEdge/insightedge/tree/${TAG_NAME} > target/metadata.txt 11 | exit 0 12 | fi 13 | 14 | if [ -z "$IE_SHA" ] 15 | then 16 | SHA=`git rev-parse HEAD` 17 | echo InsightEdge:https://github.com/InsightEdge/insightedge/commit/${SHA} > target/metadata.txt 18 | else 19 | SHA="$IE_SHA" 20 | echo InsightEdge:https://github.com/InsightEdge/insightedge/tree/${SHA} > target/metadata.txt 21 | fi 22 | -------------------------------------------------------------------------------- /insightedge-zeppelin/pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" 3 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 4 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 5 | <modelVersion>4.0.0</modelVersion> 6 | 7 | <groupId>org.gigaspaces.insightedge</groupId> 8 | <artifactId>insightedge-zeppelin</artifactId> 9 | <version>16.4.0-SNAPSHOT</version> 10 | 11 | <parent> 12 | <groupId>org.gigaspaces.insightedge</groupId> 13 | <artifactId>insightedge-package</artifactId> 14 | <version>16.4.0-SNAPSHOT</version> 15 | </parent> 16 | 17 | <build> 18 | <finalName>${project.artifactId}</finalName> 19 | </build> 20 | <dependencies> 21 | <dependency> 22 | <groupId>org.apache.zeppelin</groupId> 23 | <artifactId>zeppelin-interpreter</artifactId> 24 | <version>${zeppelin.version}</version> 25 | </dependency> 26 | <dependency> 27 | <groupId>org.apache.zeppelin</groupId> 28 | <artifactId>spark-interpreter</artifactId> 29 | <version>${zeppelin.version}</version> 30 | </dependency> 31 | <dependency> 32 | <groupId>org.gigaspaces.insightedge</groupId> 33 | <artifactId>insightedge-core</artifactId> 34 | <version>16.4.0-SNAPSHOT</version> 35 | </dependency> 36 | </dependencies> 37 | 38 | </project> 39 | --------------------------------------------------------------------------------
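A closing note on how the pieces connect: whether a Spark application is started from the Zeppelin interpreter, `insightedge-shell`, or `insightedge-submit`, it binds to the grid either through `InsightEdgeConfig` on the context (as the jobs do) or through the `spark.insightedge.space.name` session property that `shell-init.py` sets. A minimal sketch of the property-based route:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: bind a SparkSession to the space purely via configuration —
// the same property shell-init.py uses; "demo" is the packaged default.
val spark = SparkSession.builder()
  .appName("space-name-property-sketch")
  .master("local[2]")
  .config("spark.insightedge.space.name", "demo")
  .getOrCreate()
```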