├── Makefile
├── zeppelin
│   └── Dockerfile
├── README.md
├── docker-compose.yml
└── notebook
    ├── 2CF34ERK6
    │   └── note.json
    └── 2CFH4E3TG
        └── note.json

/Makefile:
--------------------------------------------------------------------------------
up:
	docker network create spark-net
	docker-compose build
	docker-compose up

down:
	docker-compose down
	docker network rm spark-net

bash:
	docker exec -it dockerzeppelin_zeppelin_1 bash

run:
	docker build -t zeppelin ./zeppelin/.
	docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook -v $(shell pwd)/zeppelin-0.7.2-bin-all:/opt/zeppelin zeppelin /bin/bash
	#docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook zeppelin /opt/zeppelin/bin/zeppelin.sh

build:
	docker build -t earthquakesan/zeppelin:0.7.2 ./zeppelin/.
--------------------------------------------------------------------------------
/zeppelin/Dockerfile:
--------------------------------------------------------------------------------
FROM bde2020/spark-base:2.1.0-hadoop2.8-hive-java8
MAINTAINER Ivan Ermilov

ENV APACHE_SPARK_VERSION 2.1.0
ENV APACHE_HADOOP_VERSION 2.8.0
ENV ZEPPELIN_VERSION 0.7.2

# curl is used by the download step below; -y keeps the build non-interactive
RUN apt-get update && apt-get install -y curl wget
RUN set -x \
    && curl -fSL "http://www-eu.apache.org/dist/zeppelin/zeppelin-${ZEPPELIN_VERSION}/zeppelin-${ZEPPELIN_VERSION}-bin-all.tgz" -o /tmp/zeppelin.tgz \
    && tar -xzvf /tmp/zeppelin.tgz -C /opt/ \
    && mv /opt/zeppelin-* /opt/zeppelin \
    && rm /tmp/zeppelin.tgz

# The SANSA examples jar is expected to be provided at this path (e.g. mounted);
# it is not added by this Dockerfile.
ENV SPARK_SUBMIT_OPTIONS "--jars /opt/zeppelin/sansa-examples-spark-2016-12.jar"

WORKDIR /opt/zeppelin

CMD ["/opt/zeppelin/bin/zeppelin.sh"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/big-data-europe/Lobby)

# Docker Zeppelin

This repository contains an [Apache Zeppelin](https://zeppelin.apache.org/) Docker image tuned to work with BDE clusters.

# Example Usage

For example usage see [docker-compose.yml](./docker-compose.yml) and the [SANSA-Notebooks repository](https://github.com/SANSA-Stack/SANSA-Notebooks).

# Dev

Start the Hadoop/Spark cluster with the Zeppelin notebook:
```
make up
```
Tear down the Hadoop/Spark cluster with the Zeppelin notebook:
```
make down
```
Bash into the Zeppelin container:
```
make bash
```
Build and run Zeppelin separately:
```
make up
docker stop dockerzeppelin_zeppelin_1 && docker rm dockerzeppelin_zeppelin_1
make run
```
Build Zeppelin:
```
make build
```
For more details see the Makefile.
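
Once `make up` finishes, Zeppelin is served on http://localhost:80 (port 8080 inside the container). As a smoke test, a minimal Spark paragraph along the lines of the shipped SparkPi note should run against the cluster; a sketch (the estimate varies from run to run, and `spark` is the SparkSession Zeppelin injects into each paragraph):
```
import scala.math.random

// Monte Carlo estimate of Pi, executed on the spark-master/spark-worker pair
// started by docker-compose.
val n = 100000
val count = spark.sparkContext.parallelize(1 until n).map { _ =>
  val x = random * 2 - 1
  val y = random * 2 - 1
  if (x * x + y * y <= 1) 1 else 0
}.reduce(_ + _)
println("Pi is roughly " + 4.0 * count / (n - 1))
```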
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: "2.1"

services:
  namenode:
    image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8
    container_name: namenode
    volumes:
      - ./data/namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  datanode:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
    container_name: datanode
    volumes:
      - ./data/datanode:/hadoop/dfs/data
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    depends_on:
      namenode:
        condition: service_healthy
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  spark-master:
    image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8
    container_name: spark-master
    ports:
      - "8080:8080"
      - "7077:7077"
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    depends_on:
      namenode:
        condition: service_healthy
      datanode:
        condition: service_healthy
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  spark-worker:
    image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    depends_on:
      spark-master:
        condition: service_healthy
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  zeppelin:
    build: ./zeppelin
    ports:
      - 80:8080
    volumes:
      - ./notebook:/opt/zeppelin/notebook
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
      SPARK_MASTER: "spark://spark-master:7077"
      MASTER: "spark://spark-master:7077"
      #SPARK_SUBMIT_OPTIONS: "--jars /opt/sansa-examples/jars/sansa-examples-spark-2016-12.jar"
    depends_on:
      spark-master:
        condition: service_healthy
      namenode:
        condition: service_healthy
    networks:
      - spark-net

networks:
  spark-net:
    external:
      name: spark-net
--------------------------------------------------------------------------------
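
The zeppelin service above wires the notebooks to the cluster through environment variables: MASTER points the Spark interpreter at spark://spark-master:7077, and CORE_CONF_fs_defaultFS makes hdfs://namenode:8020 the default filesystem. A quick check from any Zeppelin paragraph, as a sketch; the HDFS path is the one the shipped OWL note expects, and uploading that file is not done by this compose file:
```
// Verify the interpreter picked up the compose wiring.
println(spark.sparkContext.master)  // expected: spark://spark-master:7077

// Read from HDFS by full URI; /data/ont_functional.owl must have been
// uploaded to the namenode beforehand, otherwise this fails.
val lines = spark.sparkContext.textFile("hdfs://namenode:8020/data/ont_functional.owl")
println(lines.count())
```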
/notebook/2CF34ERK6/note.json:
--------------------------------------------------------------------------------
{
  "paragraphs": [
    {
      "text": "import scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nval slices \u003d 2\nval n \u003d math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow\nval count \u003d spark.sparkContext.parallelize(1 until n, slices).map { i \u003d\u003e\n val x \u003d random * 2 - 1\n val y \u003d random * 2 - 1\n if (x*x + y*y \u003c\u003d 1) 1 else 0\n}.reduce(_ + _)\nprintln(\"Pi is roughly \" + 4.0 * count / (n - 1))",
      "user": "anonymous",
      "dateUpdated": "May 11, 2017 8:47:28 AM",
      "config": {
        "colWidth": 12.0,
        "enabled": true,
        "results": {},
        "editorSetting": {
          "language": "text",
          "editOnDblClick": false
        },
        "editorMode": "ace/mode/text",
        "editorHide": false,
        "tableHide": true
      },
      "settings": {
        "params": {},
        "forms": {}
      },
      "results": {
        "code": "SUCCESS",
        "msg": [
          {
            "type": "TEXT",
            "data": "\nimport scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nslices: Int \u003d 2\n\nn: Int \u003d 200000\n\ncount: Int \u003d 156835\nPi is roughly 3.136715683578418\n"
          }
        ]
      },
      "apps": [],
      "jobName": "paragraph_1494446151737_1359394047",
      "id": "20170510-195551_400594820",
      "dateCreated": "May 10, 2017 7:55:51 PM",
      "dateStarted": "May 10, 2017 8:40:36 PM",
      "dateFinished": "May 10, 2017 8:41:04 PM",
      "status": "FINISHED",
      "progressUpdateIntervalMs": 500
    },
    {
      "text": "",
      "user": "anonymous",
      "dateUpdated": "May 11, 2017 8:49:13 AM",
      "config": {
        "colWidth": 12.0,
        "enabled": true,
        "results": {},
        "editorSetting": {
          "language": "scala",
          "editOnDblClick": false
        },
        "editorMode": "ace/mode/scala",
        "tableHide": true
      },
      "settings": {
        "params": {},
        "forms": {}
      },
      "apps": [],
      "jobName": "paragraph_1494446793336_527866307",
      "id": "20170510-200633_2002902352",
      "dateCreated": "May 10, 2017 8:06:33 PM",
      "status": "FINISHED",
      "progressUpdateIntervalMs": 500
    }
  ],
  "name": "SparkPi",
  "id": "2CF34ERK6",
  "angularObjects": {
    "2CHD267MK:shared_process": [],
    "2CFCYW8ZZ:shared_process": [],
    "2CGGU1AUC:shared_process": [],
    "2CHDUK5RT:shared_process": [],
    "2CFPWN8ZX:shared_process": [],
    "2CJT3A9WM:shared_process": [],
    "2CFTT4BX1:shared_process": [],
    "2CHPV1WNR:shared_process": [],
    "2CH8NUNKD:shared_process": [],
    "2CG24PUFX:shared_process": [],
    "2CJHA5F79:shared_process": [],
    "2CFD8HYGS:shared_process": [],
    "2CH5TSP4J:shared_process": [],
    "2CF3WY7WY:shared_process": [],
    "2CFYWKGJK:shared_process": [],
    "2CJFHW9TZ:shared_process": [],
    "2CGEFHREK:shared_process": [],
    "2CG4M1FG9:shared_process": [],
    "2CH3SASQ1:shared_process": []
  },
  "config": {},
  "info": {}
}
--------------------------------------------------------------------------------
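
For reference, the SparkPi output recorded above follows the standard Monte Carlo hit-ratio estimate; since `1 until n` yields n - 1 sample points, the denominator is n - 1:

$$\hat{\pi} = 4\cdot\frac{\mathrm{count}}{n-1} = 4\cdot\frac{156835}{199999} \approx 3.1367157$$

which matches the recorded result "Pi is roughly 3.136715683578418".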
/notebook/2CFH4E3TG/note.json:
--------------------------------------------------------------------------------
{
  "paragraphs": [
    {
      "text": "import net.sansa_stack.owl.spark.dataset.{FunctionalSyntaxOWLAxiomsDatasetBuilder, ManchesterSyntaxOWLAxiomsDatasetBuilder}\n\nval input \u003d \"hdfs://namenode:8020/data/ont_functional.owl\"\n\nval dataset \u003d FunctionalSyntaxOWLAxiomsDatasetBuilder.build(spark, input)\ndataset.take(10).foreach(println(_))",
      "user": "anonymous",
      "dateUpdated": "May 11, 2017 9:28:35 AM",
      "config": {
        "colWidth": 12.0,
        "enabled": true,
        "results": {},
        "editorSetting": {
          "language": "text",
          "editOnDblClick": false
        },
        "editorMode": "ace/mode/text"
      },
      "settings": {
        "params": {},
        "forms": {}
      },
      "results": {
        "code": "SUCCESS",
        "msg": [
          {
            "type": "TEXT",
            "data": "\nimport net.sansa_stack.owl.spark.dataset.{FunctionalSyntaxOWLAxiomsDatasetBuilder, ManchesterSyntaxOWLAxiomsDatasetBuilder}\n\ninput: String \u003d hdfs://namenode:8020/data/ont_functional.owl\n\ndataset: net.sansa_stack.owl.spark.dataset.OWLAxiomsDataset \u003d [value: binary]\nDeclaration(Annotation(\u003chttp://ex.com/foo#ann\u003e \"some annotation\"^^xsd:string) Class(\u003chttp://ex.com/bar#Cls1\u003e))\nDeclaration(Class(\u003chttp://ex.com/bar#Cls2\u003e))\nDeclaration(Datatype(\u003chttp://ex.com/bar#dtype1\u003e))\nDeclaration(Datatype(\u003chttp://ex.com/bar#dtype2\u003e))\nDeclaration(ObjectProperty(\u003chttp://ex.com/bar#objProp1\u003e))\nDeclaration(ObjectProperty(\u003chttp://ex.com/bar#objProp2\u003e))\nDeclaration(DataProperty(\u003chttp://ex.com/bar#dataProp1\u003e))\nDeclaration(DataProperty(\u003chttp://ex.com/bar#dataProp2\u003e))\nDeclaration(AnnotationProperty(\u003chttp://ex.com/bar#annProp1\u003e))\nDeclaration(AnnotationProperty(\u003chttp://ex.com/bar#annProp2\u003e))\n"
          }
        ]
      },
      "apps": [],
      "jobName": "paragraph_1494492505471_454897908",
      "id": "20170511-084825_834127049",
      "dateCreated": "May 11, 2017 8:48:25 AM",
      "dateStarted": "May 11, 2017 9:28:35 AM",
      "dateFinished": "May 11, 2017 9:28:48 AM",
      "status": "FINISHED",
      "progressUpdateIntervalMs": 500
    },
    {
      "user": "anonymous",
      "config": {},
      "settings": {
        "params": {},
        "forms": {}
      },
      "apps": [],
      "jobName": "paragraph_1494494845390_-1659187861",
      "id": "20170511-092725_657991002",
      "dateCreated": "May 11, 2017 9:27:25 AM",
      "status": "READY",
      "progressUpdateIntervalMs": 500
    }
  ],
  "name": "OWL",
  "id": "2CFH4E3TG",
  "angularObjects": {
    "2CF3RJ5MG:shared_process": [],
    "2CG82CXC6:shared_process": [],
    "2CGUETVFU:shared_process": [],
    "2CJSY2U8W:shared_process": [],
    "2CG2FJB5M:shared_process": [],
    "2CG3MZ35T:shared_process": [],
    "2CH1NDZTZ:shared_process": [],
    "2CF1A3GZY:shared_process": [],
    "2CFP46J5J:shared_process": [],
    "2CEXCBT9Q:shared_process": [],
    "2CFTSHR86:shared_process": [],
    "2CFVTZA26:shared_process": [],
    "2CFKV2E7E:shared_process": [],
    "2CJ4DKA2S:shared_process": [],
    "2CFBSCM9Q:shared_process": [],
    "2CGQHTNJC:shared_process": [],
    "2CHTUVCMC:shared_process": [],
    "2CHUU16RS:shared_process": [],
    "2CHE8XKZD:shared_process": []
  },
  "config": {},
  "info": {}
}
--------------------------------------------------------------------------------
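
The OWL note imports ManchesterSyntaxOWLAxiomsDatasetBuilder alongside the functional-syntax builder but only exercises the latter. Assuming the Manchester builder mirrors the same build(spark, input) entry point (an assumption; only the functional-syntax call appears in the note), and given a hypothetical Manchester-syntax file uploaded next to ont_functional.owl, the counterpart paragraph would look like:
```
import net.sansa_stack.owl.spark.dataset.ManchesterSyntaxOWLAxiomsDatasetBuilder

// Hypothetical input path; only ont_functional.owl is referenced in the shipped notes.
val input = "hdfs://namenode:8020/data/ont_manchester.owl"

// Assumes build(spark, input) mirrors FunctionalSyntaxOWLAxiomsDatasetBuilder above.
val dataset = ManchesterSyntaxOWLAxiomsDatasetBuilder.build(spark, input)
dataset.take(10).foreach(println(_))
```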