├── Makefile
├── zeppelin
│   └── Dockerfile
├── README.md
├── docker-compose.yml
└── notebook
    ├── 2CF34ERK6
    │   └── note.json
    └── 2CFH4E3TG
        └── note.json

/Makefile:
--------------------------------------------------------------------------------
up:
	docker network create spark-net
	docker-compose build
	docker-compose up

down:
	docker-compose down
	docker network rm spark-net

bash:
	docker exec -it dockerzeppelin_zeppelin_1 bash

run:
	docker build -t zeppelin ./zeppelin/.
	docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook -v $(shell pwd)/zeppelin-0.7.2-bin-all:/opt/zeppelin zeppelin /bin/bash
	#docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook zeppelin /opt/zeppelin/bin/zeppelin.sh

build:
	docker build -t earthquakesan/zeppelin:0.7.2 ./zeppelin/.
--------------------------------------------------------------------------------
/zeppelin/Dockerfile:
--------------------------------------------------------------------------------
FROM bde2020/spark-base:2.1.0-hadoop2.8-hive-java8
MAINTAINER Ivan Ermilov

ENV APACHE_SPARK_VERSION 2.1.0
ENV APACHE_HADOOP_VERSION 2.8.0
ENV ZEPPELIN_VERSION 0.7.2

# curl is used by the download step below; -y keeps the build non-interactive
RUN apt-get update && apt-get install -y curl wget
RUN set -x \
    && curl -fSL "http://www-eu.apache.org/dist/zeppelin/zeppelin-${ZEPPELIN_VERSION}/zeppelin-${ZEPPELIN_VERSION}-bin-all.tgz" -o /tmp/zeppelin.tgz \
    && tar -xzvf /tmp/zeppelin.tgz -C /opt/ \
    && mv /opt/zeppelin-* /opt/zeppelin \
    && rm /tmp/zeppelin.tgz

# The SANSA examples jar is expected to be provided at this path (e.g. mounted);
# it is not added by this Dockerfile.
ENV SPARK_SUBMIT_OPTIONS "--jars /opt/zeppelin/sansa-examples-spark-2016-12.jar"

WORKDIR /opt/zeppelin

CMD ["/opt/zeppelin/bin/zeppelin.sh"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/big-data-europe/Lobby)

# Docker Zeppelin

This repository contains an [Apache Zeppelin](https://zeppelin.apache.org/) Docker image tuned to work with BDE clusters.

# Example Usage

For example usage see [docker-compose.yml](./docker-compose.yml) and the [SANSA-Notebooks repository](https://github.com/SANSA-Stack/SANSA-Notebooks).

# Dev

Start the Hadoop/Spark cluster with the Zeppelin notebook:
```
make up
```
Tear down the Hadoop/Spark cluster with the Zeppelin notebook:
```
make down
```
Bash into the Zeppelin container:
```
make bash
```
Build and run Zeppelin separately:
```
make up
docker stop dockerzeppelin_zeppelin_1 && docker rm dockerzeppelin_zeppelin_1
make run
```
Build Zeppelin:
```
make build
```
For more details see the Makefile.
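
Once `make up` finishes, Zeppelin is served on http://localhost:80 (port 8080 inside the container). As a smoke test, a minimal Spark paragraph along the lines of the shipped SparkPi note should run against the cluster; a sketch (the estimate varies from run to run, and `spark` is the SparkSession Zeppelin injects into each paragraph):
```
import scala.math.random

// Monte Carlo estimate of Pi, executed on the spark-master/spark-worker pair
// started by docker-compose.
val n = 100000
val count = spark.sparkContext.parallelize(1 until n).map { _ =>
  val x = random * 2 - 1
  val y = random * 2 - 1
  if (x * x + y * y <= 1) 1 else 0
}.reduce(_ + _)
println("Pi is roughly " + 4.0 * count / (n - 1))
```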
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: "2.1"

services:
  namenode:
    image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8
    container_name: namenode
    volumes:
      - ./data/namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  datanode:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
    container_name: datanode
    volumes:
      - ./data/datanode:/hadoop/dfs/data
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    depends_on:
      namenode:
        condition: service_healthy
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  spark-master:
    image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8
    container_name: spark-master
    ports:
      - "8080:8080"
      - "7077:7077"
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    depends_on:
      namenode:
        condition: service_healthy
      datanode:
        condition: service_healthy
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  spark-worker:
    image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
    depends_on:
      spark-master:
        condition: service_healthy
    healthcheck:
      interval: 5s
      retries: 100
    networks:
      - spark-net
  zeppelin:
    build: ./zeppelin
    ports:
      - 80:8080
    volumes:
      - ./notebook:/opt/zeppelin/notebook
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
      SPARK_MASTER: "spark://spark-master:7077"
      MASTER: "spark://spark-master:7077"
      #SPARK_SUBMIT_OPTIONS: "--jars /opt/sansa-examples/jars/sansa-examples-spark-2016-12.jar"
    depends_on:
      spark-master:
        condition: service_healthy
      namenode:
        condition: service_healthy
    networks:
      - spark-net

networks:
  spark-net:
    external:
      name: spark-net
--------------------------------------------------------------------------------
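
The zeppelin service above wires the notebooks to the cluster through environment variables: MASTER points the Spark interpreter at spark://spark-master:7077, and CORE_CONF_fs_defaultFS makes hdfs://namenode:8020 the default filesystem. A quick check from any Zeppelin paragraph, as a sketch; the HDFS path is the one the shipped OWL note expects, and uploading that file is not done by this compose file:
```
// Verify the interpreter picked up the compose wiring.
println(spark.sparkContext.master)  // expected: spark://spark-master:7077

// Read from HDFS by full URI; /data/ont_functional.owl must have been
// uploaded to the namenode beforehand, otherwise this fails.
val lines = spark.sparkContext.textFile("hdfs://namenode:8020/data/ont_functional.owl")
println(lines.count())
```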
/notebook/2CF34ERK6/note.json:
--------------------------------------------------------------------------------
{
  "paragraphs": [
    {
      "text": "import scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nval slices \u003d 2\nval n \u003d math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow\nval count \u003d spark.sparkContext.parallelize(1 until n, slices).map { i \u003d\u003e\n val x \u003d random * 2 - 1\n val y \u003d random * 2 - 1\n if (x*x + y*y \u003c\u003d 1) 1 else 0\n}.reduce(_ + _)\nprintln(\"Pi is roughly \" + 4.0 * count / (n - 1))",
      "user": "anonymous",
      "dateUpdated": "May 11, 2017 8:47:28 AM",
      "config": {
        "colWidth": 12.0,
        "enabled": true,
        "results": {},
        "editorSetting": {
          "language": "text",
          "editOnDblClick": false
        },
        "editorMode": "ace/mode/text",
        "editorHide": false,
        "tableHide": true
      },
      "settings": {
        "params": {},
        "forms": {}
      },
      "results": {
        "code": "SUCCESS",
        "msg": [
          {
            "type": "TEXT",
            "data": "\nimport scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nslices: Int \u003d 2\n\nn: Int \u003d 200000\n\ncount: Int \u003d 156835\nPi is roughly 3.136715683578418\n"
          }
        ]
      },
      "apps": [],
      "jobName": "paragraph_1494446151737_1359394047",
      "id": "20170510-195551_400594820",
      "dateCreated": "May 10, 2017 7:55:51 PM",
      "dateStarted": "May 10, 2017 8:40:36 PM",
      "dateFinished": "May 10, 2017 8:41:04 PM",
      "status": "FINISHED",
      "progressUpdateIntervalMs": 500
    },
    {
      "text": "",
      "user": "anonymous",
      "dateUpdated": "May 11, 2017 8:49:13 AM",
      "config": {
        "colWidth": 12.0,
        "enabled": true,
        "results": {},
        "editorSetting": {
          "language": "scala",
          "editOnDblClick": false
        },
        "editorMode": "ace/mode/scala",
        "tableHide": true
      },
      "settings": {
        "params": {},
        "forms": {}
      },
      "apps": [],
      "jobName": "paragraph_1494446793336_527866307",
      "id": "20170510-200633_2002902352",
      "dateCreated": "May 10, 2017 8:06:33 PM",
      "status": "FINISHED",
      "progressUpdateIntervalMs": 500
    }
  ],
  "name": "SparkPi",
  "id": "2CF34ERK6",
  "angularObjects": {
    "2CHD267MK:shared_process": [],
    "2CFCYW8ZZ:shared_process": [],
    "2CGGU1AUC:shared_process": [],
    "2CHDUK5RT:shared_process": [],
    "2CFPWN8ZX:shared_process": [],
    "2CJT3A9WM:shared_process": [],
    "2CFTT4BX1:shared_process": [],
    "2CHPV1WNR:shared_process": [],
    "2CH8NUNKD:shared_process": [],
    "2CG24PUFX:shared_process": [],
    "2CJHA5F79:shared_process": [],
    "2CFD8HYGS:shared_process": [],
    "2CH5TSP4J:shared_process": [],
    "2CF3WY7WY:shared_process": [],
    "2CFYWKGJK:shared_process": [],
    "2CJFHW9TZ:shared_process": [],
    "2CGEFHREK:shared_process": [],
    "2CG4M1FG9:shared_process": [],
    "2CH3SASQ1:shared_process": []
  },
  "config": {},
  "info": {}
}
--------------------------------------------------------------------------------
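
For reference, the SparkPi output recorded above follows the standard Monte Carlo hit-ratio estimate; since `1 until n` yields n - 1 sample points, the denominator is n - 1:

$$\hat{\pi} = 4\cdot\frac{\mathrm{count}}{n-1} = 4\cdot\frac{156835}{199999} \approx 3.1367157$$

which matches the recorded result "Pi is roughly 3.136715683578418".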
/notebook/2CFH4E3TG/note.json:
--------------------------------------------------------------------------------
{
  "paragraphs": [
    {
      "text": "import net.sansa_stack.owl.spark.dataset.{FunctionalSyntaxOWLAxiomsDatasetBuilder, ManchesterSyntaxOWLAxiomsDatasetBuilder}\n\nval input \u003d \"hdfs://namenode:8020/data/ont_functional.owl\"\n\nval dataset \u003d FunctionalSyntaxOWLAxiomsDatasetBuilder.build(spark, input)\ndataset.take(10).foreach(println(_))",
      "user": "anonymous",
      "dateUpdated": "May 11, 2017 9:28:35 AM",
      "config": {
        "colWidth": 12.0,
        "enabled": true,
        "results": {},
        "editorSetting": {
          "language": "text",
          "editOnDblClick": false
        },
        "editorMode": "ace/mode/text"
      },
      "settings": {
        "params": {},
        "forms": {}
      },
      "results": {
        "code": "SUCCESS",
        "msg": [
          {
            "type": "TEXT",
            "data": "\nimport net.sansa_stack.owl.spark.dataset.{FunctionalSyntaxOWLAxiomsDatasetBuilder, ManchesterSyntaxOWLAxiomsDatasetBuilder}\n\ninput: String \u003d hdfs://namenode:8020/data/ont_functional.owl\n\ndataset: net.sansa_stack.owl.spark.dataset.OWLAxiomsDataset \u003d [value: binary]\nDeclaration(Annotation(\u003chttp://ex.com/foo#ann\u003e \"some annotation\"^^xsd:string) Class(\u003chttp://ex.com/bar#Cls1\u003e))\nDeclaration(Class(\u003chttp://ex.com/bar#Cls2\u003e))\nDeclaration(Datatype(\u003chttp://ex.com/bar#dtype1\u003e))\nDeclaration(Datatype(\u003chttp://ex.com/bar#dtype2\u003e))\nDeclaration(ObjectProperty(\u003chttp://ex.com/bar#objProp1\u003e))\nDeclaration(ObjectProperty(\u003chttp://ex.com/bar#objProp2\u003e))\nDeclaration(DataProperty(\u003chttp://ex.com/bar#dataProp1\u003e))\nDeclaration(DataProperty(\u003chttp://ex.com/bar#dataProp2\u003e))\nDeclaration(AnnotationProperty(\u003chttp://ex.com/bar#annProp1\u003e))\nDeclaration(AnnotationProperty(\u003chttp://ex.com/bar#annProp2\u003e))\n"
          }
        ]
      },
      "apps": [],
      "jobName": "paragraph_1494492505471_454897908",
      "id": "20170511-084825_834127049",
      "dateCreated": "May 11, 2017 8:48:25 AM",
      "dateStarted": "May 11, 2017 9:28:35 AM",
      "dateFinished": "May 11, 2017 9:28:48 AM",
      "status": "FINISHED",
      "progressUpdateIntervalMs": 500
    },
    {
      "user": "anonymous",
      "config": {},
      "settings": {
        "params": {},
        "forms": {}
      },
      "apps": [],
      "jobName": "paragraph_1494494845390_-1659187861",
      "id": "20170511-092725_657991002",
      "dateCreated": "May 11, 2017 9:27:25 AM",
      "status": "READY",
      "progressUpdateIntervalMs": 500
    }
  ],
  "name": "OWL",
  "id": "2CFH4E3TG",
  "angularObjects": {
    "2CF3RJ5MG:shared_process": [],
    "2CG82CXC6:shared_process": [],
    "2CGUETVFU:shared_process": [],
    "2CJSY2U8W:shared_process": [],
    "2CG2FJB5M:shared_process": [],
    "2CG3MZ35T:shared_process": [],
    "2CH1NDZTZ:shared_process": [],
    "2CF1A3GZY:shared_process": [],
    "2CFP46J5J:shared_process": [],
    "2CEXCBT9Q:shared_process": [],
    "2CFTSHR86:shared_process": [],
    "2CFVTZA26:shared_process": [],
    "2CFKV2E7E:shared_process": [],
    "2CJ4DKA2S:shared_process": [],
    "2CFBSCM9Q:shared_process": [],
    "2CGQHTNJC:shared_process": [],
    "2CHTUVCMC:shared_process": [],
    "2CHUU16RS:shared_process": [],
    "2CHE8XKZD:shared_process": []
  },
  "config": {},
  "info": {}
}
--------------------------------------------------------------------------------
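
The OWL note imports ManchesterSyntaxOWLAxiomsDatasetBuilder alongside the functional-syntax builder but only exercises the latter. Assuming the Manchester builder mirrors the same build(spark, input) entry point (an assumption; only the functional-syntax call appears in the note), and given a hypothetical Manchester-syntax file uploaded next to ont_functional.owl, the counterpart paragraph would look like:
```
import net.sansa_stack.owl.spark.dataset.ManchesterSyntaxOWLAxiomsDatasetBuilder

// Hypothetical input path; only ont_functional.owl is referenced in the shipped notes.
val input = "hdfs://namenode:8020/data/ont_manchester.owl"

// Assumes build(spark, input) mirrors FunctionalSyntaxOWLAxiomsDatasetBuilder above.
val dataset = ManchesterSyntaxOWLAxiomsDatasetBuilder.build(spark, input)
dataset.take(10).foreach(println(_))
```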