├── .dockerignore
├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE
├── README.md
├── docs
├── _config.yml
├── how.md
├── index.md
├── install.md
├── jsdoc
│ ├── classes.list.html
│ ├── fonts
│ │ ├── glyphicons-halflings-regular.eot
│ │ ├── glyphicons-halflings-regular.svg
│ │ ├── glyphicons-halflings-regular.ttf
│ │ ├── glyphicons-halflings-regular.woff
│ │ └── glyphicons-halflings-regular.woff2
│ ├── img
│ │ ├── glyphicons-halflings-white.png
│ │ └── glyphicons-halflings.png
│ ├── index.html
│ ├── module-CellMonitor-CellMonitor.html
│ ├── module-CellMonitor.html
│ ├── module-SparkMonitor-SparkMonitor.html
│ ├── module-SparkMonitor.html
│ ├── module-TaskChart-TaskChart.html
│ ├── module-TaskChart.html
│ ├── module-Timeline-Timeline.html
│ ├── module-Timeline.html
│ ├── module-currentcell.html
│ ├── module-module.html
│ ├── module-taskdetails.html
│ ├── modules.list.html
│ ├── quicksearch.html
│ ├── scripts
│ │ ├── docstrap.lib.js
│ │ ├── fulltext-search-ui.js
│ │ ├── fulltext-search.js
│ │ ├── lunr.min.js
│ │ ├── prettify
│ │ │ ├── Apache-License-2.0.txt
│ │ │ ├── jquery.min.js
│ │ │ ├── lang-css.js
│ │ │ └── prettify.js
│ │ ├── sunlight.js
│ │ └── toc.js
│ └── styles
│ │ ├── darkstrap.css
│ │ ├── prettify-tomorrow.css
│ │ ├── site.cerulean.css
│ │ ├── site.cosmo.css
│ │ ├── site.cyborg.css
│ │ ├── site.darkly.css
│ │ ├── site.darkstrap.css
│ │ ├── site.dibs-bootstrap.css
│ │ ├── site.flatly.css
│ │ ├── site.journal.css
│ │ ├── site.lumen.css
│ │ ├── site.paper.css
│ │ ├── site.readable.css
│ │ ├── site.sandstone.css
│ │ ├── site.simplex.css
│ │ ├── site.slate.css
│ │ ├── site.spacelab.css
│ │ ├── site.superhero.css
│ │ ├── site.united.css
│ │ ├── site.yeti.css
│ │ ├── sunlight.dark.css
│ │ └── sunlight.default.css
├── usecase_distroot.md
├── usecase_sparktraining.md
├── usecase_testing.md
└── usecases.md
├── extension
├── MANIFEST.in
├── VERSION
├── conf.json
├── js
│ ├── CellMonitor.js
│ ├── SparkMonitor.js
│ ├── TaskChart.js
│ ├── Timeline.js
│ ├── cellmonitor.html
│ ├── currentcell.js
│ ├── images
│ │ └── spinner.gif
│ ├── jobtable.css
│ ├── module.js
│ ├── styles.css
│ ├── taskdetails.css
│ ├── taskdetails.html
│ ├── taskdetails.js
│ └── timeline.css
├── package.json
├── scalalistener
│ ├── CustomListener.scala
│ └── build.sbt
├── setup.py
├── sparkmonitor
│ ├── __init__.py
│ ├── kernelextension.py
│ └── serverextension.py
├── webpack.config.js
└── yarn.lock
└── notebooks
├── DistROOT.ipynb
└── Testing Extension.ipynb
/.dockerignore:
--------------------------------------------------------------------------------
1 | extension/node_modules
2 | extension/scalalistener
3 | extension/js
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.log
3 | **/.ipynb_checkpoints/
4 | extension/scalalistener/project/
5 | extension/scalalistener/target/
6 | *.egg-info/
7 | extension/dist/
8 | extension/build/
9 | extension/node_modules/
10 | extension/sparkmonitor/static/
11 | *.jar
12 | extension/docs/
13 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 | # dist: trusty
3 | sudo: required
4 | env:
5 | global:
6 | secure: Qw9zE4MOOkcPiUYF4jzt6tFhJogvA3d0u2pA/jMAybECjeQDMaio5AN67NbtXDU5/7VF2bab4ScWb4HMVhd30j6Mo8FmTAeuxYwVSldze82wM7/Aw2E6GARmWugC/Q5RNJKd+oIpRgHgWuepNWWpdXMuDx2mtHmL8KPShmjYRFQ9rEeiyrnUPqaO+JRT6BD13KZTb1004cFa3kK4piwOlQ7hrQ5t21YJAqkfgWMX9yybgCPIgMbPoQZFLsK2xdc0tYsBJDFkblXLleQjUyn1y7PbwgbscvLr+I75g2sN8mqR58x4ly3rpb1+8SQ3aRgCJHZUwCl1Ci2dO9RLVSgse4JPyfJ9/50epEU7TD9oNz+rXImOOlSCoVFdNEW3BOGDle9hci6AiA2ON3jESVWID7FY9dv7Fe1fEiwFoSg2cVIlPLRAF/Lt81v6OGurd7xFUUscUgELLYXTgZRnltUR7P5Na/1C/Atk/YjoenMlnhOy7DgTm54OpuZ4TZLLJOeWHS5RpeMXyl9d4SQ8ZExJCjWK0+DHb0CIze/uO6lSIIWsf2MIQKIbnUARIF34sW6/Ms3gvXQdC8qJNxkYZYbD3fcUNnsPsLAZMGC9nco4TxwO3M7oeTOpTlGJshnTg8l8XB+ppMqRFQhZyo6GQEUZQ8QVbSRGDrm+gSZnsnqFN+s=
7 | services:
8 | - docker
9 | scala: "2.11.8"
10 | cache:
11 | yarn: true
12 | directories:
13 | - "$TRAVIS_BUILD_DIR/extension/node_modules"
14 | - "$HOME/.sbt"
15 | - "$HOME/.ivy2/cache"
16 | before_install:
17 | - nvm install 6.11.1
18 | - nvm use 6.11.1
19 | install:
20 | - cd $TRAVIS_BUILD_DIR/extension/
21 | - yarn install
22 | before_script: ''
23 | script:
24 | - cd $TRAVIS_BUILD_DIR/extension/
25 | - yarn run webpack
26 | - cd $TRAVIS_BUILD_DIR/extension/scalalistener/
27 | - sbt package
28 | - cd $TRAVIS_BUILD_DIR/extension/
29 | - python setup.py sdist --formats=gztar,zip
30 | - cd $TRAVIS_BUILD_DIR/extension/dist/
31 | - cp sparkmonitor*.tar.gz sparkmonitor.tar.gz
32 | - cp sparkmonitor*.zip sparkmonitor.zip
33 | after_success:
34 | - cd $TRAVIS_BUILD_DIR/
35 | - 'docker login -u=$DOCKER_USER -p=$DOCKER_PASS && docker build -f Dockerfile -t krishnanr/sparkmonitor
36 | . && docker push krishnanr/sparkmonitor'
37 |
38 | deploy:
39 |
40 | - provider: releases
41 | skip_cleanup: true
42 | file:
43 | - $TRAVIS_BUILD_DIR/extension/dist/sparkmonitor.tar.gz
44 | - $TRAVIS_BUILD_DIR/extension/dist/sparkmonitor.zip
45 | api_key:
46 | secure: a3buqLV2wwwAY6mkCSKT8/qHf8pFTa9/UP4Op3WPlkS2rYTAKw8cI3dAWd99dr5oCXkbbJc4aIA/e9voS1xAR+9mGYW+X3EakB8fRWnwQjg1/mRdsNp4S9wyeat1ETt4+/M1etcZed+uuuIUDfCyWgGGlu+bLGubHYqHIWhAE3lrT3PKjQVNGP/A5Tkctmoz+YE9gHoCNkFwa4cQ0p/hUeE97UT86u1RriHNJ1yKbqfX5/0FaOU6BbZogsGzk5tKBJyJtdACi3fgbLKlBPS7+aIE2wydl7PtQxwnLUV8Gitcb6+rfPQTYzQfc3vX2izfKBx6sC4hFZ433MdGw/3neyfUj1/Gh/tHnHrl4tSkt5VgKD7i4TPQiVLjt2N9tdkgZyqhjfJwit5r5IxSoSbdPsUK6uNIxDLsInFiakPyjwciczDToJihP/drelAuuWqIymKPPE70AGX1VsRI8H5JakXfkzI741ZpyiFiq3Z/b3WPqC/+bXnNS7aF5+P8SkYJVhGbMq6toq8gfYP36rYY2OAo55X7qTCqGkbxv8nAIwJNtrg3U0u5ra7ciamp6/ht8LmVVBfy0XERHzdejhqbHeyNpA1LFSiCkhvpZYWxpPgJOUbW+jkf4Ujk4fJwmvmS2FLsqMNzeOYqhUYwY5mwpmoaDjTFQ50Eu2BcouByVCw=
47 | on:
48 | repo: krishnan-r/sparkmonitor
49 | tags: true
50 | - provider: script
51 | skip_cleanup: true
52 | on:
53 | tags: true
54 | script: 'docker tag krishnanr/sparkmonitor krishnanr/sparkmonitor:$TRAVIS_TAG && docker push krishnanr/sparkmonitor:$TRAVIS_TAG'
55 |
56 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM krishnanr/docker-jupyter-spark
2 |
3 | ADD ./extension/ /extension/
4 | ADD ./notebooks/ /notebooks/
5 |
6 | RUN pip install -e /extension/ && \
7 | jupyter nbextension install sparkmonitor --py --user --symlink && \
8 | jupyter nbextension enable sparkmonitor --py --user && \
9 | jupyter serverextension enable --py --user sparkmonitor && \
10 | ipython profile create && \
11 | echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py
12 |
13 | WORKDIR /notebooks/
14 |
15 | EXPOSE 8888
16 |
17 | CMD jupyter notebook --port=8888 --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token=''
18 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://travis-ci.org/krishnan-r/sparkmonitor)
2 | # Spark Monitor - An extension for Jupyter Notebook
3 |
4 | ### Note: This project is now maintained at https://github.com/swan-cern/sparkmonitor
5 |
6 | ## [Google Summer of Code - Final Report](https://krishnan-r.github.io/sparkmonitor/)
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | For the Google Summer of Code final report of this project [click here](https://krishnan-r.github.io/sparkmonitor/)
20 |
21 | ## About
22 |
23 |
24 |
25 |
+
26 |
27 |
=
28 |
29 |
30 |
31 | SparkMonitor is an extension for Jupyter Notebook that enables the live monitoring of Apache Spark Jobs spawned from a notebook. The extension provides several features to monitor and debug a Spark job from within the notebook interface itself.
32 |
33 | ***
34 |
35 | 
36 |
37 | ## Features
38 | * Automatically displays a live monitoring tool below cells that run Spark jobs in a Jupyter notebook
39 | * A table of jobs and stages with progressbars
40 | * A timeline which shows jobs, stages, and tasks
41 | * A graph showing number of active tasks & executor cores vs time
42 | * A notebook server extension that proxies the Spark UI and displays it in an iframe popup for more details
43 | * For a detailed list of features see the use case [notebooks](https://krishnan-r.github.io/sparkmonitor/#common-use-cases-and-tests)
44 | * [How it Works](https://krishnan-r.github.io/sparkmonitor/how.html)
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 | ## Quick Installation
61 | ```bash
62 | pip install sparkmonitor
63 | jupyter nbextension install sparkmonitor --py --user --symlink
64 | jupyter nbextension enable sparkmonitor --py --user
65 | jupyter serverextension enable --py --user sparkmonitor
66 | ipython profile create && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py
67 | ```
68 | #### For more detailed instructions [click here](https://krishnan-r.github.io/sparkmonitor/install.html)
69 | #### To do a quick test of the extension:
70 | ```bash
71 | docker run -it -p 8888:8888 krishnanr/sparkmonitor
72 | ```
73 |
74 | ## Integration with ROOT and SWAN
75 | At CERN, the SparkMonitor extension would find two main use cases:
76 | * Distributed analysis with [ROOT](https://root.cern.ch/) and Apache Spark using the DistROOT module. [Here](https://krishnan-r.github.io/sparkmonitor/usecase_distroot.html) is an example demonstrating this use case.
77 | * Integration with [SWAN](https://swan.web.cern.ch/), A service for web based analysis, via a modified [container image](https://github.com/krishnan-r/sparkmonitorhub) for SWAN user sessions.
78 |
79 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | title: "SparkMonitor"
2 | description: An extension to monitor Apache Spark from Jupyter Notebook
3 |
4 | show_downloads: "false"
5 |
--------------------------------------------------------------------------------
/docs/how.md:
--------------------------------------------------------------------------------
1 |
2 | **[Final Report](index.md)** |
3 | **[Installation](install.md)** |
4 | **[How it Works](how.md)** |
5 | **[Use Cases](usecases.md)** |
6 | **[Code](https://github.com/krishnan-r/sparkmonitor)** |
7 | **[License](https://github.com/krishnan-r/sparkmonitor/blob/master/LICENSE)**
8 |
9 |
10 | # SparkMonitor - How the extension works
11 |
12 | 
13 |
14 | [Jupyter Notebook](http://jupyter.org/) is a web based application that follows a client-server architecture. It consists of a JavaScript browser client that renders the notebook interface and a web server process on the back end. The computation of the cells is outsourced to a separate kernel process running on the server. To extend the notebook, it is required to implement a separate extension component for each part.
15 |
16 | The SparkMonitor extension for Jupyter Notebook has 4 components.
17 |
18 | 1. Notebook Frontend extension written in JavaScript.
19 | 2. [IPython](https://ipython.org/) Kernel extension written in Python.
20 | 3. Notebook web server extension written in Python.
21 | 4. An implementation of [SparkListener](https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.scheduler.SparkListener) interface written in Scala.
22 |
23 | ---
24 | ## The Frontend Extension
25 | 
26 | - Written in JavaScript.
27 | - Receives data from the IPython kernel through Jupyter's comm API mechanism for widgets.
28 | - Jupyter frontend extensions are requirejs modules that are loaded when the browser page loads.
29 | - Contains the logic for displaying the progress bars, graphs and timeline.
30 | - Keeps track of cells running using a queue by tracking execution requests and kernel busy/idle events.
31 | - Creates and renders the display if a job start event is received while a cell is running.
32 |
33 | ---
34 | ## [IPython](https://ipython.org/) Kernel Extension
35 | 
36 | - The kernel extension is an importable Python module called `sparkmonitor.kernelextension`
37 | - It is configured to load when the IPython kernel process starts.
38 | - The extension acts as a bridge between the frontend and the SparkListener callback interface.
39 | - To communicate with the SparkListener the extension opens a socket and waits for connections.
40 | - The port of the socket is exported as an environment variable. When a Spark application starts, the custom SparkListener connects to this port and forwards data.
41 | - To communicate with the frontend the extension uses the IPython Comm API provided by Jupyter.
42 | - The extension also adds to the user's namespace a [SparkConf](http://spark.apache.org/docs/2.1.0/api/python/pyspark.html#pyspark.SparkConf) instance named `conf`. This object is configured with the Spark properties that make Spark load the custom SparkListener as well as adds the necessary JAR file paths to the Java class path.
43 |
44 |
45 | ---
46 | ## Scala [SparkListener](https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.scheduler.SparkListener)
47 | 
48 | - Written in Scala.
49 | - The listener receives notifications of [Apache Spark](https://spark.apache.org/) application lifecycle events as callbacks.
50 | - The custom implementation used in this extension connects to a socket opened by the IPython kernel extension.
51 | - All the data is forwarded to the kernel through this socket which forwards it to the frontend JavaScript.
52 |
53 | ---
54 | ## The Notebook Webserver Extension - A Spark Web UI proxy
55 | 
56 | - Written in Python.
57 | - This module proxies the Spark UI running typically on 127.0.0.1:4040 to the user through Jupyter's web server.
58 | - Jupyter notebook is based on the [Tornado](http://www.tornadoweb.org/en/stable/) web server back end. Tornado is a Python webserver.
59 | - Jupyter webserver extensions are custom request handlers sub-classing the `IPythonHandler` class. They provide custom endpoints with additional content.
60 | - This module provides the Spark UI as an endpoint at `notebook_base_url/sparkmonitor`.
61 | - In the front end extension, the Spark UI can also be accessed as an IFrame dialog through the monitoring display.
62 | - For the Spark UI web application to work as expected, the server extension replaces all relative URLs in the requested page, adding the endpoints base URL to each.
63 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 |
2 | **[Final Report](index.md)** |
3 | **[Installation](install.md)** |
4 | **[How it Works](how.md)** |
5 | **[Use Cases](usecases.md)** |
6 | **[Code](https://github.com/krishnan-r/sparkmonitor)** |
7 | **[License](https://github.com/krishnan-r/sparkmonitor/blob/master/LICENSE)**
8 |
9 |
10 | # Google Summer of Code 2017 Final Report
11 | # Big Data Tools for Physics Analysis
12 |
13 | ## Introduction
14 | Jupyter Notebook is an interactive computing environment that is used to create notebooks which contain code, output, plots, widgets and theory. Jupyter notebook offers a convenient platform for interactive data analysis, scientific computing and rapid prototyping of code. A powerful tool used to perform complex computation intensive tasks is Apache Spark. Spark is a framework for large scale cluster computing in Big Data contexts. This project leverages these existing big data tools for use in an interactive scientific analysis environment. Spark jobs can be called from an IPython kernel in Jupyter Notebook using the pySpark module. The results of the computation can be visualized and plotted within the notebook interface. However to know what is happening to a running job, it is required to connect separately to the Spark web UI server. This project implements an extension called SparkMonitor to Jupyter Notebook that enables the monitoring of jobs sent from a notebook application, from within the notebook itself. The extension seamlessly integrates with the cell structure of the notebook and provides real time monitoring capabilities.
15 |
16 | ## Features
17 | - The extension integrates with the cell structure of the notebook and automatically detects jobs submitted from a notebook cell.
18 |
19 | 
20 |
21 | - It displays the jobs and stages spawned from a cell, with real time progress bars, status and resource utilization.
22 |
23 | 
24 |
25 | - The extension provides an aggregated view of the number of active tasks and available executor cores in the cluster.
26 |
27 | 
28 |
29 | - An event timeline displays the overall workload split into jobs, stages and tasks across executors in the cluster.
30 |
31 | 
32 |
33 | - The extension also integrates the Spark Web UI within the notebook page by displaying it in an IFrame pop-up.
34 |
35 | 
36 |
37 | ## Example Use Cases
38 | The extension has been tested with a range of Spark applications. [Here](usecases.md) is a list of use cases the extension has been run with.
39 |
40 |
41 | ## Integration in SWAN and CERN IT Infrastructure
42 | - The extension has been successfully integrated with a test instance of [SWAN](http://swan.web.cern.ch/), a Service for Web based ANalysis at [CERN](https://home.cern/).
43 | - SWAN allows the submission of Spark Jobs from a notebook interface to Spark clusters deployed at CERN.
44 | - SWAN encapsulates user sessions in Docker containers. The extension is installed by modifying the docker container image.
45 | - The extension is loaded to Jupyter whenever the user attaches a Spark Cluster to the notebook environment.
46 | - The customized docker image for the user environment can be found [here](https://github.com/krishnan-r/sparkmonitorhub).
47 | - Using this integration, it is now possible to monitor and debug Spark Jobs running on CERN Clusters using the notebook interface.
48 |
49 | ## Documentation
50 | ### How it Works
51 | - A detailed explanation of how different components in the extension work together can be found [here](how.md).
52 |
53 | ### Code Documentation
54 | - Documentation for the JavaScript code is available [here](jsdoc).
55 | - All the documentation for the code in Python and Scala is available within the [source files](https://github.com/krishnan-r/sparkmonitor) itself.
56 |
57 | ### Installation
58 | - The extension is available as a pip python package through [Github Releases](https://github.com/krishnan-r/sparkmonitor/releases).
59 | - To install and configure the extension or to build from source, follow the instructions [here](install.md).
60 |
61 | ## Gallery
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | ## Future Work
76 | ### Pending Work
77 | - Ability to control and cancel running jobs.
78 |
79 | ### Future Ideas
80 | - Support for Scala Notebooks
81 | - Interface for easier configuration of Spark Applications
82 |
83 | ## Useful Links
84 | - [SparkMonitor](https://github.com/krishnan-r/sparkmonitor) Github Repository
85 | - [SparkMonitorHub](https://github.com/krishnan-r/sparkmonitorhub) - An integration for [SWAN](https://swan.web.cern.ch/) - A service for web-based analysis at CERN
86 | - [Initial Project Proposal](https://docs.google.com/document/d/1J2zIRnEAvey8HcDyqrKZ2DeQJXLvhU5HR2WdxZ9o8Yk/edit?usp=sharing)
87 | - [Initial Idea Page of Organization](http://hepsoftwarefoundation.org/gsoc/proposal_ROOTspark.html)
88 | - [Travis Build for SparkMonitor](https://travis-ci.org/krishnan-r/sparkmonitor)
89 | - [Docker image](https://hub.docker.com/r/krishnanr/sparkmonitor/) for testing locally based on Scientific Linux CERN 6
90 | - [Docker image](https://hub.docker.com/r/krishnanr/sparkmonitorhub/) for SWAN
91 | - [SparkMonitor Python Package](https://github.com/krishnan-r/sparkmonitor/releases) - Github Release
92 |
--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
1 |
2 | **[Final Report](index.md)** |
3 | **[Installation](install.md)** |
4 | **[How it Works](how.md)** |
5 | **[Use Cases](usecases.md)** |
6 | **[Code](https://github.com/krishnan-r/sparkmonitor)** |
7 | **[License](https://github.com/krishnan-r/sparkmonitor/blob/master/LICENSE)**
8 |
9 |
10 | # Installation
11 | ## Prerequisites
12 | - PySpark on [Apache Spark](https://spark.apache.org/) version 2.1.1 or higher
13 | - [Jupyter Notebook](http://jupyter.org/) version 4.4.0 or higher
14 |
15 | ## Quick Install
16 | ```bash
17 | pip install sparkmonitor
18 | jupyter nbextension install sparkmonitor --py --user --symlink
19 | jupyter nbextension enable sparkmonitor --py --user
20 | jupyter serverextension enable --py --user sparkmonitor
21 | ipython profile create && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py
22 | ```
23 | ## Detailed Instructions
24 |
25 | 1. Install the python package in the latest tagged github release. The python package contains the JavaScript resources and the listener jar file.
26 |
27 | ```bash
28 | pip install sparkmonitor
29 | ```
30 |
31 | 2. The frontend extension is symlinked (```--symlink```) into the Jupyter configuration directory by the `jupyter nbextension` command. The second line configures the frontend extension to load on notebook startup.
32 |
33 | ```bash
34 | jupyter nbextension install --py sparkmonitor --user --symlink
35 | jupyter nbextension enable sparkmonitor --user --py
36 | ```
37 | 3. Configure the server extension to load when the notebook server starts
38 |
39 | ```bash
40 | jupyter serverextension enable --py --user sparkmonitor
41 | ```
42 |
43 | 4. Create the default profile configuration files (Skip if config file already exists)
44 | ```bash
45 | ipython profile create
46 | ```
47 | 5. Configure the kernel to load the extension on startup. This is added to the configuration files in users home directory
48 | ```bash
49 | echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py
50 | ```
51 |
52 | ## Configuration
53 | By default the Spark Web UI runs on `localhost:4040`. If this is not the case, setting the environment variables `SPARKMONITOR_UI_HOST` and `SPARKMONITOR_UI_PORT` overrides the default Spark UI hostname `localhost` and port 4040 used by the Spark UI proxy.
54 |
55 | ## Build from Source
56 | Building the extension involves three parts:
57 | 1. Bundle and minify the JavaScript
58 | 2. Compile the Scala listener into a JAR file.
59 | 3. Package and install the python package.
60 |
61 | ```bash
62 | git clone https://github.com/krishnan-r/sparkmonitor
63 | cd sparkmonitor/extension
64 | #Build Javascript
65 | yarn install
66 | yarn run webpack
67 | #Build SparkListener Scala jar
68 | cd scalalistener/
69 | sbt package
70 | ```
71 | ```bash
72 | #Install the python package (in editable format -e for development)
73 | cd sparkmonitor/extension/
74 | pip install -e .
75 | # The sparkmonitor python package is now installed. Configure with jupyter as above.
76 | ```
77 |
--------------------------------------------------------------------------------
/docs/jsdoc/classes.list.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | SparkMonitor JavaScript Documentation Classes
8 |
9 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |