├── config.json ├── sparkmagic ├── sparkmagic │ ├── auth │ │ ├── __init__.py │ │ ├── kerberos.py │ │ ├── customauth.py │ │ └── basic.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_endpoint.py │ │ ├── test_kernels.py │ │ ├── test_heartbeatthread.py │ │ ├── test_usercodeparser.py │ │ ├── test_configurableretrypolicy.py │ │ ├── test_exceptions.py │ │ ├── test_livyreliablehttpclient.py │ │ ├── test_pd_data_coerce.py │ │ ├── test_sendstringtosparkcommand.py │ │ └── test_sessionmanager.py │ ├── utils │ │ ├── __init__.py │ │ ├── sparklogger.py │ │ ├── constants.py │ │ └── utils.py │ ├── livyclientlib │ │ ├── __init__.py │ │ ├── linearretrypolicy.py │ │ ├── endpoint.py │ │ ├── configurableretrypolicy.py │ │ ├── sendstringtosparkcommand.py │ │ ├── sendtosparkcommand.py │ │ ├── livyreliablehttpclient.py │ │ ├── sessionmanager.py │ │ ├── sendpandasdftosparkcommand.py │ │ └── reliablehttpclient.py │ ├── controllerwidget │ │ ├── __init__.py │ │ ├── abstractmenuwidget.py │ │ ├── createsessionwidget.py │ │ ├── managesessionwidget.py │ │ ├── addendpointwidget.py │ │ └── magicscontrollerwidget.py │ ├── serverextension │ │ └── __init__.py │ ├── kernels │ │ ├── wrapperkernel │ │ │ ├── __init__.py │ │ │ └── usercodeparser.py │ │ ├── pysparkkernel │ │ │ ├── __init__.py │ │ │ ├── kernel.json │ │ │ ├── kernel.js │ │ │ └── pysparkkernel.py │ │ ├── sparkkernel │ │ │ ├── __init__.py │ │ │ ├── kernel.json │ │ │ ├── kernel.js │ │ │ └── sparkkernel.py │ │ ├── sparkrkernel │ │ │ ├── __init__.py │ │ │ ├── kernel.json │ │ │ ├── kernel.js │ │ │ └── sparkrkernel.py │ │ └── __init__.py │ ├── magics │ │ └── __init__.py │ └── __init__.py ├── setup.cfg ├── MANIFEST.in ├── requirements.txt ├── example_config.json └── setup.py ├── autovizwidget ├── autovizwidget │ ├── tests │ │ ├── __init__.py │ │ ├── test_plotlygraphrenderer.py │ │ ├── test_sparkevents.py │ │ ├── test_utils.py │ │ └── test_encodingwidget.py │ ├── utils │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── events.py │ │ └── configuration.py │ ├── widget │ │ ├── __init__.py │ │ ├── invalidencodingerror.py │ │ ├── encoding.py │ │ └── utils.py │ ├── plotlygraphs │ │ ├── __init__.py │ │ ├── scattergraph.py │ │ ├── bargraph.py │ │ ├── linegraph.py │ │ ├── areagraph.py │ │ ├── datagraph.py │ │ ├── graphrenderer.py │ │ └── piegraph.py │ └── __init__.py ├── MANIFEST.in ├── setup.cfg ├── README.md ├── requirements.txt ├── examples │ ├── Capture0.PNG │ └── Capture1.PNG ├── setup.py └── LICENSE.md ├── hdijupyterutils ├── hdijupyterutils │ ├── tests │ │ ├── __init__.py │ │ ├── test_ipythondisplay.py │ │ ├── test_events.py │ │ ├── test_filesystemreaderwriter.py │ │ ├── test_configuration.py │ │ └── test_logger.py │ ├── __init__.py │ ├── guid.py │ ├── constants.py │ ├── eventshandler.py │ ├── events.py │ ├── utils.py │ ├── ipythondisplay.py │ ├── filehandler.py │ ├── filesystemreaderwriter.py │ ├── log.py │ ├── ipywidgetfactory.py │ └── configuration.py ├── MANIFEST.in ├── setup.cfg ├── requirements.txt ├── README.md ├── setup.py └── LICENSE.md ├── screenshots ├── help.png ├── autoviz.png ├── diagram.png ├── matplotlib.png └── sparkcontext.png ├── examples └── images │ ├── cleanup.PNG │ ├── widget.PNG │ ├── addendpoint.PNG │ ├── addsession.PNG │ └── addsession_s.PNG ├── helm ├── charts │ └── sparkmagic-0.1.0.tgz ├── values.yaml ├── templates │ ├── jupyter-service.yaml │ ├── sparkmagic-service.yaml │ ├── sparkmagic-deployment.yaml │ └── jupyter-deployment.yaml └── Chart.yaml ├── .git-blame-ignore-revs ├── .github ├── workflows │ ├── lint.yaml │ ├── tests.yml │ ├── release.yml │ 
├── publish.yml │ └── docker_build.yml ├── dependabot.yml ├── pull_request_template.md └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── RELEASING.md ├── SECURITY.md ├── docker-compose.yml ├── .bumpversion.cfg ├── pyproject.toml ├── .gitignore ├── Dockerfile.jupyter ├── Dockerfile.spark ├── .vscode └── tasks.json ├── LICENSE.md └── CODE_OF_CONDUCT.md /config.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/serverextension/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/wrapperkernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hdijupyterutils/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | 3 | 
-------------------------------------------------------------------------------- /autovizwidget/autovizwidget/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.23.0" 2 | -------------------------------------------------------------------------------- /autovizwidget/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /sparkmagic/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.23.0" 2 | -------------------------------------------------------------------------------- /hdijupyterutils/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.kernels.kernelmagics import * 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/magics/__init__.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.magics.remotesparkmagics import * 2 | -------------------------------------------------------------------------------- /autovizwidget/README.md: -------------------------------------------------------------------------------- 1 | # autovizwidget 2 | 3 | An Auto-Visualization library for pandas dataframes -------------------------------------------------------------------------------- /screenshots/help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/help.png -------------------------------------------------------------------------------- /sparkmagic/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include sparkmagic/kernels *.js *.json 2 | include requirements.txt 3 | -------------------------------------------------------------------------------- /screenshots/autoviz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/autoviz.png 
-------------------------------------------------------------------------------- /screenshots/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/diagram.png -------------------------------------------------------------------------------- /examples/images/cleanup.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/cleanup.PNG -------------------------------------------------------------------------------- /examples/images/widget.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/widget.PNG -------------------------------------------------------------------------------- /screenshots/matplotlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/matplotlib.png -------------------------------------------------------------------------------- /screenshots/sparkcontext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/sparkcontext.png -------------------------------------------------------------------------------- /autovizwidget/requirements.txt: -------------------------------------------------------------------------------- 1 | plotly>=3 2 | ipywidgets>5.0.0 3 | hdijupyterutils>=0.6 4 | notebook>=4.2 5 | pandas<3.0.0 6 | -------------------------------------------------------------------------------- /examples/images/addendpoint.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/addendpoint.PNG -------------------------------------------------------------------------------- /examples/images/addsession.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/addsession.PNG -------------------------------------------------------------------------------- /examples/images/addsession_s.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/addsession_s.PNG -------------------------------------------------------------------------------- /helm/charts/sparkmagic-0.1.0.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/helm/charts/sparkmagic-0.1.0.tgz -------------------------------------------------------------------------------- /autovizwidget/examples/Capture0.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/autovizwidget/examples/Capture0.PNG -------------------------------------------------------------------------------- /autovizwidget/examples/Capture1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/autovizwidget/examples/Capture1.PNG 
-------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # .git-blame-ignore-revs 2 | # Re-formatted entire code base with black 3 | 7ebf0753485c931db4135953dcd0864b4d089ed5 4 | 5 | -------------------------------------------------------------------------------- /hdijupyterutils/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython>=4.1.2 2 | ipywidgets>5.0.0 3 | ipykernel>=4.2.2 4 | jupyter>=1 5 | pandas<3.0.0 6 | numpy>=1.16.5 7 | notebook>=4.2 8 | -------------------------------------------------------------------------------- /hdijupyterutils/README.md: -------------------------------------------------------------------------------- 1 | # Hdi Jupyter Utils 2 | 3 | Project with useful classes/methods for all projects created by the HDInsight team at Microsoft around Jupyter. 4 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/guid.py: -------------------------------------------------------------------------------- 1 | from .utils import generate_uuid 2 | 3 | 4 | class ObjectWithGuid(object): 5 | def __init__(self): 6 | self.guid = generate_uuid() 7 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/constants.py: -------------------------------------------------------------------------------- 1 | HOME_PATH = "~/.autovizwidget" 2 | CONFIG_FILE = "config.json" 3 | 4 | GRAPH_RENDER_EVENT = "notebookGraphRender" 5 | GRAPH_TYPE = "GraphType" 6 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: psf/black@stable 11 | -------------------------------------------------------------------------------- /sparkmagic/requirements.txt: -------------------------------------------------------------------------------- 1 | hdijupyterutils>=0.6 2 | autovizwidget>=0.6 3 | ipython>=4.1.2 4 | pandas<3.0.0 5 | numpy 6 | requests 7 | ipykernel>=4.2.2 8 | ipywidgets>5.0.0 9 | notebook>=4.2 10 | tornado>=4 11 | requests_kerberos>=0.8.0 12 | nest_asyncio>1.5.5 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/invalidencodingerror.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | 5 | class InvalidEncodingError(Exception): 6 | """An exception for encodings you can't work with.""" 7 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "python", 4 | "-m", 5 | "sparkmagic.kernels.sparkkernel.sparkkernel", 6 | "-f", 7 | "{connection_file}" 8 | ], 9 | "display_name": "Spark", 10 | "language": "scala" 11 | } 12 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "python", 4 | "-m", 5 | "sparkmagic.kernels.sparkrkernel.sparkrkernel", 6 | "-f", 7 | "{connection_file}" 8 | ], 9 | "display_name": "SparkR", 10 | "language": "r" 11 | } 12 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/constants.py: -------------------------------------------------------------------------------- 1 | LOGGING_CONFIG_CLASS_NAME = "hdijupyterutils.filehandler.MagicsFileHandler" 2 | 3 | EVENTS_HANDLER_CLASS_NAME = "hdijupyterutils.eventshandler.EventsHandler" 4 | INSTANCE_ID = "InstanceId" 5 | TIMESTAMP = "Timestamp" 6 | EVENT_NAME = "EventName" 7 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "python", 4 | "-m", 5 | "sparkmagic.kernels.pysparkkernel.pysparkkernel", 6 | "-f", 7 | "{connection_file}" 8 | ], 9 | "display_name": "PySpark", 10 | "language": "python" 11 | } 12 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/kernel.js: -------------------------------------------------------------------------------- 1 | define(['base/js/namespace'], function(IPython){ 2 | var onload = function() { 3 | IPython.CodeCell.config_defaults.highlight_modes['magic_text/x-sql'] = {'reg':[/^%%sql/]}; 4 | } 5 | 6 | return { onload: onload } 7 | }) -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/kernel.js: -------------------------------------------------------------------------------- 1 | define(["base/js/namespace"], function(IPython) { 2 | var onload = function() { 3 | IPython.CodeCell.config_defaults.highlight_modes["magic_text/x-sql"] = { 4 | reg: [/^%%sql/] 5 | }; 6 | }; 7 | 8 | return { onload: onload }; 9 | }); 10 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | # How to release 2 | 3 | 1. Make sure `CHANGELOG.md` is up-to-date with all changes since last release and available on `master` branch. 4 | 2. 
Go to Actions -> Release workflow -> Run workflow -> Select `patch|minor|major` depending on the changes you want to release, set `master` branch as the desired branch 5 | -------------------------------------------------------------------------------- /helm/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | replicas: 1 4 | image: 5 | repository: ghcr.io/ljubon/sparkmagic/jupyter 6 | tag: latest 7 | service: 8 | port: 8888 9 | 10 | sparkmagic: 11 | image: 12 | repository: ghcr.io/ljubon/sparkmagic/sparkmagic-livy 13 | tag: latest 14 | service: 15 | port: 8998 16 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.23.0" 2 | 3 | from sparkmagic.serverextension.handlers import ( 4 | load_jupyter_server_extension, 5 | ) # noqa: #501 6 | 7 | 8 | def _jupyter_server_extension_paths(): 9 | return [{"module": "sparkmagic"}] 10 | 11 | 12 | def _jupyter_nbextension_paths(): 13 | return [] 14 | -------------------------------------------------------------------------------- /helm/templates/jupyter-service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: jupyter-notebook 6 | spec: 7 | selector: 8 | app: jupyter-notebook 9 | ports: 10 | - protocol: TCP 11 | port: {{ .Values.jupyter.service.port }} 12 | targetPort: {{ .Values.jupyter.service.port }} 13 | type: LoadBalancer 14 | -------------------------------------------------------------------------------- /helm/templates/sparkmagic-service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: sparkmagic 6 | spec: 7 | selector: 8 | app: sparkmagic 9 | ports: 10 | - protocol: TCP 11 | port: {{ .Values.sparkmagic.service.port }} 12 | targetPort: {{ .Values.sparkmagic.service.port }} 13 | type: LoadBalancer 14 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | 8 | # Additional configuration for Python using pip 9 | - package-ecosystem: "pip" 10 | directory: "/" 11 | schedule: 12 | interval: "daily" 13 | open-pull-requests-limit: 10 14 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/scattergraph.py: -------------------------------------------------------------------------------- 1 | from plotly.graph_objs import Scatter 2 | 3 | from .graphbase import GraphBase 4 | 5 | 6 | class ScatterGraph(GraphBase): 7 | def _get_data(self, df, encoding): 8 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 9 | return [Scatter(x=x_values, y=y_values, mode="markers")] 10 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | All IPython and Jupyter security issues are handled via security@ipython.org. 6 | You can find more information on the Jupyter website.
https://jupyter.org/security 7 | 8 | ## Tidelift 9 | 10 | You can also report security concerns for autovizwidget and sparkmagic via the [Tidelift platform](https://tidelift.com/security). 11 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/kernel.js: -------------------------------------------------------------------------------- 1 | define(["base/js/namespace"], function(IPython) { 2 | var onload = function() { 3 | IPython.CodeCell.config_defaults.highlight_modes["magic_text/x-sql"] = { 4 | reg: [/^%%sql/] 5 | }; 6 | IPython.CodeCell.config_defaults.highlight_modes["magic_text/x-python"] = { 7 | reg: [/^%%local/] 8 | }; 9 | }; 10 | 11 | return { onload: onload }; 12 | }); 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/bargraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.graph_objs import Bar 5 | 6 | from .graphbase import GraphBase 7 | 8 | 9 | class BarGraph(GraphBase): 10 | def _get_data(self, df, encoding): 11 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 12 | return [Bar(x=x_values, y=y_values)] 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/linegraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.graph_objs import Scatter 5 | 6 | from .graphbase import GraphBase 7 | 8 | 9 | class LineGraph(GraphBase): 10 | def _get_data(self, df, encoding): 11 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 12 | return [Scatter(x=x_values, y=y_values)] 13 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/sparklogger.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 2 | from hdijupyterutils.log import Log 3 | 4 | import sparkmagic.utils.configuration as conf 5 | from sparkmagic.utils.constants import MAGICS_LOGGER_NAME 6 | 7 | 8 | class SparkLog(Log): 9 | def __init__(self, class_name): 10 | super(SparkLog, self).__init__( 11 | MAGICS_LOGGER_NAME, conf.logging_config(), class_name 12 | ) 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/areagraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.graph_objs import Scatter 5 | 6 | from .graphbase import GraphBase 7 | 8 | 9 | class AreaGraph(GraphBase): 10 | def _get_data(self, df, encoding): 11 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 12 | return [Scatter(x=x_values, y=y_values, fill="tonexty")] 13 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | spark: 4 | image: jupyter/sparkmagic-livy 5 | build: 6 | context: .
7 | dockerfile: Dockerfile.spark 8 | hostname: spark 9 | ports: 10 | - "8998:8998" 11 | jupyter: 12 | image: jupyter/sparkmagic 13 | build: 14 | context: . 15 | dockerfile: Dockerfile.jupyter 16 | args: 17 | dev_mode: "false" 18 | links: 19 | - spark 20 | ports: 21 | - "8888:8888" 22 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/eventshandler.py: -------------------------------------------------------------------------------- 1 | from .log import Log 2 | 3 | 4 | class EventsHandler(object): 5 | def __init__(self, logger_name, logging_config): 6 | self.logger = Log(logger_name, logging_config, "EventsHandler") 7 | 8 | def handle_event(self, kwargs_list): 9 | """ 10 | Storing the Event details using the logger. 11 | """ 12 | event_line = ",".join("{}: {}".format(key, arg) for key, arg in kwargs_list) 13 | self.logger.info(event_line) 14 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | 4 | ### Checklist 5 | - [ ] Wrote a description of my changes above 6 | - [ ] Formatted my code with [`black`](https://black.readthedocs.io/en/stable/index.html) 7 | - [ ] Added a bullet point for my changes to the top of the `CHANGELOG.md` file 8 | - [ ] Added or modified unit tests to reflect my changes 9 | - [ ] Manually tested with a notebook 10 | - [ ] If adding a feature, there is an example notebook and/or documentation in the `README.md` file 11 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/events.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import importlib 3 | 4 | from hdijupyterutils.constants import INSTANCE_ID 5 | from hdijupyterutils.utils import get_instance_id 6 | 7 | 8 | class Events(object): 9 | def __init__(self, handler): 10 | self.handler = handler 11 | 12 | @staticmethod 13 | def get_utc_date_time(): 14 | return datetime.utcnow() 15 | 16 | def send_to_handler(self, kwargs_list): 17 | kwargs_list = [(INSTANCE_ID, get_instance_id())] + kwargs_list 18 | 19 | assert len(kwargs_list) <= 12 20 | 21 | self.handler.handle_event(kwargs_list) 22 | -------------------------------------------------------------------------------- /helm/templates/sparkmagic-deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: sparkmagic 6 | spec: 7 | replicas: {{ .Values.sparkmagic.replicas }} 8 | selector: 9 | matchLabels: 10 | app: sparkmagic 11 | template: 12 | metadata: 13 | labels: 14 | app: sparkmagic 15 | spec: 16 | containers: 17 | - name: sparkmagic 18 | image: "{{ .Values.sparkmagic.image.repository }}:{{ .Values.sparkmagic.image.tag }}" 19 | ports: 20 | - name: http 21 | containerPort: {{ .Values.sparkmagic.service.port }} 22 | protocol: TCP -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.23.0 3 | commit = True 4 | tag = True 5 | tag_name = {new_version} 6 | message = "Bump version: {current_version} → {new_version}" 7 | 8 | [bumpversion:file:sparkmagic/sparkmagic/__init__.py] 9 | search = __version__ = "{current_version}" 10 | 
replace = __version__ = "{new_version}" 11 | 12 | [bumpversion:file:autovizwidget/autovizwidget/__init__.py] 13 | search = __version__ = "{current_version}" 14 | replace = __version__ = "{new_version}" 15 | 16 | [bumpversion:file:hdijupyterutils/hdijupyterutils/__init__.py] 17 | search = __version__ = "{current_version}" 18 | replace = __version__ = "{new_version}" 19 | -------------------------------------------------------------------------------- /helm/templates/jupyter-deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: jupyter-notebook 6 | spec: 7 | replicas: {{ .Values.sparkmagic.replicas }} 8 | selector: 9 | matchLabels: 10 | app: jupyter-notebook 11 | template: 12 | metadata: 13 | labels: 14 | app: jupyter-notebook 15 | spec: 16 | containers: 17 | - name: jupyter-notebook 18 | image: "{{ .Values.jupyter.image.repository }}:{{ .Values.jupyter.image.tag }}" 19 | ports: 20 | - name: http 21 | containerPort: {{ .Values.jupyter.service.port }} 22 | protocol: TCP 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Versions:** 23 | - SparkMagic 24 | - Livy (if you know it) 25 | - Spark 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/utils.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 
2 | 3 | import os 4 | import uuid 5 | 6 | 7 | first_run = True 8 | instance_id = None 9 | 10 | 11 | def expand_path(path): 12 | return os.path.expanduser(path) 13 | 14 | 15 | def join_paths(p1, p2): 16 | return os.path.join(p1, p2) 17 | 18 | 19 | def generate_uuid(): 20 | return uuid.uuid4() 21 | 22 | 23 | def get_instance_id(): 24 | global first_run, instance_id 25 | 26 | if first_run: 27 | first_run = False 28 | instance_id = generate_uuid() 29 | 30 | if instance_id is None: 31 | raise ValueError("Tried to return empty instance ID.") 32 | 33 | return instance_id 34 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_ipythondisplay.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from hdijupyterutils.ipythondisplay import IpythonDisplay 3 | from mock import MagicMock 4 | import sys 5 | 6 | 7 | def test_stdout_flush(): 8 | ipython_shell = MagicMock() 9 | ipython_display = IpythonDisplay() 10 | ipython_display._ipython_shell = ipython_shell 11 | sys.stdout = MagicMock() 12 | 13 | ipython_display.write("Testing Stdout Flush è") 14 | assert sys.stdout.flush.call_count == 1 15 | 16 | 17 | def test_stderr_flush(): 18 | ipython_shell = MagicMock() 19 | ipython_display = IpythonDisplay() 20 | ipython_display._ipython_shell = ipython_shell 21 | sys.stderr = MagicMock() 22 | 23 | ipython_display.send_error("Testing Stderr Flush è") 24 | assert sys.stderr.flush.call_count == 1 25 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "development" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Devin Stein "] 6 | readme = "README.md" 7 | 8 | packages = [ 9 | { include = "hdijupyterutils", from = "./hdijupyterutils" }, 10 | { include = "autovizwidget", from = "./autovizwidget" }, 11 | { include = "sparkmagic", from = "./sparkmagic" }, 12 | ] 13 | 14 | [tool.poetry.dependencies] 15 | python = "^3.8" 16 | hdijupyterutils = {path = "./hdijupyterutils", develop = true } 17 | autovizwidget = {path = "./autovizwidget", develop = true } 18 | sparkmagic = {path = "./sparkmagic", develop = true } 19 | numpy = "^1.24.4" 20 | pandas = "^2.0.3" 21 | pytest = "^8.3.3" 22 | mock = "^5.1.0" 23 | 24 | 25 | [build-system] 26 | requires = ["poetry-core"] 27 | build-backend = "poetry.core.masonry.api" 28 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/ipythondisplay.py: -------------------------------------------------------------------------------- 1 | from IPython.display import display, HTML 2 | from IPython import get_ipython 3 | import sys 4 | 5 | 6 | class IpythonDisplay(object): 7 | def __init__(self): 8 | self._ipython_shell = get_ipython() 9 | 10 | def display(self, to_display): 11 | display(to_display) 12 | 13 | def html(self, to_display): 14 | self.display(HTML(to_display)) 15 | 16 | def stderr_flush(self): 17 | sys.stderr.flush() 18 | 19 | def stdout_flush(self): 20 | sys.stdout.flush() 21 | 22 | def write(self, msg): 23 | sys.stdout.write(msg) 24 | self.stdout_flush() 25 | 26 | def writeln(self, msg): 27 | self.write("{}\n".format(msg)) 28 | 29 | def send_error(self, error): 30 | sys.stderr.write("{}\n".format(error)) 31 | self.stderr_flush() 32 | -------------------------------------------------------------------------------- 
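A minimal usage sketch for the `IpythonDisplay` class above. It only uses methods defined in that file; the one assumption is that it runs inside an IPython/Jupyter session, so `get_ipython()` returns a live shell:

```python
# Sketch: write to stdout/stderr with an immediate flush after each message
# (so output appears promptly in a notebook), and render rich HTML output.
from hdijupyterutils.ipythondisplay import IpythonDisplay

display = IpythonDisplay()
display.writeln("Session started")       # stdout, flushed right away
display.html("<b>3 rows returned</b>")   # rendered as an HTML output cell
display.send_error("Session timed out")  # stderr, flushed right away
```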
/sparkmagic/sparkmagic/livyclientlib/linearretrypolicy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | 5 | class LinearRetryPolicy(object): 6 | """Retry policy that always returns the same number of seconds to sleep between calls, 7 | takes all status codes 500 or above to be retriable, and retries a given maximum number of times. 8 | """ 9 | 10 | def __init__(self, seconds_to_sleep, max_retries): 11 | self._seconds_to_sleep = seconds_to_sleep 12 | self.max_retries = max_retries 13 | 14 | def should_retry(self, status_code, error, retry_count): 15 | if None in (status_code, retry_count): 16 | return False 17 | return (status_code >= 500 and retry_count <= self.max_retries) or error 18 | 19 | def seconds_to_sleep(self, retry_count): 20 | return self._seconds_to_sleep 21 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/events.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import importlib 3 | from hdijupyterutils.constants import EVENT_NAME, TIMESTAMP 4 | from hdijupyterutils.events import Events 5 | 6 | from .constants import GRAPH_TYPE, GRAPH_RENDER_EVENT 7 | from . import configuration as conf 8 | 9 | 10 | class AutoVizEvents(Events): 11 | def __init__(self): 12 | handler = conf.events_handler() 13 | self.emit = handler is not None 14 | super(AutoVizEvents, self).__init__(handler) 15 | 16 | def emit_graph_render_event(self, graph_type): 17 | event_name = GRAPH_RENDER_EVENT 18 | time_stamp = self.get_utc_date_time() 19 | 20 | kwargs_list = [ 21 | (EVENT_NAME, event_name), 22 | (TIMESTAMP, time_stamp), 23 | (GRAPH_TYPE, graph_type), 24 | ] 25 | 26 | if self.emit: 27 | self.send_to_handler(kwargs_list) 28 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/configuration.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 
2 | from hdijupyterutils.constants import ( 3 | EVENTS_HANDLER_CLASS_NAME, 4 | LOGGING_CONFIG_CLASS_NAME, 5 | ) 6 | from hdijupyterutils.utils import join_paths 7 | from hdijupyterutils.configuration import override as _override 8 | from hdijupyterutils.configuration import override_all as _override_all 9 | from hdijupyterutils.configuration import with_override 10 | 11 | from .constants import HOME_PATH, CONFIG_FILE 12 | 13 | 14 | d = {} 15 | path = join_paths(HOME_PATH, CONFIG_FILE) 16 | 17 | 18 | def override(config, value): 19 | _override(d, path, config, value) 20 | 21 | 22 | def override_all(obj): 23 | _override_all(d, obj) 24 | 25 | 26 | _with_override = with_override(d, path) 27 | 28 | # Configs 29 | 30 | 31 | @_with_override 32 | def events_handler(): 33 | return None 34 | 35 | 36 | @_with_override 37 | def max_slices_pie_graph(): 38 | return 100 39 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/endpoint.py: -------------------------------------------------------------------------------- 1 | from .exceptions import BadUserDataException 2 | 3 | 4 | class Endpoint(object): 5 | def __init__(self, url, auth, implicitly_added=False): 6 | if not url: 7 | raise BadUserDataException("URL must not be empty") 8 | 9 | self.url = url.rstrip("/") 10 | self.auth = auth 11 | # implicitly_added is set to True only if the endpoint wasn't configured manually by the user through 12 | # a widget, but was instead implicitly defined as an endpoint to a wrapper kernel in the configuration 13 | # JSON file. 14 | self.implicitly_added = implicitly_added 15 | 16 | def __eq__(self, other): 17 | if type(other) is not Endpoint: 18 | return False 19 | return self.url == other.url and self.auth == other.auth 20 | 21 | def __hash__(self): 22 | return hash((self.url, self.auth)) 23 | 24 | def __ne__(self, other): 25 | return not self == other 26 | 27 | def __str__(self): 28 | return "Endpoint({})".format(self.url) 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # Notebook Checkpoints 60 | .ipynb_checkpoints 61 | 62 | .idea/* 63 | MANIFEST 64 | .vscode/* 65 | !.vscode/tasks.json 66 | 67 | venv/* 68 | */.idea/* 69 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/filehandler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .utils import join_paths, get_instance_id 4 | from .filesystemreaderwriter import FileSystemReaderWriter 5 | 6 | 7 | class MagicsFileHandler(logging.FileHandler): 8 | """The default logging handler used by the magics; this behavior can be overridden by modifying the config file""" 9 | 10 | def __init__(self, **kwargs): 11 | # Simply invokes the behavior of the superclass, but sets the filename keyword argument if it's not already set. 12 | if "filename" in kwargs: 13 | super(MagicsFileHandler, self).__init__(**kwargs) 14 | else: 15 | magics_home_path = kwargs.pop("home_path") 16 | logs_folder_name = "logs" 17 | log_file_name = "log_{}.log".format(get_instance_id()) 18 | directory = FileSystemReaderWriter( 19 | join_paths(magics_home_path, logs_folder_name) 20 | ) 21 | directory.ensure_path_exists() 22 | super(MagicsFileHandler, self).__init__( 23 | filename=join_paths(directory.path, log_file_name), **kwargs 24 | ) 25 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/kerberos.py: -------------------------------------------------------------------------------- 1 | """Class for implementing a Kerberos authenticator for SparkMagic""" 2 | 3 | from requests_kerberos import HTTPKerberosAuth 4 | import sparkmagic.utils.configuration as conf 5 | from .customauth import Authenticator 6 | 7 | 8 | class Kerberos(HTTPKerberosAuth, Authenticator): 9 | """Kerberos authenticator for SparkMagic""" 10 | 11 | def __init__(self, parsed_attributes=None): 12 | """Initializes the Authenticator with the attributes in the attributes 13 | parsed from a %spark magic command if applicable, or with default values 14 | otherwise. 15 | 16 | Args: 17 | self, 18 | parsed_attributes (IPython.core.magics.namespace): The namespace object that 19 | is created from parsing %spark magic command. 
20 | """ 21 | HTTPKerberosAuth.__init__(self, **conf.kerberos_auth_configuration()) 22 | Authenticator.__init__(self, parsed_attributes) 23 | 24 | def __call__(self, request): 25 | return HTTPKerberosAuth.__call__(self, request) 26 | 27 | def __hash__(self): 28 | return hash((self.url, self.__class__.__name__)) 29 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_endpoint.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 2 | from sparkmagic.livyclientlib.endpoint import Endpoint 3 | from sparkmagic.auth.basic import Basic 4 | from sparkmagic.auth.kerberos import Kerberos 5 | 6 | 7 | def test_equality(): 8 | basic_auth1 = Basic() 9 | basic_auth2 = Basic() 10 | kerberos_auth1 = Kerberos() 11 | kerberos_auth2 = Kerberos() 12 | assert Endpoint("http://url.com", basic_auth1) == Endpoint( 13 | "http://url.com", basic_auth2 14 | ) 15 | assert Endpoint("http://url.com", kerberos_auth1) == Endpoint( 16 | "http://url.com", kerberos_auth2 17 | ) 18 | 19 | 20 | def test_inequality(): 21 | basic_auth1 = Basic() 22 | basic_auth2 = Basic() 23 | basic_auth1.username = "user" 24 | basic_auth2.username = "different_user" 25 | assert Endpoint("http://url.com", basic_auth1) != Endpoint( 26 | "http://url.com", basic_auth2 27 | ) 28 | 29 | 30 | def test_invalid_url(): 31 | basic_auth = Basic() 32 | try: 33 | endpoint = Endpoint(None, basic_auth) 34 | assert False 35 | except BadUserDataException: 36 | assert True 37 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/filesystemreaderwriter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | import os 4 | 5 | 6 | class FileSystemReaderWriter(object): 7 | def __init__(self, path): 8 | from .utils import expand_path 9 | 10 | assert path is not None 11 | self.path = expand_path(path) 12 | 13 | def ensure_path_exists(self): 14 | self._ensure_path_exists(self.path) 15 | 16 | def ensure_file_exists(self): 17 | self._ensure_path_exists(os.path.dirname(self.path)) 18 | if not os.path.exists(self.path): 19 | open(self.path, "w").close() 20 | 21 | def read_lines(self): 22 | if os.path.isfile(self.path): 23 | with open(self.path, "r") as f: 24 | return f.readlines() 25 | else: 26 | return "" 27 | 28 | def overwrite_with_line(self, line): 29 | with open(self.path, "w+") as f: 30 | f.writelines(line) 31 | 32 | def _ensure_path_exists(self, path): 33 | try: 34 | os.makedirs(path) 35 | except OSError: 36 | if not os.path.isdir(path): 37 | raise 38 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_events.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from mock import MagicMock 3 | 4 | from hdijupyterutils.events import Events 5 | from hdijupyterutils.utils import generate_uuid 6 | from hdijupyterutils.constants import INSTANCE_ID, TIMESTAMP 7 | from hdijupyterutils.utils import get_instance_id 8 | 9 | 10 | def setup_function(): 11 | global events, guid1, guid2, guid3, time_stamp 12 | 13 | events = Events(MagicMock()) 14 | events.get_utc_date_time = MagicMock() 15 | time_stamp = events.get_utc_date_time() 16 | guid1 = generate_uuid() 17 | guid2 = generate_uuid() 18 | guid3 = generate_uuid() 19 | 20 | 21 | def teardown_function(): 22 | pass 23 | 24 | 25 | def test_send_to_handler(): 26 | kwargs_list = [(TIMESTAMP, time_stamp)] 27 | expected_kwargs_list = [(INSTANCE_ID, get_instance_id())] + kwargs_list 28 | 29 | events.send_to_handler(kwargs_list) 30 | 31 | events.handler.handle_event.assert_called_once_with(expected_kwargs_list) 32 | 33 | 34 | def test_send_to_handler_asserts_less_than_12(): 35 | with pytest.raises(AssertionError): 36 | kwargs_list = [(TIMESTAMP, time_stamp)] * 13 37 | events.send_to_handler(kwargs_list) 38 | assert False 39 | -------------------------------------------------------------------------------- /helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v2 3 | name: sparkmagic 4 | description: A Helm chart for Kubernetes 5 | 6 | # A chart can be either an 'application' or a 'library' chart. 7 | # 8 | # Application charts are a collection of templates that can be packaged into versioned archives 9 | # to be deployed. 10 | # 11 | # Library charts provide useful utilities or functions for the chart developer. They're included as 12 | # a dependency of application charts to inject those utilities and functions into the rendering 13 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 14 | type: application 15 | 16 | # This is the chart version. This version number should be incremented each time you make changes 17 | # to the chart and its templates, including the app version. 18 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 19 | version: 0.1.0 20 | 21 | # This is the version number of the application being deployed. This version number should be 22 | # incremented each time you make changes to the application. Versions are not expected to 23 | # follow Semantic Versioning. 
They should reflect the version the application is using. 24 | # It is recommended to use it with quotes. 25 | appVersion: "0.21.0" 26 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/sparkrkernel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.utils.constants import LANG_R 4 | from sparkmagic.kernels.wrapperkernel.sparkkernelbase import SparkKernelBase 5 | 6 | 7 | class SparkRKernel(SparkKernelBase): 8 | def __init__(self, **kwargs): 9 | implementation = "SparkR" 10 | implementation_version = "1.0" 11 | language = LANG_R 12 | language_version = "0.1" 13 | language_info = { 14 | "name": "sparkR", 15 | "mimetype": "text/x-rsrc", 16 | "codemirror_mode": "text/x-rsrc", 17 | "file_extension": ".r", 18 | "pygments_lexer": "r", 19 | } 20 | 21 | session_language = LANG_R 22 | 23 | super(SparkRKernel, self).__init__( 24 | implementation, 25 | implementation_version, 26 | language, 27 | language_version, 28 | language_info, 29 | session_language, 30 | **kwargs 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | from ipykernel.kernelapp import IPKernelApp 36 | 37 | IPKernelApp.launch_instance(kernel_class=SparkRKernel) 38 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/sparkkernel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.utils.constants import LANG_SCALA 4 | from sparkmagic.kernels.wrapperkernel.sparkkernelbase import SparkKernelBase 5 | 6 | 7 | class SparkKernel(SparkKernelBase): 8 | def __init__(self, **kwargs): 9 | implementation = "Spark" 10 | implementation_version = "1.0" 11 | language = LANG_SCALA 12 | language_version = "0.1" 13 | language_info = { 14 | "name": "scala", 15 | "mimetype": "text/x-scala", 16 | "codemirror_mode": "text/x-scala", 17 | "file_extension": ".sc", 18 | "pygments_lexer": "scala", 19 | } 20 | 21 | session_language = LANG_SCALA 22 | 23 | super(SparkKernel, self).__init__( 24 | implementation, 25 | implementation_version, 26 | language, 27 | language_version, 28 | language_info, 29 | session_language, 30 | **kwargs 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | from ipykernel.kernelapp import IPKernelApp 36 | 37 | IPKernelApp.launch_instance(kernel_class=SparkKernel) 38 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/pysparkkernel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | from sparkmagic.utils.constants import LANG_PYTHON 4 | from sparkmagic.kernels.wrapperkernel.sparkkernelbase import SparkKernelBase 5 | 6 | 7 | class PySparkKernel(SparkKernelBase): 8 | def __init__(self, **kwargs): 9 | implementation = "PySpark" 10 | implementation_version = "1.0" 11 | language = LANG_PYTHON 12 | language_version = "0.1" 13 | language_info = { 14 | "name": "pyspark", 15 | "mimetype": "text/x-python", 16 | "codemirror_mode": {"name": "python", "version": 3}, 17 | "file_extension": ".py", 18 | "pygments_lexer": "python3", 19 | } 20 | 21 | session_language = LANG_PYTHON 22 | 23 | super(PySparkKernel, self).__init__( 24 | implementation, 25 | implementation_version, 26 | language, 27 | language_version, 28 | language_info, 29 | session_language, 30 | **kwargs 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | from ipykernel.kernelapp import IPKernelApp 36 | 37 | IPKernelApp.launch_instance(kernel_class=PySparkKernel) 38 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run Unit Tests 2 | 3 | on: 4 | push: {} 5 | pull_request: {} 6 | schedule: 7 | # Run daily 8 | - cron: "6 4 * * *" 9 | workflow_call: 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install system dependencies 25 | run: | 26 | sudo apt-get install -y libkrb5-dev 27 | - name: Install package dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install pytest mock 31 | pip install -r hdijupyterutils/requirements.txt -e hdijupyterutils 32 | pip install -r autovizwidget/requirements.txt -e autovizwidget 33 | pip install -r sparkmagic/requirements.txt -e sparkmagic 34 | - name: Run hdijupyterutils tests 35 | run: | 36 | pytest hdijupyterutils 37 | - name: Run autovizwidget tests 38 | run: | 39 | pytest autovizwidget 40 | - name: Run sparkmagic tests 41 | run: | 42 | mkdir ~/.sparkmagic 43 | pytest sparkmagic 44 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/abstractmenuwidget.py: -------------------------------------------------------------------------------- 1 | from ipywidgets import Box 2 | 3 | from hdijupyterutils.ipythondisplay import IpythonDisplay 4 | from hdijupyterutils.ipywidgetfactory import IpyWidgetFactory 5 | 6 | 7 | class AbstractMenuWidget(Box): 8 | def __init__( 9 | self, 10 | spark_controller, 11 | ipywidget_factory=None, 12 | ipython_display=None, 13 | nested_widget_mode=False, 14 | testing=False, 15 | **kwargs 16 | ): 17 | kwargs["orientation"] = "vertical" 18 | 19 | if not testing: 20 | super(AbstractMenuWidget, self).__init__((), **kwargs) 21 | 22 | self.spark_controller = spark_controller 23 | 24 | if ipywidget_factory is None: 25 | ipywidget_factory = IpyWidgetFactory() 26 | self.ipywidget_factory = ipywidget_factory 27 | 28 | if ipython_display is None: 29 | ipython_display = IpythonDisplay() 30 | self.ipython_display = ipython_display 31 | 32 | self.children = [] 33 | 34 | if not nested_widget_mode: 35 | self._repr_html_() 36 | 37 | def _repr_html_(self): 38 | for child in self.children: 39 | self.ipython_display.display(child) 40 
| return "" 41 | 42 | def hide_all(self): 43 | for child in self.children: 44 | child.visible = False 45 | 46 | def run(self): 47 | raise NotImplementedError("Concrete menu widget must define run") 48 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_plotlygraphrenderer.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | 3 | from ..plotlygraphs.graphrenderer import GraphRenderer 4 | from ..widget.encoding import Encoding 5 | 6 | 7 | def test_support_all_graph_types(): 8 | renderer = GraphRenderer() 9 | 10 | for chart_type in Encoding.supported_chart_types: 11 | graph = renderer._get_graph(chart_type) 12 | assert graph is not None 13 | getattr(graph, "render") 14 | getattr(graph, "display_x") 15 | getattr(graph, "display_y") 16 | getattr(graph, "display_logarithmic_x_axis") 17 | getattr(graph, "display_logarithmic_y_axis") 18 | 19 | 20 | def test_display_controls(): 21 | renderer = GraphRenderer() 22 | 23 | GraphRenderer.display_x = MagicMock(return_value=True) 24 | GraphRenderer.display_y = MagicMock(return_value=True) 25 | assert renderer.display_controls(Encoding.chart_type_line) 26 | 27 | GraphRenderer.display_x = MagicMock(return_value=True) 28 | GraphRenderer.display_y = MagicMock(return_value=False) 29 | assert renderer.display_controls(Encoding.chart_type_line) 30 | 31 | GraphRenderer.display_x = MagicMock(return_value=False) 32 | GraphRenderer.display_y = MagicMock(return_value=True) 33 | assert renderer.display_controls(Encoding.chart_type_line) 34 | 35 | GraphRenderer.display_x = MagicMock(return_value=False) 36 | GraphRenderer.display_y = MagicMock(return_value=False) 37 | assert not renderer.display_controls(Encoding.chart_type_line) 38 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_filesystemreaderwriter.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from hdijupyterutils.filesystemreaderwriter import FileSystemReaderWriter 4 | 5 | 6 | def test_read(): 7 | path = "test" 8 | if os.path.isfile(path): 9 | os.remove(path) 10 | 11 | expected_lines = ["a\n", "b"] 12 | rw = FileSystemReaderWriter(path) 13 | with open("test", "w") as f: 14 | f.writelines(expected_lines) 15 | 16 | read_lines = rw.read_lines() 17 | assert expected_lines == read_lines 18 | 19 | os.remove(path) 20 | 21 | 22 | def test_write_non_existent_file(): 23 | path = "test" 24 | if os.path.isfile(path): 25 | os.remove(path) 26 | 27 | expected_line = "hi" 28 | 29 | rw = FileSystemReaderWriter(path) 30 | rw.overwrite_with_line(expected_line) 31 | 32 | with open("test", "r") as f: 33 | lines = f.readlines() 34 | assert len(lines) == 1 35 | assert lines[0] == expected_line 36 | 37 | os.remove(path) 38 | 39 | 40 | def test_overwrite_existent_file(): 41 | path = "test" 42 | if os.path.isfile(path): 43 | os.remove(path) 44 | 45 | with open("test", "w") as f: 46 | f.writelines(["ab"]) 47 | 48 | expected_line = "hi" 49 | 50 | rw = FileSystemReaderWriter(path) 51 | rw.overwrite_with_line(expected_line) 52 | 53 | with open("test", "r") as f: 54 | lines = f.readlines() 55 | assert len(lines) == 1 56 | assert lines[0] == expected_line 57 | 58 | os.remove(path) 59 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/configurableretrypolicy.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from .linearretrypolicy import LinearRetryPolicy 5 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 6 | 7 | 8 | class ConfigurableRetryPolicy(LinearRetryPolicy): 9 | """Retry policy that returns a configurable number of seconds to sleep 10 | between calls, takes all status codes 500 or above to be retriable, and 11 | retries a given maximum number of times. 12 | 13 | If the retry count exceeds the number of items in the list, the last 14 | item in the list is always returned. 15 | """ 16 | 17 | def __init__(self, retry_seconds_to_sleep_list, max_retries): 18 | super(ConfigurableRetryPolicy, self).__init__(-1, max_retries) 19 | 20 | # If the user configured an empty list, make this behave as a 21 | # Linear Retry Policy by assigning a list of 1 element. 22 | if len(retry_seconds_to_sleep_list) == 0: 23 | retry_seconds_to_sleep_list = [5] 24 | elif not all(n > 0 for n in retry_seconds_to_sleep_list): 25 | raise BadUserConfigurationException( 26 | "All items in the list in your config need to be positive for configurable retry policy" 27 | ) 28 | 29 | self.retry_seconds_to_sleep_list = retry_seconds_to_sleep_list 30 | self._max_index = len(self.retry_seconds_to_sleep_list) - 1 31 | 32 | def seconds_to_sleep(self, retry_count): 33 | index = max(retry_count - 1, 0) 34 | if index > self._max_index: 35 | index = self._max_index 36 | 37 | return self.retry_seconds_to_sleep_list[index] 38 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_sparkevents.py: -------------------------------------------------------------------------------- 1 | from hdijupyterutils.constants import INSTANCE_ID, EVENT_NAME, TIMESTAMP 2 | from hdijupyterutils.utils import get_instance_id 3 | from mock import MagicMock 4 | 5 | from autovizwidget.utils.events import AutoVizEvents 6 | from autovizwidget.utils.constants import GRAPH_RENDER_EVENT, GRAPH_TYPE 7 | import autovizwidget.utils.configuration as conf 8 | 9 | 10 | def setup_function(): 11 | global events, time_stamp 12 | 13 | events = AutoVizEvents() 14 | events.handler = MagicMock() 15 | events.get_utc_date_time = MagicMock() 16 | time_stamp = events.get_utc_date_time() 17 | 18 | 19 | def teardown_function(): 20 | conf.override_all({}) 21 | 22 | 23 | def test_not_emit_graph_render_event_when_not_registered(): 24 | event_name = GRAPH_RENDER_EVENT 25 | graph_type = "Bar" 26 | 27 | kwargs_list = [ 28 | (INSTANCE_ID, get_instance_id()), 29 | (EVENT_NAME, event_name), 30 | (TIMESTAMP, time_stamp), 31 | (GRAPH_TYPE, graph_type), 32 | ] 33 | 34 | events.emit_graph_render_event(graph_type) 35 | 36 | events.get_utc_date_time.assert_called_with() 37 | assert not events.handler.handle_event.called 38 | 39 | 40 | def test_emit_graph_render_event_when_registered(): 41 | conf.override(conf.events_handler.__name__, events.handler) 42 | event_name = GRAPH_RENDER_EVENT 43 | graph_type = "Bar" 44 | 45 | kwargs_list = [ 46 | (INSTANCE_ID, get_instance_id()), 47 | (EVENT_NAME, event_name), 48 | (TIMESTAMP, time_stamp), 49 | (GRAPH_TYPE, graph_type), 50 | ] 51 | 52 | events.emit_graph_render_event(graph_type) 53 | 54 | events.get_utc_date_time.assert_called_with() 55 | events.handler.handle_event.assert_called_once_with(kwargs_list) 56 |
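As a quick illustration of the sleep schedule that the ConfigurableRetryPolicy above produces, here is a minimal sketch; the sleep list and retry budget are arbitrary example values, not defaults:

from sparkmagic.livyclientlib.configurableretrypolicy import ConfigurableRetryPolicy

# Sleep 0.2s after the first failed attempt, 0.5s after the second, 1s after
# the third; any later retry is clamped to the last item in the list.
policy = ConfigurableRetryPolicy(retry_seconds_to_sleep_list=[0.2, 0.5, 1], max_retries=8)

assert policy.seconds_to_sleep(1) == 0.2
assert policy.seconds_to_sleep(3) == 1
assert policy.seconds_to_sleep(10) == 1  # retry_count past the end of the list: last item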
-------------------------------------------------------------------------------- /Dockerfile.jupyter: -------------------------------------------------------------------------------- 1 | FROM jupyter/base-notebook 2 | 3 | ARG dev_mode=false 4 | 5 | USER root 6 | 7 | # This is needed because requests-kerberos fails to install on debian due to missing linux headers 8 | RUN conda install requests-kerberos -y 9 | 10 | USER $NB_USER 11 | 12 | RUN pip install --upgrade pip 13 | RUN pip install --upgrade --ignore-installed setuptools 14 | 15 | COPY examples /home/jovyan/work 16 | 17 | # Install sparkmagic - if DEV_MODE is set, use the one in the host directory. 18 | # Otherwise, just install from pip. 19 | COPY hdijupyterutils hdijupyterutils/ 20 | COPY autovizwidget autovizwidget/ 21 | COPY sparkmagic sparkmagic/ 22 | 23 | USER root 24 | RUN chown -R $NB_USER . 25 | 26 | USER $NB_USER 27 | RUN if [ "$dev_mode" = "true" ]; then \ 28 | cd hdijupyterutils && pip install -e . && cd ../ && \ 29 | cd autovizwidget && pip install -e . && cd ../ && \ 30 | cd sparkmagic && pip install -e . && cd ../ ; \ 31 | else pip install sparkmagic ; fi 32 | 33 | 34 | RUN mkdir /home/$NB_USER/.sparkmagic 35 | COPY sparkmagic/example_config.json /home/$NB_USER/.sparkmagic/config.json 36 | RUN sed -i 's/localhost/spark/g' /home/$NB_USER/.sparkmagic/config.json 37 | RUN pip install ipywidgets 38 | RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkkernel 39 | RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/pysparkkernel 40 | RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkrkernel 41 | RUN jupyter server extension enable --py sparkmagic 42 | 43 | USER root 44 | RUN chown $NB_USER /home/$NB_USER/.sparkmagic/config.json 45 | 46 | CMD ["start-notebook.sh", "--NotebookApp.iopub_data_rate_limit=1000000000"] 47 | 48 | USER $NB_USER 49 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/wrapperkernel/usercodeparser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from sparkmagic.kernels.kernelmagics import KernelMagics 5 | 6 | 7 | class UserCodeParser(object): 8 | # A list of the names of all magics that are cell magics, but which have no cell body input. 9 | # For example, the %%info magic has no cell body input, i.e. 
it is incorrect to call 10 | # %%info 11 | # some_input 12 | _magics_with_no_cell_body = [ 13 | i.__name__ 14 | for i in [ 15 | KernelMagics.info, 16 | KernelMagics.logs, 17 | KernelMagics.cleanup, 18 | KernelMagics.delete, 19 | KernelMagics.help, 20 | KernelMagics.spark, 21 | KernelMagics.send_to_spark, 22 | ] 23 | ] 24 | 25 | def get_code_to_run(self, code): 26 | try: 27 | all_but_first_line = code.split(None, 1)[1] 28 | except IndexError: 29 | all_but_first_line = "" 30 | 31 | if code.startswith("%%local") or code.startswith("%local"): 32 | return all_but_first_line 33 | elif any(code.startswith("%%" + s) for s in self._magics_with_no_cell_body): 34 | return "{}\n ".format(code) 35 | elif any(code.startswith("%" + s) for s in self._magics_with_no_cell_body): 36 | return "%{}\n ".format(code) 37 | elif code.startswith("%%") or code.startswith("%"): 38 | # If they use other line magics: 39 | # %autosave 40 | # my spark code 41 | # my spark code would be run locally and there might be an error. 42 | return code 43 | elif not code: 44 | return code 45 | else: 46 | return "%%spark\n{}".format(code) 47 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_configuration.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | 3 | from hdijupyterutils.configuration import override, override_all, with_override 4 | 5 | 6 | # This is a sample implementation of how a module would use the config methods. 7 | # We'll use these three functions to test it works. 8 | d = {} 9 | path = "~/.testing/config.json" 10 | original_value = 0 11 | 12 | 13 | def module_override(config, value): 14 | global d, path 15 | override(d, path, config, value) 16 | 17 | 18 | def module_override_all(obj): 19 | global d 20 | override_all(d, obj) 21 | 22 | 23 | # Configs 24 | @with_override(d, path) 25 | def my_config(): 26 | global original_value 27 | return original_value 28 | 29 | 30 | @with_override(d, path) 31 | def my_config_2(): 32 | global original_value 33 | return original_value 34 | 35 | 36 | # Test helper functions 37 | def setup_function(): 38 | module_override_all({}) 39 | 40 | 41 | def teardown_function(): 42 | module_override_all({}) 43 | 44 | 45 | # Unit tests begin 46 | def test_original_value_without_overrides(): 47 | assert original_value == my_config() 48 | 49 | 50 | def test_original_value_with_overrides(): 51 | new_value = 2 52 | module_override(my_config.__name__, new_value) 53 | assert new_value == my_config() 54 | 55 | 56 | def test_original_values_when_others_override(): 57 | new_value = 2 58 | module_override(my_config.__name__, new_value) 59 | assert new_value == my_config() 60 | assert original_value == my_config_2() 61 | 62 | 63 | def test_resetting_values_when_others_override(): 64 | new_value = 2 65 | module_override(my_config.__name__, new_value) 66 | assert new_value == my_config() 67 | assert original_value == my_config_2() 68 | 69 | # Reset 70 | module_override_all({}) 71 | assert original_value == my_config() 72 | assert original_value == my_config_2() 73 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/datagraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | import pandas as pd 5 | from hdijupyterutils.ipythondisplay import IpythonDisplay 6 | 7 | 8 | class DataGraph(object): 9 | """This does not use the table version of plotly because it freezes up the browser for >60 rows. Instead, we use 10 | the pandas df HTML representation.""" 11 | 12 | def __init__(self, display=None): 13 | if display is None: 14 | self.display = IpythonDisplay() 15 | else: 16 | self.display = display 17 | 18 | def render(self, df, encoding, output): 19 | with output: 20 | max_rows = pd.get_option("display.max_rows") 21 | max_cols = pd.get_option("display.max_columns") 22 | show_dimensions = pd.get_option("display.show_dimensions") 23 | 24 | # This will hide the index column for pandas df. 25 | self.display.html( 26 | """ 27 | <style type="text/css"> 28 | table.dataframe.hideme tbody th { 29 | display: none; 30 | } 31 | table.dataframe.hideme thead th:first-child { 32 | display: none; 33 | } 34 | </style> 35 | """ 36 | ) 37 | self.display.html( 38 | df.to_html( 39 | max_rows=max_rows, 40 | max_cols=max_cols, 41 | show_dimensions=show_dimensions, 42 | notebook=True, 43 | classes="hideme", 44 | ) 45 | ) 46 | 47 | @staticmethod 48 | def display_logarithmic_x_axis(): 49 | return False 50 | 51 | @staticmethod 52 | def display_logarithmic_y_axis(): 53 | return False 54 | 55 | @staticmethod 56 | def display_x(): 57 | return False 58 | 59 | @staticmethod 60 | def display_y(): 61 | return False 62 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/log.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 2 | 3 | from __future__ import print_function 4 | import logging 5 | import logging.config 6 | 7 | from .constants import LOGGING_CONFIG_CLASS_NAME 8 | 9 | 10 | class Log(object): 11 | """Logger for magics. A small wrapper class around the configured logger described in the configuration file.""" 12 | 13 | def __init__(self, logger_name, logging_config, caller_name): 14 | logging.config.dictConfig(logging_config) 15 | 16 | assert caller_name is not None 17 | self._caller_name = caller_name 18 | self.logger_name = logger_name 19 | self._getLogger() 20 | 21 | def debug(self, message): 22 | self.logger.debug(self._transform_log_message(message)) 23 | 24 | def error(self, message): 25 | self.logger.error(self._transform_log_message(message)) 26 | 27 | def info(self, message): 28 | self.logger.info(self._transform_log_message(message)) 29 | 30 | def _getLogger(self): 31 | self.logger = logging.getLogger(self.logger_name) 32 | 33 | def _transform_log_message(self, message): 34 | return "{}\t{}".format(self._caller_name, message) 35 | 36 | 37 | def logging_config(): 38 | return { 39 | "version": 1, 40 | "formatters": { 41 | "magicsFormatter": { 42 | "format": "%(asctime)s\t%(levelname)s\t%(message)s", 43 | "datefmt": "", 44 | } 45 | }, 46 | "handlers": { 47 | "magicsHandler": { 48 | "class": LOGGING_CONFIG_CLASS_NAME, 49 | "formatter": "magicsFormatter", 50 | "home_path": "~/.hdijupyterutils", 51 | } 52 | }, 53 | "loggers": { 54 | "magicsLogger": { 55 | "handlers": ["magicsHandler"], 56 | "level": "DEBUG", 57 | "propagate": 0, 58 | } 59 | }, 60 | } 61 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Release 3 | 4 | on: 5 | workflow_dispatch: 6 | inputs: 7 | type: 8 | type: choice 9 | description: Select the type of version bump 10 | # https://github.com/jaumann/github-bumpversion-action/tree/v0.0.7/?tab=readme-ov-file#bumping 11 | options: 12 | - 
major 13 | - minor 14 | - patch 15 | default: 'patch' 16 | jobs: 17 | release: 18 | # Do not run this job for pull requests where both branches are from the same repo. 19 | # Jobs that depend on this one will be skipped too. 20 | # This prevents duplicate CI runs for our own pull requests, whilst preserving the ability to 21 | # run the CI for each branch push to a fork, and for each pull request originating from a fork. 22 | if: github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.id != github.event.pull_request.base.repo.id 23 | permissions: 24 | contents: write 25 | packages: write 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Check out code 29 | uses: actions/checkout@v4 30 | with: 31 | persist-credentials: true 32 | fetch-depth: 0 33 | ref: master 34 | 35 | - name: Bump version and create tag 36 | uses: jasonamyers/github-bumpversion-action@v1.0.5 37 | id: bump 38 | env: 39 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 40 | DEFAULT_BUMP: ${{ inputs.type }} 41 | 42 | - name: Push tag 43 | run: git push origin master --tags 44 | env: 45 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | - name: Create GitHub Release with changelog 48 | id: create_release 49 | uses: ncipollo/release-action@v1.14.0 50 | with: 51 | name: ${{ steps.bump.outputs.new_ver }} 52 | bodyFile: CHANGELOG.md 53 | tag: ${{ steps.bump.outputs.new_ver }} 54 | env: 55 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 56 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/ipywidgetfactory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from ipywidgets import ( 5 | VBox, 6 | Output, 7 | Button, 8 | HTML, 9 | HBox, 10 | Dropdown, 11 | Checkbox, 12 | ToggleButtons, 13 | Text, 14 | Textarea, 15 | Tab, 16 | Password, 17 | ) 18 | 19 | 20 | class IpyWidgetFactory(object): 21 | """This class exists solely for unit testing purposes.""" 22 | 23 | @staticmethod 24 | def get_vbox(**kwargs): 25 | return VBox(**kwargs) 26 | 27 | @staticmethod 28 | def get_output(**kwargs): 29 | return Output(**kwargs) 30 | 31 | @staticmethod 32 | def get_button(**kwargs): 33 | return Button(**kwargs) 34 | 35 | @staticmethod 36 | def get_html(value, **kwargs): 37 | return HTML(value, **kwargs) 38 | 39 | @staticmethod 40 | def get_hbox(**kwargs): 41 | return HBox(**kwargs) 42 | 43 | @staticmethod 44 | def get_dropdown(**kwargs): 45 | return Dropdown(**kwargs) 46 | 47 | @staticmethod 48 | def get_checkbox(**kwargs): 49 | return Checkbox(**kwargs) 50 | 51 | @staticmethod 52 | def get_toggle_buttons(**kwargs): 53 | return ToggleButtons(**kwargs) 54 | 55 | @staticmethod 56 | def get_text(**kwargs): 57 | return Text(**kwargs) 58 | 59 | @staticmethod 60 | def get_password(**kwargs): 61 | return Password(**kwargs) 62 | 63 | @staticmethod 64 | def get_text_area(**kwargs): 65 | return Textarea(**kwargs) 66 | 67 | @staticmethod 68 | def get_submit_button(**kwargs): 69 | return SubmitButton(**kwargs) 70 | 71 | @staticmethod 72 | def get_tab(**kwargs): 73 | return Tab(**kwargs) 74 | 75 | 76 | class SubmitButton(Button): 77 | def __init__(self, **kwargs): 78 | super(SubmitButton, self).__init__(**kwargs) 79 | self.on_click(self.submit_clicked) 80 | 81 | def submit_clicked(self, button): 82 | self.parent_widget.run() 83 | -------------------------------------------------------------------------------- 
/sparkmagic/sparkmagic/livyclientlib/sendstringtosparkcommand.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Jupyter Development Team. 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from sparkmagic.livyclientlib.sendtosparkcommand import SendToSparkCommand 5 | from sparkmagic.livyclientlib.command import Command 6 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 7 | 8 | 9 | class SendStringToSparkCommand(SendToSparkCommand): 10 | def _scala_command( 11 | self, input_variable_name, input_variable_value, output_variable_name 12 | ): 13 | self._assert_input_is_string_type(input_variable_name, input_variable_value) 14 | scala_code = 'var {} = """{}"""'.format( 15 | output_variable_name, input_variable_value 16 | ) 17 | return Command(scala_code) 18 | 19 | def _pyspark_command( 20 | self, input_variable_name, input_variable_value, output_variable_name 21 | ): 22 | self._assert_input_is_string_type(input_variable_name, input_variable_value) 23 | pyspark_code = "{} = {}".format( 24 | output_variable_name, repr(input_variable_value) 25 | ) 26 | return Command(pyspark_code) 27 | 28 | def _r_command( 29 | self, input_variable_name, input_variable_value, output_variable_name 30 | ): 31 | self._assert_input_is_string_type(input_variable_name, input_variable_value) 32 | escaped_input_variable_value = input_variable_value.replace( 33 | "\\", "\\\\" 34 | ).replace('"', '\\"') 35 | r_code = """assign("{}","{}")""".format( 36 | output_variable_name, escaped_input_variable_value 37 | ) 38 | return Command(r_code) 39 | 40 | def _assert_input_is_string_type(self, input_variable_name, input_variable_value): 41 | if not isinstance(input_variable_value, str): 42 | wrong_type = input_variable_value.__class__.__name__ 43 | raise BadUserDataException( 44 | "{} is not a str! Got {} instead".format( 45 | input_variable_name, wrong_type 46 | ) 47 | ) 48 | -------------------------------------------------------------------------------- /hdijupyterutils/setup.py: -------------------------------------------------------------------------------- 1 | DESCRIPTION = "HdiJupyterUtils: Utils for Jupyter projects from HDInsight team" 2 | NAME = "hdijupyterutils" 3 | PACKAGES = ["hdijupyterutils"] 4 | AUTHOR = "Jupyter Development Team" 5 | AUTHOR_EMAIL = "jupyter@googlegroups.org" 6 | URL = "https://github.com/jupyter-incubator/sparkmagic" 7 | DOWNLOAD_URL = "https://github.com/jupyter-incubator/sparkmagic" 8 | LICENSE = "BSD 3-clause" 9 | 10 | import io 11 | import os 12 | import re 13 | 14 | from distutils.core import setup 15 | 16 | 17 | def read(path, encoding="utf-8"): 18 | path = os.path.join(os.path.dirname(__file__), path) 19 | with io.open(path, encoding=encoding) as fp: 20 | return fp.read() 21 | 22 | 23 | # read requirements.txt and convert to install_requires format 24 | def requirements(path): 25 | lines = read(path).splitlines() 26 | # remove comments and empty lines 27 | lines = [line for line in lines if not line.startswith("#") and line] 28 | return lines 29 | 30 | 31 | def version(path): 32 | """Obtain the package version from a python file e.g. pkg/__init__.py. 33 | 34 | See the Python packaging guide on single-sourcing the package version.
35 | """ 36 | version_file = read(path) 37 | version_match = re.search( 38 | r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M 39 | ) 40 | if version_match: 41 | return version_match.group(1) 42 | raise RuntimeError("Unable to find version string.") 43 | 44 | 45 | VERSION = version("hdijupyterutils/__init__.py") 46 | 47 | setup( 48 | name=NAME, 49 | version=VERSION, 50 | description=DESCRIPTION, 51 | author=AUTHOR, 52 | author_email=AUTHOR_EMAIL, 53 | url=URL, 54 | download_url=DOWNLOAD_URL, 55 | license=LICENSE, 56 | packages=PACKAGES, 57 | classifiers=[ 58 | "Development Status :: 4 - Beta", 59 | "Environment :: Console", 60 | "Intended Audience :: Science/Research", 61 | "License :: OSI Approved :: BSD License", 62 | "Natural Language :: English", 63 | "Programming Language :: Python :: 3.7", 64 | ], 65 | install_requires=requirements("requirements.txt"), 66 | ) 67 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_logger.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import logging 3 | 4 | from hdijupyterutils.log import Log, logging_config 5 | 6 | 7 | def get_logging_config(): 8 | return logging_config() 9 | 10 | 11 | def test_log_init(): 12 | logging_config = get_logging_config() 13 | logger = Log("name", logging_config, "something") 14 | assert isinstance(logger.logger, logging.Logger) 15 | 16 | 17 | # A MockLogger class with debug and error methods that store the most recent level + message in an 18 | # instance variable. 19 | class MockLogger(object): 20 | def __init__(self): 21 | self.level = self.message = None 22 | 23 | def debug(self, message): 24 | self.level, self.message = "DEBUG", message 25 | 26 | def error(self, message): 27 | self.level, self.message = "ERROR", message 28 | 29 | def info(self, message): 30 | self.level, self.message = "INFO", message 31 | 32 | 33 | class MockLog(Log): 34 | def __init__(self, name): 35 | logging_config = get_logging_config() 36 | super(MockLog, self).__init__(name, logging_config, name) 37 | 38 | def _getLogger(self): 39 | self.logger = MockLogger() 40 | 41 | 42 | def test_log_returnvalue(): 43 | logger = MockLog("test2") 44 | assert isinstance(logger.logger, MockLogger) 45 | mock = logger.logger 46 | logger.debug("word1") 47 | assert mock.level == "DEBUG" 48 | assert mock.message == "test2\tword1" 49 | logger.error("word2") 50 | assert mock.level == "ERROR" 51 | assert mock.message == "test2\tword2" 52 | logger.info("word3") 53 | assert mock.level == "INFO" 54 | assert mock.message == "test2\tword3" 55 | 56 | 57 | def test_log_unicode(): 58 | logger = MockLog("test2") 59 | assert isinstance(logger.logger, MockLogger) 60 | mock = logger.logger 61 | logger.debug("word1è") 62 | assert mock.level == "DEBUG" 63 | assert mock.message == "test2\tword1è" 64 | logger.error("word2è") 65 | assert mock.level == "ERROR" 66 | assert mock.message == "test2\tword2è" 67 | logger.info("word3è") 68 | assert mock.level == "INFO" 69 | assert mock.message == "test2\tword3è" 70 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/customauth.py: -------------------------------------------------------------------------------- 1 | """Base class for implementing an authentication provider for SparkMagic""" 2 | 3 | from hdijupyterutils.ipywidgetfactory import IpyWidgetFactory 4 | from sparkmagic.utils.constants import WIDGET_WIDTH 5 | 6 | 7 | class 
Authenticator(object): 8 | """Base Authenticator for all Sparkmagic authentication providers.""" 9 | 10 | def __init__(self, parsed_attributes=None): 11 | """Initializes the Authenticator with the attributes 12 | parsed from a %spark magic command if applicable, or with default values 13 | otherwise. 14 | 15 | Args: 16 | self, 17 | parsed_attributes (IPython.core.magics.namespace): The namespace object that 18 | is created from parsing the %spark magic command. 19 | """ 20 | if parsed_attributes is not None: 21 | self.url = parsed_attributes.url 22 | else: 23 | self.url = "http://example.com/livy" 24 | self.widgets = self.get_widgets(WIDGET_WIDTH) 25 | 26 | def get_widgets(self, widget_width): 27 | """Creates and returns an address widget. 28 | 29 | Args: 30 | widget_width (str): The width of all widgets to be created. 31 | 32 | Returns: 33 | Sequence[hdijupyterutils.ipywidgetfactory.IpyWidgetFactory]: list of widgets 34 | """ 35 | ipywidget_factory = IpyWidgetFactory() 36 | 37 | self.address_widget = ipywidget_factory.get_text( 38 | description="Address:", value="http://example.com/livy", width=widget_width 39 | ) 40 | widgets = [self.address_widget] 41 | return widgets 42 | 43 | def update_with_widget_values(self): 44 | """Updates url to be the value in the address widget.""" 45 | self.url = self.address_widget.value 46 | 47 | def __call__(self, request): 48 | """subclasses should override""" 49 | return None 50 | 51 | def __eq__(self, other): 52 | if not isinstance(other, Authenticator): 53 | return False 54 | return self.url == other.url 55 | 56 | def __hash__(self): 57 | return hash((self.url, self.__class__.__name__)) 58 | -------------------------------------------------------------------------------- /Dockerfile.spark: -------------------------------------------------------------------------------- 1 | # Pin to Spark 2.x for Scala 2.11 (https://issues.apache.org/jira/browse/LIVY-423) 2 | FROM datamechanics/spark:2.4.7-hadoop-3.1.0-java-8-scala-2.11-python-3.7-latest 3 | 4 | # Use root user for development. This shouldn't be used in production.
5 | USER 0 6 | 7 | # ---------- 8 | # Setup Python and Livy/Spark Deps 9 | # 10 | # Install GCP keys to avoid error: 11 | # "GPG error: https://packages.cloud.google.com/apt cloud-sdk InRelease: The following signatures couldn't be verified because the public key is not available: NO_PUBKEY B53DC80D13EDEF05" 12 | RUN apt-get update && apt-get install -y apt-transport-https ca-certificates gnupg 13 | RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list 14 | RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - 15 | # Livy Requires: 16 | # - mvn (from maven package or maven3 tarball) 17 | # - openjdk-8-jdk (or Oracle JDK 8) 18 | # - Python 2.7+ 19 | # - R 3.x 20 | RUN apt-get update && apt-get install -yq --no-install-recommends --force-yes \ 21 | curl \ 22 | git \ 23 | python3 python3-setuptools python3-venv python3-pip \ 24 | r-base \ 25 | r-base-core && \ 26 | rm -rf /var/lib/apt/lists/* 27 | 28 | ENV PYTHONHASHSEED 0 29 | ENV PYTHONIOENCODING UTF-8 30 | ENV PIP_DISABLE_PIP_VERSION_CHECK 1 31 | 32 | # ---------- 33 | # Build Livy 34 | # ---------- 35 | ARG LIVY_VERSION=0.7.1-incubating 36 | ENV LIVY_HOME /usr/livy 37 | ENV LIVY_CONF_DIR "${LIVY_HOME}/conf" 38 | ENV LIVY_PORT 8998 39 | 40 | RUN curl --progress-bar -L --retry 3 \ 41 | "http://archive.apache.org/dist/incubator/livy/${LIVY_VERSION}/apache-livy-${LIVY_VERSION}-bin.zip" \ 42 | -o "./apache-livy-${LIVY_VERSION}-bin.zip" \ 43 | && unzip -qq "./apache-livy-${LIVY_VERSION}-bin.zip" -d /usr \ 44 | && mv "/usr/apache-livy-${LIVY_VERSION}-bin" "${LIVY_HOME}" \ 45 | && rm -rf "./apache-livy-${LIVY_VERSION}-bin.zip" \ 46 | && mkdir "${LIVY_HOME}/logs" \ 47 | && chown -R root:root "${LIVY_HOME}" 48 | 49 | EXPOSE 8998 50 | 51 | HEALTHCHECK CMD curl -f "http://host.docker.internal:${LIVY_PORT}/" || exit 1 52 | 53 | CMD ${LIVY_HOME}/bin/livy-server 54 | -------------------------------------------------------------------------------- /autovizwidget/setup.py: -------------------------------------------------------------------------------- 1 | DESCRIPTION = "AutoVizWidget: An Auto-Visualization library for pandas dataframes" 2 | NAME = "autovizwidget" 3 | PACKAGES = [ 4 | "autovizwidget", 5 | "autovizwidget/plotlygraphs", 6 | "autovizwidget/widget", 7 | "autovizwidget/utils", 8 | ] 9 | AUTHOR = "Jupyter Development Team" 10 | AUTHOR_EMAIL = "jupyter@googlegroups.org" 11 | URL = "https://github.com/jupyter-incubator/sparkmagic" 12 | DOWNLOAD_URL = "https://github.com/jupyter-incubator/sparkmagic" 13 | LICENSE = "BSD 3-clause" 14 | 15 | import io 16 | import os 17 | import re 18 | 19 | from distutils.core import setup 20 | 21 | 22 | def read(path, encoding="utf-8"): 23 | path = os.path.join(os.path.dirname(__file__), path) 24 | with io.open(path, encoding=encoding) as fp: 25 | return fp.read() 26 | 27 | 28 | # read requirements.txt and convert to install_requires format 29 | def requirements(path): 30 | lines = read(path).splitlines() 31 | # remove comments and empty lines 32 | lines = [line for line in lines if not line.startswith("#") and line] 33 | return lines 34 | 35 | 36 | def version(path): 37 | """Obtain the package version from a python file e.g. pkg/__init__.py. 38 | 39 | See the Python packaging guide on single-sourcing the package version.
40 | """ 41 | version_file = read(path) 42 | version_match = re.search( 43 | r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M 44 | ) 45 | if version_match: 46 | return version_match.group(1) 47 | raise RuntimeError("Unable to find version string.") 48 | 49 | 50 | VERSION = version("autovizwidget/__init__.py") 51 | 52 | 53 | setup( 54 | name=NAME, 55 | version=VERSION, 56 | description=DESCRIPTION, 57 | author=AUTHOR, 58 | author_email=AUTHOR_EMAIL, 59 | url=URL, 60 | download_url=DOWNLOAD_URL, 61 | license=LICENSE, 62 | packages=PACKAGES, 63 | classifiers=[ 64 | "Development Status :: 4 - Beta", 65 | "Environment :: Console", 66 | "Intended Audience :: Science/Research", 67 | "License :: OSI Approved :: BSD License", 68 | "Natural Language :: English", 69 | "Programming Language :: Python :: 3.7", 70 | ], 71 | install_requires=requirements("requirements.txt"), 72 | ) 73 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/graphrenderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.offline import init_notebook_mode 5 | import plotly as p 6 | 7 | from .datagraph import DataGraph 8 | from .piegraph import PieGraph 9 | from .linegraph import LineGraph 10 | from .areagraph import AreaGraph 11 | from .bargraph import BarGraph 12 | from .scattergraph import ScatterGraph 13 | from ..widget.encoding import Encoding 14 | 15 | 16 | class GraphRenderer(object): 17 | @staticmethod 18 | def render(df, encoding, output): 19 | with output: 20 | init_notebook_mode() 21 | 22 | GraphRenderer._get_graph(encoding.chart_type).render(df, encoding, output) 23 | 24 | @staticmethod 25 | def display_x(chart_type): 26 | return GraphRenderer._get_graph(chart_type).display_x() 27 | 28 | @staticmethod 29 | def display_y(chart_type): 30 | return GraphRenderer._get_graph(chart_type).display_y() 31 | 32 | @staticmethod 33 | def display_logarithmic_x_axis(chart_type): 34 | return GraphRenderer._get_graph(chart_type).display_logarithmic_x_axis() 35 | 36 | @staticmethod 37 | def display_logarithmic_y_axis(chart_type): 38 | return GraphRenderer._get_graph(chart_type).display_logarithmic_y_axis() 39 | 40 | @staticmethod 41 | def display_controls(chart_type): 42 | display_x = GraphRenderer.display_x(chart_type) 43 | display_y = GraphRenderer.display_y(chart_type) 44 | return display_x or display_y 45 | 46 | @staticmethod 47 | def _get_graph(chart_type): 48 | if chart_type == Encoding.chart_type_scatter: 49 | graph = ScatterGraph() 50 | elif chart_type == Encoding.chart_type_line: 51 | graph = LineGraph() 52 | elif chart_type == Encoding.chart_type_area: 53 | graph = AreaGraph() 54 | elif chart_type == Encoding.chart_type_bar: 55 | graph = BarGraph() 56 | elif chart_type == Encoding.chart_type_pie: 57 | graph = PieGraph() 58 | elif chart_type == Encoding.chart_type_table: 59 | graph = DataGraph() 60 | else: 61 | raise ValueError("Cannot display chart of type {}".format(chart_type)) 62 | 63 | return graph 64 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_kernels.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.utils.constants import LANG_PYTHON, LANG_SCALA, LANG_R 2 | from sparkmagic.kernels.sparkkernel.sparkkernel import SparkKernel 3 | from 
sparkmagic.kernels.pysparkkernel.pysparkkernel import PySparkKernel 4 | from sparkmagic.kernels.sparkrkernel.sparkrkernel import SparkRKernel 5 | 6 | 7 | class TestPySparkKernel(PySparkKernel): 8 | def __init__(self): 9 | kwargs = {"testing": True} 10 | super(TestPySparkKernel, self).__init__(**kwargs) 11 | 12 | 13 | class TestSparkKernel(SparkKernel): 14 | def __init__(self): 15 | kwargs = {"testing": True} 16 | super(TestSparkKernel, self).__init__(**kwargs) 17 | 18 | 19 | class TestSparkRKernel(SparkRKernel): 20 | def __init__(self): 21 | kwargs = {"testing": True} 22 | super(TestSparkRKernel, self).__init__(**kwargs) 23 | 24 | 25 | def test_pyspark_kernel_configs(): 26 | kernel = TestPySparkKernel() 27 | assert kernel.session_language == LANG_PYTHON 28 | 29 | assert kernel.implementation == "PySpark" 30 | assert kernel.language == LANG_PYTHON 31 | assert kernel.language_version == "0.1" 32 | assert kernel.language_info == { 33 | "name": "pyspark", 34 | "mimetype": "text/x-python", 35 | "codemirror_mode": {"name": "python", "version": 3}, 36 | "file_extension": ".py", 37 | "pygments_lexer": "python3", 38 | } 39 | 40 | 41 | def test_spark_kernel_configs(): 42 | kernel = TestSparkKernel() 43 | 44 | assert kernel.session_language == LANG_SCALA 45 | 46 | assert kernel.implementation == "Spark" 47 | assert kernel.language == LANG_SCALA 48 | assert kernel.language_version == "0.1" 49 | assert kernel.language_info == { 50 | "name": "scala", 51 | "mimetype": "text/x-scala", 52 | "pygments_lexer": "scala", 53 | "file_extension": ".sc", 54 | "codemirror_mode": "text/x-scala", 55 | } 56 | 57 | 58 | def test_sparkr_kernel_configs(): 59 | kernel = TestSparkRKernel() 60 | 61 | assert kernel.session_language == LANG_R 62 | 63 | assert kernel.implementation == "SparkR" 64 | assert kernel.language == LANG_R 65 | assert kernel.language_version == "0.1" 66 | assert kernel.language_info == { 67 | "name": "sparkR", 68 | "mimetype": "text/x-rsrc", 69 | "pygments_lexer": "r", 70 | "file_extension": ".r", 71 | "codemirror_mode": "text/x-rsrc", 72 | } 73 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_heartbeatthread.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | from time import sleep 3 | 4 | from sparkmagic.livyclientlib.livysession import _HeartbeatThread 5 | 6 | 7 | def test_create_thread(): 8 | session = MagicMock() 9 | refresh_seconds = 1 10 | retry_seconds = 2 11 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds) 12 | 13 | assert heartbeat_thread.livy_session == session 14 | assert heartbeat_thread.refresh_seconds == refresh_seconds 15 | assert heartbeat_thread.retry_seconds == retry_seconds 16 | 17 | 18 | def test_run_once(): 19 | session = MagicMock() 20 | refresh_seconds = 0.1 21 | retry_seconds = 2 22 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds, 1) 23 | 24 | heartbeat_thread.start() 25 | sleep(0.15) 26 | heartbeat_thread.stop() 27 | 28 | session.refresh_status_and_info.assert_called_once_with() 29 | assert heartbeat_thread.livy_session is None 30 | 31 | 32 | def test_run_stops(): 33 | session = MagicMock() 34 | refresh_seconds = 0.01 35 | retry_seconds = 2 36 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds) 37 | 38 | heartbeat_thread.start() 39 | sleep(0.1) 40 | heartbeat_thread.stop() 41 | 42 | assert session.refresh_status_and_info.called 43 | assert
heartbeat_thread.livy_session is None 44 | 45 | 46 | def test_run_retries(): 47 | msg = "oh noes!" 48 | session = MagicMock() 49 | session.refresh_status_and_info = MagicMock(side_effect=ValueError(msg)) 50 | refresh_seconds = 0.1 51 | retry_seconds = 0.1 52 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds, 1) 53 | 54 | heartbeat_thread.start() 55 | sleep(0.15) 56 | heartbeat_thread.stop() 57 | 58 | session.refresh_status_and_info.assert_called_once_with() 59 | session.logger.error.assert_called_once_with(msg) 60 | assert heartbeat_thread.livy_session is None 61 | 62 | 63 | def test_run_retries_stops(): 64 | msg = "oh noes!" 65 | session = MagicMock() 66 | session.refresh_status_and_info = MagicMock(side_effect=ValueError(msg)) 67 | refresh_seconds = 0.01 68 | retry_seconds = 0.01 69 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds) 70 | 71 | heartbeat_thread.start() 72 | sleep(0.1) 73 | heartbeat_thread.stop() 74 | 75 | assert session.refresh_status_and_info.called 76 | assert session.logger.error.called 77 | assert heartbeat_thread.livy_session is None 78 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | 5 | class Encoding(object): 6 | chart_type_scatter = "Scatter" 7 | chart_type_line = "Line" 8 | chart_type_area = "Area" 9 | chart_type_bar = "Bar" 10 | chart_type_pie = "Pie" 11 | chart_type_table = "Table" 12 | supported_chart_types = [ 13 | chart_type_line, 14 | chart_type_area, 15 | chart_type_bar, 16 | chart_type_pie, 17 | chart_type_table, 18 | ] 19 | 20 | y_agg_avg = "Avg" 21 | y_agg_min = "Min" 22 | y_agg_max = "Max" 23 | y_agg_sum = "Sum" 24 | y_agg_none = "None" 25 | y_agg_count = "Count" 26 | supported_y_agg = [ 27 | y_agg_avg, 28 | y_agg_min, 29 | y_agg_max, 30 | y_agg_sum, 31 | y_agg_none, 32 | y_agg_count, 33 | ] 34 | 35 | def __init__( 36 | self, 37 | chart_type=None, 38 | x=None, 39 | y=None, 40 | y_aggregation=None, 41 | logarithmic_x_axis=False, 42 | logarithmic_y_axis=False, 43 | ): 44 | self._chart_type = chart_type 45 | self._x = x 46 | self._y = y 47 | self._y_aggregation = y_aggregation 48 | self._logarithmic_x_axis = logarithmic_x_axis 49 | self._logarithmic_y_axis = logarithmic_y_axis 50 | 51 | @property 52 | def chart_type(self): 53 | return self._chart_type 54 | 55 | @chart_type.setter 56 | def chart_type(self, value): 57 | self._chart_type = value 58 | 59 | @property 60 | def x(self): 61 | return self._x 62 | 63 | @x.setter 64 | def x(self, value): 65 | self._x = value 66 | 67 | @property 68 | def y(self): 69 | return self._y 70 | 71 | @y.setter 72 | def y(self, value): 73 | self._y = value 74 | 75 | @property 76 | def y_aggregation(self): 77 | return self._y_aggregation 78 | 79 | @y_aggregation.setter 80 | def y_aggregation(self, value): 81 | self._y_aggregation = value 82 | 83 | @property 84 | def logarithmic_x_axis(self): 85 | return self._logarithmic_x_axis 86 | 87 | @logarithmic_x_axis.setter 88 | def logarithmic_x_axis(self, value): 89 | self._logarithmic_x_axis = value 90 | 91 | @property 92 | def logarithmic_y_axis(self): 93 | return self._logarithmic_y_axis 94 | 95 | @logarithmic_y_axis.setter 96 | def logarithmic_y_axis(self, value): 97 | self._logarithmic_y_axis = value 98 | 
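A minimal sketch of how the Encoding class above is typically used together with GraphRenderer (defined earlier in this tree); the column names "country" and "sales" are invented for the example:

from autovizwidget.widget.encoding import Encoding
from autovizwidget.plotlygraphs.graphrenderer import GraphRenderer

# Describe a bar chart of "sales" summed per "country".
encoding = Encoding(
    chart_type=Encoding.chart_type_bar,
    x="country",
    y="sales",
    y_aggregation=Encoding.y_agg_sum,
)

# DataGraph (the Table renderer) reports display_x()/display_y() as False,
# so the widget shows no axis controls for tables.
assert not GraphRenderer.display_controls(Encoding.chart_type_table)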
-------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // vscode tasks to easily run things. 3 | // Ctrl + Shift + B to bring tasks up and select one 4 | // See https://go.microsoft.com/fwlink/?LinkId=733558 5 | // for the documentation about the tasks.json format 6 | "version": "0.1.0", 7 | "command": "cmd", 8 | "isShellCommand": true, 9 | "showOutput": "silent", 10 | "args": ["/C"], 11 | 12 | "tasks": [ 13 | { 14 | "taskName": "go to github", 15 | "suppressTaskName": true, 16 | "isBuildCommand": true, 17 | "args": ["start https://github.com/jupyter-incubator/sparkmagic"] 18 | }, 19 | { 20 | "taskName": "docker build", 21 | "suppressTaskName": true, 22 | "isBuildCommand": true, 23 | "args": ["docker-compose build"] 24 | }, 25 | { 26 | "taskName": "docker-compose up", 27 | "suppressTaskName": true, 28 | "isBuildCommand": false, 29 | "isTestCommand": true, 30 | "args": ["docker-compose up -d && start http://localhost:8888"] 31 | }, 32 | { 33 | "taskName": "docker-compose down", 34 | "suppressTaskName": true, 35 | "isBuildCommand": false, 36 | "isTestCommand": true, 37 | "args": ["docker-compose down"] 38 | }, 39 | { 40 | "taskName": "test all", 41 | "suppressTaskName": true, 42 | "isBuildCommand": false, 43 | "isTestCommand": true, 44 | "args": ["nosetests autovizwidget hdijupyterutils sparkmagic"] 45 | }, 46 | { 47 | "taskName": "test autovizwidget", 48 | "suppressTaskName": true, 49 | "isBuildCommand": false, 50 | "isTestCommand": true, 51 | "args": ["nosetests autovizwidget"] 52 | }, 53 | { 54 | "taskName": "test hdijupyterutils", 55 | "suppressTaskName": true, 56 | "isBuildCommand": false, 57 | "isTestCommand": true, 58 | "args": ["nosetests hdijupyterutils"] 59 | }, 60 | { 61 | "taskName": "test sparkmagic", 62 | "suppressTaskName": true, 63 | "isBuildCommand": false, 64 | "isTestCommand": true, 65 | "args": ["nosetests sparkmagic"] 66 | }, 67 | { 68 | "taskName": "test file", 69 | "suppressTaskName": true, 70 | "isBuildCommand": false, 71 | "isTestCommand": true, 72 | "args": ["nosetests --nocapture ${file}"] 73 | } 74 | ] 75 | } -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/configuration.py: -------------------------------------------------------------------------------- 1 | """Utility to read configs from file.""" 2 | 3 | # Distributed under the terms of the Modified BSD License. 4 | import json 5 | import sys 6 | 7 | from .utils import join_paths 8 | from .filesystemreaderwriter import FileSystemReaderWriter 9 | 10 | 11 | def with_override(overrides, path, fsrw_class=None): 12 | """A decorator which first initializes the overridden configurations, 13 | then checks the global overridden defaults for the given configuration, 14 | calling the function to get the default result otherwise.""" 15 | 16 | def ret(f): 17 | def wrapped_f(*args): 18 | # Can access overrides and path here 19 | _initialize(overrides, path, fsrw_class) 20 | name = f.__name__ 21 | if name in overrides: 22 | return overrides[name] 23 | else: 24 | return f(*args) 25 | 26 | # Hack! We do this so that we can query the .__name__ of the function 27 | # later to get the name of the configuration dynamically, e.g.
for unit tests 28 | wrapped_f.__name__ = f.__name__ 29 | return wrapped_f 30 | 31 | return ret 32 | 33 | 34 | def override(overrides, path, config, value, fsrw_class=None): 35 | """Given a string representing a configuration and a value for that configuration, 36 | override the configuration. Initialize the overridden configuration beforehand.""" 37 | _initialize(overrides, path, fsrw_class) 38 | overrides[config] = value 39 | 40 | 41 | def override_all(overrides, new_overrides): 42 | """Given a dictionary representing the overridden defaults for this 43 | configuration, initialize the global configuration.""" 44 | overrides.clear() 45 | overrides.update(new_overrides) 46 | 47 | 48 | def _initialize(overrides, path, fsrw_class): 49 | """Checks if the configuration is initialized. If it isn't, initializes the 50 | overrides object by reading from the configuration 51 | file, overwriting the current set of overrides if there is one.""" 52 | if not overrides: 53 | new_overrides = _load(path, fsrw_class) 54 | override_all(overrides, new_overrides) 55 | 56 | 57 | def _load(path, fsrw_class=None): 58 | """Returns a dictionary of configuration by reading from the configuration 59 | file.""" 60 | if fsrw_class is None: 61 | fsrw_class = FileSystemReaderWriter 62 | 63 | config_file = fsrw_class(path) 64 | config_file.ensure_file_exists() 65 | config_text = config_file.read_lines() 66 | line = "".join(config_text).strip() 67 | 68 | if line == "": 69 | overrides = {} 70 | else: 71 | overrides = json.loads(line) 72 | return overrides 73 | -------------------------------------------------------------------------------- /sparkmagic/example_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel_python_credentials" : { 3 | "username": "", 4 | "password": "", 5 | "url": "http://localhost:8998", 6 | "auth": "None" 7 | }, 8 | 9 | "kernel_scala_credentials" : { 10 | "username": "", 11 | "password": "", 12 | "url": "http://localhost:8998", 13 | "auth": "None" 14 | }, 15 | "kernel_r_credentials": { 16 | "username": "", 17 | "password": "", 18 | "url": "http://localhost:8998" 19 | }, 20 | 21 | "logging_config": { 22 | "version": 1, 23 | "formatters": { 24 | "magicsFormatter": { 25 | "format": "%(asctime)s\t%(levelname)s\t%(message)s", 26 | "datefmt": "" 27 | } 28 | }, 29 | "handlers": { 30 | "magicsHandler": { 31 | "class": "hdijupyterutils.filehandler.MagicsFileHandler", 32 | "formatter": "magicsFormatter", 33 | "home_path": "~/.sparkmagic" 34 | } 35 | }, 36 | "loggers": { 37 | "magicsLogger": { 38 | "handlers": ["magicsHandler"], 39 | "level": "DEBUG", 40 | "propagate": 0 41 | } 42 | } 43 | }, 44 | "authenticators": { 45 | "Kerberos": "sparkmagic.auth.kerberos.Kerberos", 46 | "None": "sparkmagic.auth.customauth.Authenticator", 47 | "Basic_Access": "sparkmagic.auth.basic.Basic" 48 | }, 49 | 50 | "wait_for_idle_timeout_seconds": 15, 51 | "livy_session_startup_timeout_seconds": 60, 52 | 53 | "http_session_config": { 54 | "adapters": 55 | [ 56 | { 57 | "prefix": "http://", 58 | "adapter": "requests.adapters.HTTPAdapter" 59 | } 60 | ] 61 | }, 62 | 63 | "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.", 64 | 65 | "ignore_ssl_errors": false, 66 | 67 | "session_configs": { 68 | "driverMemory": "1000M",
69 | "executorCores": 2 70 | }, 71 | "session_configs_defaults": { 72 | "conf": { 73 | "spark.sql.catalog.spark_catalog.type": "hive" 74 | } 75 | }, 76 | "use_auto_viz": true, 77 | "coerce_dataframe": true, 78 | "max_results_sql": 2500, 79 | "pyspark_dataframe_encoding": "utf-8", 80 | 81 | "heartbeat_refresh_seconds": 30, 82 | "livy_server_heartbeat_timeout_seconds": 0, 83 | "heartbeat_retry_seconds": 10, 84 | 85 | "server_extension_default_kernel_name": "pysparkkernel", 86 | "custom_headers": {}, 87 | 88 | "retry_policy": "configurable", 89 | "retry_seconds_to_sleep_list": [0.2, 0.5, 1, 3, 5], 90 | "configurable_retry_policy_max_retries": 8 91 | } 92 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Packages 2 | 3 | on: 4 | release: 5 | types: [released] 6 | 7 | jobs: 8 | tests: 9 | name: "Tests" 10 | uses: "./.github/workflows/tests.yml" 11 | 12 | build: 13 | needs: [tests] 14 | if: ${{ success() }} 15 | env: 16 | PYPI_TOKEN_AUTOVIZWIDGET: ${{ secrets.PYPI_TOKEN_AUTOVIZWIDGET }} 17 | PYPI_TOKEN_HDIJUPYTERUTILS: ${{ secrets.PYPI_TOKEN_HDIJUPYTERUTILS }} 18 | PYPI_TOKEN_SPARKMAGIC: ${{ secrets.PYPI_TOKEN_SPARKMAGIC }} 19 | 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: 3.8 28 | 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | 34 | - name: Setup PyPI 35 | run: | 36 | cat > ~/.pypirc <<- EOF 37 | [distutils] 38 | index-servers = 39 | pypi 40 | pypitest 41 | 42 | [autovizwidget] 43 | username=__token__ 44 | password=$PYPI_TOKEN_AUTOVIZWIDGET 45 | 46 | [hdijupyterutils] 47 | username=__token__ 48 | password=$PYPI_TOKEN_HDIJUPYTERUTILS 49 | 50 | [sparkmagic] 51 | username=__token__ 52 | password=$PYPI_TOKEN_SPARKMAGIC 53 | 54 | EOF 55 | 56 | - name: Build hdijupyterutils dist 57 | run: | 58 | cd hdijupyterutils 59 | python -m build 60 | cd .. 61 | 62 | - name: Publish hdijupyterutils to PyPI 63 | uses: pypa/gh-action-pypi-publish@release/v1 64 | with: 65 | user: __token__ 66 | password: ${{ secrets.PYPI_TOKEN_HDIJUPYTERUTILS }} 67 | packages_dir: hdijupyterutils/dist/ 68 | verbose: true 69 | 70 | - name: Build autovizwidget dist 71 | run: | 72 | cd autovizwidget 73 | python -m build 74 | cd .. 75 | 76 | - name: Publish autovizwidget to PyPI 77 | uses: pypa/gh-action-pypi-publish@release/v1 78 | with: 79 | user: __token__ 80 | password: ${{ secrets.PYPI_TOKEN_AUTOVIZWIDGET }} 81 | packages_dir: autovizwidget/dist/ 82 | verbose: true 83 | 84 | - name: Build sparkmagic dist 85 | run: | 86 | cd sparkmagic 87 | python -m build 88 | cd .. 89 | 90 | - name: Publish sparkmagic to PyPI 91 | uses: pypa/gh-action-pypi-publish@release/v1 92 | with: 93 | user: __token__ 94 | password: ${{ secrets.PYPI_TOKEN_SPARKMAGIC }} 95 | packages_dir: sparkmagic/dist/ 96 | verbose: true 97 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/sendtosparkcommand.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Jupyter Development Team. 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | from sparkmagic.livyclientlib.command import Command 5 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 6 | import sparkmagic.utils.constants as constants 7 | 8 | from abc import abstractmethod 9 | 10 | 11 | class SendToSparkCommand(Command): 12 | def __init__( 13 | self, 14 | input_variable_name, 15 | input_variable_value, 16 | output_variable_name, 17 | spark_events=None, 18 | ): 19 | super(SendToSparkCommand, self).__init__("", spark_events) 20 | self.input_variable_name = input_variable_name 21 | self.input_variable_value = input_variable_value 22 | self.output_variable_name = output_variable_name 23 | 24 | def execute(self, session): 25 | try: 26 | command = self.to_command( 27 | session.kind, 28 | self.input_variable_name, 29 | self.input_variable_value, 30 | self.output_variable_name, 31 | ) 32 | return command.execute(session) 33 | except Exception as e: 34 | raise e 35 | 36 | def to_command( 37 | self, kind, input_variable_name, input_variable_value, output_variable_name 38 | ): 39 | if kind == constants.SESSION_KIND_PYSPARK: 40 | return self._pyspark_command( 41 | input_variable_name, input_variable_value, output_variable_name 42 | ) 43 | elif kind == constants.SESSION_KIND_SPARK: 44 | return self._scala_command( 45 | input_variable_name, input_variable_value, output_variable_name 46 | ) 47 | elif kind == constants.SESSION_KIND_SPARKR: 48 | return self._r_command( 49 | input_variable_name, input_variable_value, output_variable_name 50 | ) 51 | else: 52 | raise BadUserDataException("Kind '{}' is not supported.".format(kind)) 53 | 54 | @abstractmethod 55 | def _scala_command( 56 | self, input_variable_name, input_variable_value, output_variable_name 57 | ): 58 | raise NotImplementedError # override and provide a proper implementation in a subclass! 59 | 60 | @abstractmethod 61 | def _pyspark_command( 62 | self, input_variable_name, input_variable_value, output_variable_name 63 | ): 64 | raise NotImplementedError # override and provide a proper implementation in a subclass! 65 | 66 | @abstractmethod 67 | def _r_command( 68 | self, input_variable_name, input_variable_value, output_variable_name 69 | ): 70 | raise NotImplementedError # override and provide a proper implementation in a subclass!
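For reference, SendStringToSparkCommand (earlier in this tree) is the concrete subclass that fills in these three per-language hooks. A minimal sketch of how it is driven (the variable names and values are invented for the example):

import sparkmagic.utils.constants as constants
from sparkmagic.livyclientlib.sendstringtosparkcommand import SendStringToSparkCommand

# Ship the local string "hello" to the remote session as the variable `greeting`.
command = SendStringToSparkCommand(
    input_variable_name="greeting",
    input_variable_value="hello",
    output_variable_name="greeting",
)

# to_command() dispatches on the session kind; for a PySpark session the
# generated remote code is the assignment  greeting = 'hello'
pyspark_command = command.to_command(
    constants.SESSION_KIND_PYSPARK, "greeting", "hello", "greeting"
)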
71 | -------------------------------------------------------------------------------- /.github/workflows/docker_build.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker Image 2 | 3 | on: 4 | push: {} 5 | pull_request: {} 6 | schedule: 7 | # Run daily 8 | - cron: "6 4 * * *" 9 | 10 | jobs: 11 | docker: 12 | runs-on: ubuntu-latest 13 | env: 14 | REPOSITORY_URL: ghcr.io 15 | SPARKMAGIC_IMAGE_NAME: ${{ github.repository }}/jupyter:latest 16 | SPARKMAGIC_DOCKERFILE: Dockerfile.jupyter 17 | JUPYTER_IMAGE_NAME: ${{ github.repository }}/sparkmagic-livy:latest 18 | JUPYTER_DOCKERFILE: Dockerfile.spark 19 | permissions: 20 | contents: read 21 | packages: write 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | 26 | - name: Set up QEMU 27 | uses: docker/setup-qemu-action@v3 28 | 29 | - name: Set up Docker Buildx 30 | uses: docker/setup-buildx-action@v3 31 | 32 | - name: Login to GitHub Container Registry 33 | uses: docker/login-action@v3 34 | with: 35 | registry: ${{ env.REPOSITORY_URL }} 36 | username: ${{ github.repository_owner }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | 39 | - name: Build and push with latest release - SparkMagic 40 | uses: docker/build-push-action@v6 41 | if: github.event_name == 'schedule' 42 | with: 43 | context: . 44 | file: ${{ env.SPARKMAGIC_DOCKERFILE }} 45 | platforms: linux/amd64,linux/arm64 46 | push: true 47 | tags: | 48 | ${{ env.REPOSITORY_URL }}/${{ env.SPARKMAGIC_IMAGE_NAME }} 49 | 50 | - name: Build and push with latest release - Jupyter 51 | uses: docker/build-push-action@v6 52 | if: github.event_name == 'schedule' 53 | with: 54 | context: . 55 | file: ${{ env.JUPYTER_DOCKERFILE }} 56 | platforms: linux/amd64,linux/arm64 57 | push: true 58 | tags: | 59 | ${{ env.REPOSITORY_URL }}/${{ env.JUPYTER_IMAGE_NAME }} 60 | 61 | - name: Build and push with local packages - SparkMagic 62 | uses: docker/build-push-action@v6 63 | if: github.event_name == 'push' 64 | with: 65 | context: . 66 | file: ${{ env.SPARKMAGIC_DOCKERFILE }} 67 | platforms: linux/amd64,linux/arm64 68 | push: true 69 | build-args: | 70 | dev_mode="true" 71 | tags: | 72 | ${{ env.REPOSITORY_URL }}/${{ env.SPARKMAGIC_IMAGE_NAME }} 73 | 74 | - name: Build and push with local packages - Jupyter 75 | uses: docker/build-push-action@v6 76 | if: github.event_name == 'push' 77 | with: 78 | context: .
79 | file: ${{ env.JUPYTER_DOCKERFILE }} 80 | platforms: linux/amd64,linux/arm64 81 | push: true 82 | build-args: | 83 | dev_mode="true" 84 | tags: | 85 | ${{ env.REPOSITORY_URL }}/${{ env.JUPYTER_IMAGE_NAME }} 86 | -------------------------------------------------------------------------------- /sparkmagic/setup.py: -------------------------------------------------------------------------------- 1 | DESCRIPTION = "SparkMagic: Spark execution via Livy" 2 | NAME = "sparkmagic" 3 | PACKAGES = [ 4 | "sparkmagic", 5 | "sparkmagic/controllerwidget", 6 | "sparkmagic/kernels", 7 | "sparkmagic/livyclientlib", 8 | "sparkmagic/auth", 9 | "sparkmagic/magics", 10 | "sparkmagic/kernels/pysparkkernel", 11 | "sparkmagic/kernels/sparkkernel", 12 | "sparkmagic/kernels/sparkrkernel", 13 | "sparkmagic/kernels/wrapperkernel", 14 | "sparkmagic/utils", 15 | "sparkmagic/serverextension", 16 | ] 17 | AUTHOR = "Jupyter Development Team" 18 | AUTHOR_EMAIL = "jupyter@googlegroups.org" 19 | URL = "https://github.com/jupyter-incubator/sparkmagic" 20 | DOWNLOAD_URL = "https://github.com/jupyter-incubator/sparkmagic" 21 | LICENSE = "BSD 3-clause" 22 | 23 | import io 24 | import os 25 | import re 26 | 27 | from distutils.core import setup 28 | 29 | 30 | def read(path, encoding="utf-8"): 31 | path = os.path.join(os.path.dirname(__file__), path) 32 | with io.open(path, encoding=encoding) as fp: 33 | return fp.read() 34 | 35 | 36 | # read requirements.txt and convert to install_requires format 37 | def requirements(path): 38 | lines = read(path).splitlines() 39 | # remove comments and empty lines 40 | lines = [line for line in lines if not line.startswith("#") and line] 41 | return lines 42 | 43 | 44 | def version(path): 45 | """Obtain the package version from a python file e.g. pkg/__init__.py. 46 | 47 | See the Python packaging guide on single-sourcing the package version. 48 | """ 49 | version_file = read(path) 50 | version_match = re.search( 51 | r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M 52 | ) 53 | if version_match: 54 | return version_match.group(1) 55 | raise RuntimeError("Unable to find version string.") 56 | 57 | 58 | VERSION = version("sparkmagic/__init__.py") 59 | 60 | 61 | setup( 62 | name=NAME, 63 | version=VERSION, 64 | description=DESCRIPTION, 65 | author=AUTHOR, 66 | author_email=AUTHOR_EMAIL, 67 | url=URL, 68 | download_url=DOWNLOAD_URL, 69 | license=LICENSE, 70 | packages=PACKAGES, 71 | include_package_data=True, 72 | package_data={ 73 | "sparkmagic": [ 74 | "kernels/pysparkkernel/kernel.js", 75 | "kernels/sparkkernel/kernel.js", 76 | "kernels/sparkrkernel/kernel.js", 77 | "kernels/pysparkkernel/kernel.json", 78 | "kernels/sparkkernel/kernel.json", 79 | "kernels/sparkrkernel/kernel.json", 80 | ] 81 | }, 82 | classifiers=[ 83 | "Development Status :: 4 - Beta", 84 | "Environment :: Console", 85 | "Intended Audience :: Science/Research", 86 | "License :: OSI Approved :: BSD License", 87 | "Natural Language :: English", 88 | "Programming Language :: Python :: 3.7", 89 | ], 90 | install_requires=requirements("requirements.txt"), 91 | ) 92 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/piegraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License.
3 | 4 | from plotly.graph_objs import Pie, Figure 5 | from plotly.offline import iplot 6 | 7 | try: 8 | from pandas.core.base import DataError 9 | except ImportError: 10 | try: 11 | from pandas.core.groupby import DataError 12 | except ImportError: 13 | from pandas.errors import DataError 14 | 15 | import autovizwidget.utils.configuration as conf 16 | from .graphbase import GraphBase 17 | 18 | 19 | class PieGraph(GraphBase): 20 | @staticmethod 21 | def render(df, encoding, output): 22 | if encoding.x is None: 23 | with output: 24 | print("\n\n\nPlease select an X axis.") 25 | return 26 | 27 | try: 28 | values, labels = PieGraph._get_x_values_labels(df, encoding) 29 | except TypeError: 30 | with output: 31 | print( 32 | "\n\n\nCannot group by X selection because of its type: '{}'. Please select another column.".format( 33 | df[encoding.x].dtype 34 | ) 35 | ) 36 | return 37 | except (ValueError, DataError): 38 | with output: 39 | print( 40 | "\n\n\nCannot group by X selection of type '{}'. Please select another column.".format( 41 | df[encoding.x].dtype 42 | ) 43 | ) 44 | if df.size == 0: 45 | print("\n\n\nCannot display a pie graph for an empty data set.") 46 | return 47 | 48 | max_slices_pie_graph = conf.max_slices_pie_graph() 49 | with output: 50 | # There are performance issues with a large number of slices. 51 | # 1500 rows crash the browser. 52 | # 500 rows take ~15 s. 53 | # 100 rows is almost automatic. 54 | if len(values) > max_slices_pie_graph: 55 | print( 56 | "There are {} values in your pie graph, which would render the graph unresponsive.\n" 57 | "Please select another X with at most {} possible values.".format( 58 | len(values), max_slices_pie_graph 59 | ) 60 | ) 61 | else: 62 | data = [Pie(values=values, labels=labels)] 63 | 64 | fig = Figure(data=data) 65 | iplot(fig, show_link=False) 66 | 67 | @staticmethod 68 | def display_logarithmic_x_axis(): 69 | return False 70 | 71 | @staticmethod 72 | def display_logarithmic_y_axis(): 73 | return False 74 | 75 | @staticmethod 76 | def _get_x_values_labels(df, encoding): 77 | if encoding.y is None: 78 | series = df.groupby([encoding.x]).size() 79 | values = series.values.tolist() 80 | labels = series.index.tolist() 81 | else: 82 | labels, values = GraphBase._get_x_y_values(df, encoding) 83 | return values, labels 84 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # Licensing terms 2 | 3 | This project is licensed under the terms of the Modified BSD License 4 | (also known as New or Revised or 3-Clause BSD), as follows: 5 | 6 | - Copyright (c) 2001-2015, IPython Development Team 7 | - Copyright (c) 2015-, Jupyter Development Team 8 | 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | Neither the name of the Jupyter Development Team nor the names of its 22 | contributors may be used to endorse or promote products derived from this 23 | software without specific prior written permission. 
24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ## About the Jupyter Development Team 37 | 38 | The Jupyter Development Team is the set of all contributors to the Jupyter project. 39 | This includes all of the Jupyter Subprojects, which are the different repositories 40 | under the [jupyter](https://github.com/jupyter/) GitHub organization. 41 | 42 | The core team that coordinates development on GitHub can be found here: 43 | https://github.com/jupyter/. 44 | 45 | ## Our copyright policy 46 | 47 | Jupyter uses a shared copyright model. Each contributor maintains copyright 48 | over their contributions to Jupyter. But, it is important to note that these 49 | contributions are typically only changes to the repositories. Thus, the Jupyter 50 | source code, in its entirety is not the copyright of any single person or 51 | institution. Instead, it is the collective copyright of the entire Jupyter 52 | Development Team. If individual contributors want to maintain a record of what 53 | changes/contributions they have specific copyright on, they should indicate 54 | their copyright in the commit message of the change, when they commit the 55 | change to one of the Jupyter repositories. 56 | 57 | With this in mind, the following banner should be used in any source code file 58 | to indicate the copyright and license terms: 59 | 60 | # Copyright (c) Jupyter Development Team. 61 | # Distributed under the terms of the Modified BSD License. 62 | -------------------------------------------------------------------------------- /autovizwidget/LICENSE.md: -------------------------------------------------------------------------------- 1 | # Licensing terms 2 | 3 | This project is licensed under the terms of the Modified BSD License 4 | (also known as New or Revised or 3-Clause BSD), as follows: 5 | 6 | - Copyright (c) 2001-2015, IPython Development Team 7 | - Copyright (c) 2015-, Jupyter Development Team 8 | 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | Neither the name of the Jupyter Development Team nor the names of its 22 | contributors may be used to endorse or promote products derived from this 23 | software without specific prior written permission. 
24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ## About the Jupyter Development Team 37 | 38 | The Jupyter Development Team is the set of all contributors to the Jupyter project. 39 | This includes all of the Jupyter Subprojects, which are the different repositories 40 | under the [jupyter](https://github.com/jupyter/) GitHub organization. 41 | 42 | The core team that coordinates development on GitHub can be found here: 43 | https://github.com/jupyter/. 44 | 45 | ## Our copyright policy 46 | 47 | Jupyter uses a shared copyright model. Each contributor maintains copyright 48 | over their contributions to Jupyter. But, it is important to note that these 49 | contributions are typically only changes to the repositories. Thus, the Jupyter 50 | source code, in its entirety is not the copyright of any single person or 51 | institution. Instead, it is the collective copyright of the entire Jupyter 52 | Development Team. If individual contributors want to maintain a record of what 53 | changes/contributions they have specific copyright on, they should indicate 54 | their copyright in the commit message of the change, when they commit the 55 | change to one of the Jupyter repositories. 56 | 57 | With this in mind, the following banner should be used in any source code file 58 | to indicate the copyright and license terms: 59 | 60 | # Copyright (c) Jupyter Development Team. 61 | # Distributed under the terms of the Modified BSD License. 62 | -------------------------------------------------------------------------------- /hdijupyterutils/LICENSE.md: -------------------------------------------------------------------------------- 1 | # Licensing terms 2 | 3 | This project is licensed under the terms of the Modified BSD License 4 | (also known as New or Revised or 3-Clause BSD), as follows: 5 | 6 | - Copyright (c) 2001-2015, IPython Development Team 7 | - Copyright (c) 2015-, Jupyter Development Team 8 | 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | Neither the name of the Jupyter Development Team nor the names of its 22 | contributors may be used to endorse or promote products derived from this 23 | software without specific prior written permission. 
24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ## About the Jupyter Development Team 37 | 38 | The Jupyter Development Team is the set of all contributors to the Jupyter project. 39 | This includes all of the Jupyter Subprojects, which are the different repositories 40 | under the [jupyter](https://github.com/jupyter/) GitHub organization. 41 | 42 | The core team that coordinates development on GitHub can be found here: 43 | https://github.com/jupyter/. 44 | 45 | ## Our copyright policy 46 | 47 | Jupyter uses a shared copyright model. Each contributor maintains copyright 48 | over their contributions to Jupyter. But, it is important to note that these 49 | contributions are typically only changes to the repositories. Thus, the Jupyter 50 | source code, in its entirety is not the copyright of any single person or 51 | institution. Instead, it is the collective copyright of the entire Jupyter 52 | Development Team. If individual contributors want to maintain a record of what 53 | changes/contributions they have specific copyright on, they should indicate 54 | their copyright in the commit message of the change, when they commit the 55 | change to one of the Jupyter repositories. 56 | 57 | With this in mind, the following banner should be used in any source code file 58 | to indicate the copyright and license terms: 59 | 60 | # Copyright (c) Jupyter Development Team. 61 | # Distributed under the terms of the Modified BSD License. 62 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/createsessionwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | import json 4 | 5 | import sparkmagic.utils.configuration as conf 6 | from sparkmagic.utils.constants import LANG_SCALA, LANG_PYTHON 7 | from sparkmagic.controllerwidget.abstractmenuwidget import AbstractMenuWidget 8 | 9 | 10 | class CreateSessionWidget(AbstractMenuWidget): 11 | def __init__( 12 | self, 13 | spark_controller, 14 | ipywidget_factory, 15 | ipython_display, 16 | endpoints_dropdown_widget, 17 | refresh_method, 18 | ): 19 | # This is nested 20 | super(CreateSessionWidget, self).__init__( 21 | spark_controller, ipywidget_factory, ipython_display, True 22 | ) 23 | 24 | self.refresh_method = refresh_method 25 | 26 | self.endpoints_dropdown_widget = endpoints_dropdown_widget 27 | 28 | self.session_widget = self.ipywidget_factory.get_text( 29 | description="Name:", value="session-name" 30 | ) 31 | self.lang_widget = self.ipywidget_factory.get_toggle_buttons( 32 | description="Language:", 33 | options=[LANG_SCALA, LANG_PYTHON], 34 | ) 35 | self.properties = self.ipywidget_factory.get_text( 36 | description="Properties:", value=json.dumps(conf.session_configs()) 37 | ) 38 | self.submit_widget = self.ipywidget_factory.get_submit_button( 39 | description="Create Session" 40 | ) 41 | 42 | self.children = [ 43 | self.ipywidget_factory.get_html(value="
", width="600px"), 44 | self.endpoints_dropdown_widget, 45 | self.session_widget, 46 | self.lang_widget, 47 | self.properties, 48 | self.ipywidget_factory.get_html(value="
", width="600px"), 49 | self.submit_widget, 50 | ] 51 | 52 | for child in self.children: 53 | child.parent_widget = self 54 | 55 | def run(self): 56 | try: 57 | properties_json = self.properties.value 58 | if properties_json.strip() != "": 59 | conf.override( 60 | conf.session_configs.__name__, json.loads(self.properties.value) 61 | ) 62 | except ValueError as e: 63 | self.ipython_display.send_error( 64 | "Session properties must be a valid JSON string. Error:\n{}".format(e) 65 | ) 66 | return 67 | 68 | endpoint = self.endpoints_dropdown_widget.value 69 | language = self.lang_widget.value 70 | alias = self.session_widget.value 71 | skip = False 72 | properties = conf.get_session_properties(language) 73 | 74 | try: 75 | self.spark_controller.add_session(alias, endpoint, skip, properties) 76 | except ValueError as e: 77 | self.ipython_display.send_error( 78 | """Could not add session with 79 | name: 80 | {} 81 | properties: 82 | {} 83 | 84 | due to error: '{}'""".format( 85 | alias, properties, e 86 | ) 87 | ) 88 | return 89 | 90 | self.refresh_method() 91 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_usercodeparser.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from sparkmagic.kernels.wrapperkernel.usercodeparser import UserCodeParser 3 | from sparkmagic.kernels.kernelmagics import KernelMagics 4 | 5 | 6 | def test_empty_string(): 7 | parser = UserCodeParser() 8 | 9 | assert "" == parser.get_code_to_run("") 10 | 11 | 12 | def test_spark_code(): 13 | parser = UserCodeParser() 14 | cell = "my code\nand more" 15 | 16 | assert "%%spark\nmy code\nand more" == parser.get_code_to_run(cell) 17 | 18 | 19 | def test_local_single(): 20 | parser = UserCodeParser() 21 | cell = """%local 22 | hi 23 | hi 24 | hi""" 25 | 26 | assert "hi\nhi\nhi" == parser.get_code_to_run(cell) 27 | 28 | 29 | def test_local_double(): 30 | parser = UserCodeParser() 31 | cell = """%%local 32 | hi 33 | hi 34 | hi""" 35 | 36 | assert "hi\nhi\nhi" == parser.get_code_to_run(cell) 37 | 38 | 39 | def test_our_line_magics(): 40 | parser = UserCodeParser() 41 | magic_name = KernelMagics.info.__name__ 42 | cell = "%{}".format(magic_name) 43 | 44 | assert "%%{}\n ".format(magic_name) == parser.get_code_to_run(cell) 45 | 46 | 47 | def test_our_line_magics_with_content(): 48 | parser = UserCodeParser() 49 | magic_name = KernelMagics.info.__name__ 50 | cell = """%{} 51 | my content 52 | more content""".format( 53 | magic_name 54 | ) 55 | 56 | assert "%%{}\nmy content\nmore content\n ".format( 57 | magic_name 58 | ) == parser.get_code_to_run(cell) 59 | 60 | 61 | def test_other_cell_magic(): 62 | parser = UserCodeParser() 63 | cell = """%%magic 64 | hi 65 | hi 66 | hi""" 67 | 68 | assert "{}".format(cell) == parser.get_code_to_run(cell) 69 | 70 | 71 | def test_other_line_magic(): 72 | parser = UserCodeParser() 73 | cell = """%magic 74 | hi 75 | hi 76 | hi""" 77 | 78 | assert cell == parser.get_code_to_run(cell) 79 | 80 | 81 | def test_scala_code(): 82 | parser = UserCodeParser() 83 | cell = """/* Place the cursor in the cell and press SHIFT + ENTER to run */ 84 | 85 | val fruits = sc.textFile("wasb:///example/data/fruits.txt") 86 | val yellowThings = sc.textFile("wasb:///example/data/yellowthings.txt")""" 87 | 88 | assert "%%spark\n{}".format(cell) == parser.get_code_to_run(cell) 89 | 90 | 91 | def test_unicode(): 92 | parser = UserCodeParser() 93 | cell = "print 'è🐙🐙🐙🐙'" 94 | 95 | assert 
"%%spark\n{}".format(cell) == parser.get_code_to_run(cell) 96 | 97 | 98 | def test_unicode_in_magics(): 99 | parser = UserCodeParser() 100 | magic_name = KernelMagics.info.__name__ 101 | cell = """%{} 102 | my content è🐙 103 | more content""".format( 104 | magic_name 105 | ) 106 | 107 | assert "%%{}\nmy content è🐙\nmore content\n ".format( 108 | magic_name 109 | ) == parser.get_code_to_run(cell) 110 | 111 | 112 | def test_unicode_in_double_magics(): 113 | parser = UserCodeParser() 114 | magic_name = KernelMagics.info.__name__ 115 | cell = """%%{} 116 | my content è🐙 117 | more content""".format( 118 | magic_name 119 | ) 120 | 121 | assert "%%{}\nmy content è🐙\nmore content\n ".format( 122 | magic_name 123 | ) == parser.get_code_to_run(cell) 124 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/basic.py: -------------------------------------------------------------------------------- 1 | """Class for implementing a basic access authenticator for SparkMagic""" 2 | 3 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 4 | from hdijupyterutils.ipywidgetfactory import IpyWidgetFactory 5 | from requests.auth import HTTPBasicAuth 6 | from .customauth import Authenticator 7 | 8 | 9 | class Basic(HTTPBasicAuth, Authenticator): 10 | """Basic Access authenticator for SparkMagic""" 11 | 12 | def __init__(self, parsed_attributes=None): 13 | """Initializes the Authenticator with the attributes in the attributes 14 | parsed from a %spark magic command if applicable, or with default values 15 | otherwise. 16 | 17 | Args: 18 | self, 19 | parsed_attributes (IPython.core.magics.namespace): The namespace object that 20 | is created from parsing %spark magic command. 21 | """ 22 | if parsed_attributes is not None: 23 | if parsed_attributes.user == "" or parsed_attributes.password == "": 24 | new_exc = BadUserDataException( 25 | "Need to supply username and password arguments for " 26 | "Basic Access Authentication. (e.g. -a username -p password)." 27 | ) 28 | raise new_exc 29 | self.username = parsed_attributes.user 30 | self.password = parsed_attributes.password 31 | else: 32 | self.username = "username" 33 | self.password = "password" 34 | HTTPBasicAuth.__init__(self, self.username, self.password) 35 | Authenticator.__init__(self, parsed_attributes) 36 | 37 | def get_widgets(self, widget_width): 38 | """Creates and returns a list with an address, username, and password widget 39 | 40 | Args: 41 | widget_width (str): The width of all widgets to be created. 
42 | 43 | Returns: 44 | list: The address, username, and password widgets to render. 45 | """ 46 | ipywidget_factory = IpyWidgetFactory() 47 | 48 | self.user_widget = ipywidget_factory.get_text( 49 | description="Username:", value=self.username, width=widget_width 50 | ) 51 | 52 | self.password_widget = ipywidget_factory.get_password( 53 | description="Password:", value=self.password, width=widget_width 54 | ) 55 | 56 | widgets = [self.user_widget, self.password_widget] 57 | return Authenticator.get_widgets(self, widget_width) + widgets 58 | 59 | def update_with_widget_values(self): 60 | """Updates url, username, and password to be the value of their respective widgets.""" 61 | Authenticator.update_with_widget_values(self) 62 | self.username = self.user_widget.value 63 | self.password = self.password_widget.value 64 | 65 | def __eq__(self, other): 66 | if not isinstance(other, Basic): 67 | return False 68 | return ( 69 | self.url == other.url 70 | and self.username == other.username 71 | and self.password == other.password 72 | ) 73 | 74 | def __call__(self, request): 75 | return HTTPBasicAuth.__call__(self, request) 76 | 77 | def __hash__(self): 78 | return hash((self.username, self.password, self.url, self.__class__.__name__)) 79 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/managesessionwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.controllerwidget.abstractmenuwidget import AbstractMenuWidget 4 | 5 | 6 | class ManageSessionWidget(AbstractMenuWidget): 7 | def __init__( 8 | self, spark_controller, ipywidget_factory, ipython_display, refresh_method 9 | ): 10 | # This is nested 11 | super(ManageSessionWidget, self).__init__( 12 | spark_controller, ipywidget_factory, ipython_display, True 13 | ) 14 | 15 | self.refresh_method = refresh_method 16 | 17 | self.children = self.get_existing_session_widgets() 18 | 19 | for child in self.children: 20 | child.parent_widget = self 21 | 22 | def run(self): 23 | self.refresh_method() 24 | 25 | def get_existing_session_widgets(self): 26 | session_widgets = [] 27 | session_widgets.append( 28 | self.ipywidget_factory.get_html(value="<br/>
", width="600px") 29 | ) 30 | 31 | client_dict = self.spark_controller.get_managed_clients() 32 | if len(client_dict) > 0: 33 | # Header 34 | header = self.get_session_widget("Name", "Id", "Kind", "State", False) 35 | session_widgets.append(header) 36 | session_widgets.append( 37 | self.ipywidget_factory.get_html(value="
", width="600px") 38 | ) 39 | 40 | # Sessions 41 | for name, session in client_dict.items(): 42 | session_widgets.append( 43 | self.get_session_widget( 44 | name, session.id, session.kind, session.status 45 | ) 46 | ) 47 | 48 | session_widgets.append( 49 | self.ipywidget_factory.get_html(value="
", width="600px") 50 | ) 51 | else: 52 | session_widgets.append( 53 | self.ipywidget_factory.get_html(value="No sessions yet.", width="600px") 54 | ) 55 | 56 | return session_widgets 57 | 58 | def get_session_widget(self, name, session_id, kind, state, button=True): 59 | hbox = self.ipywidget_factory.get_hbox() 60 | 61 | name_w = self.ipywidget_factory.get_html( 62 | value=name, width="200px", padding="4px" 63 | ) 64 | id_w = self.ipywidget_factory.get_html( 65 | value=str(session_id), width="100px", padding="4px" 66 | ) 67 | kind_w = self.ipywidget_factory.get_html( 68 | value=kind, width="100px", padding="4px" 69 | ) 70 | state_w = self.ipywidget_factory.get_html( 71 | value=state, width="100px", padding="4px" 72 | ) 73 | 74 | if button: 75 | 76 | def delete_on_click(button): 77 | self.spark_controller.delete_session_by_name(name) 78 | self.refresh_method() 79 | 80 | delete_w = self.ipywidget_factory.get_button(description="Delete") 81 | delete_w.on_click(delete_on_click) 82 | else: 83 | delete_w = self.ipywidget_factory.get_html( 84 | value="", width="100px", padding="4px" 85 | ) 86 | 87 | hbox.children = [name_w, id_w, kind_w, state_w, delete_w] 88 | 89 | return hbox 90 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 
45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at itamar@pythonspeed.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/livyreliablehttpclient.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from .linearretrypolicy import LinearRetryPolicy 5 | from .configurableretrypolicy import ConfigurableRetryPolicy 6 | from .reliablehttpclient import ReliableHttpClient 7 | from sparkmagic.utils.constants import LINEAR_RETRY, CONFIGURABLE_RETRY 8 | import sparkmagic.utils.configuration as conf 9 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 10 | 11 | 12 | class LivyReliableHttpClient(object): 13 | """A Livy-specific Http client which wraps the normal ReliableHttpClient. 
Propagates 14 | HttpClientExceptions up.""" 15 | 16 | def __init__(self, http_client, endpoint): 17 | self.endpoint = endpoint 18 | self._http_client = http_client 19 | 20 | @staticmethod 21 | def from_endpoint(endpoint): 22 | headers = {"Content-Type": "application/json"} 23 | headers.update(conf.custom_headers()) 24 | retry_policy = LivyReliableHttpClient._get_retry_policy() 25 | return LivyReliableHttpClient( 26 | ReliableHttpClient(endpoint, headers, retry_policy), endpoint 27 | ) 28 | 29 | def post_statement(self, session_id, data): 30 | return self._http_client.post( 31 | self._statements_url(session_id), [201], data 32 | ).json() 33 | 34 | def get_statement(self, session_id, statement_id): 35 | return self._http_client.get( 36 | self._statement_url(session_id, statement_id), [200] 37 | ).json() 38 | 39 | def get_sessions(self): 40 | return self._http_client.get("/sessions", [200]).json() 41 | 42 | def post_session(self, properties): 43 | return self._http_client.post("/sessions", [201], properties).json() 44 | 45 | def get_session(self, session_id): 46 | return self._http_client.get(self._session_url(session_id), [200]).json() 47 | 48 | def delete_session(self, session_id): 49 | self._http_client.delete(self._session_url(session_id), [200, 404]) 50 | 51 | def get_all_session_logs(self, session_id): 52 | return self._http_client.get( 53 | self._session_url(session_id) + "/log?from=0", [200] 54 | ).json() 55 | 56 | def get_headers(self): 57 | return self._http_client.get_headers() 58 | 59 | def cancel_statement(self, session_id, statement_id): 60 | return self._http_client.post( 61 | "{}/cancel".format(self._statement_url(session_id, statement_id)), [200], {} 62 | ).json() 63 | 64 | @staticmethod 65 | def _session_url(session_id): 66 | return "/sessions/{}".format(session_id) 67 | 68 | @staticmethod 69 | def _statements_url(session_id): 70 | return "/sessions/{}/statements".format(session_id) 71 | 72 | @staticmethod 73 | def _statement_url(session_id, statement_id): 74 | return "/sessions/{}/statements/{}".format(session_id, statement_id) 75 | 76 | @staticmethod 77 | def _get_retry_policy(): 78 | policy = conf.retry_policy() 79 | 80 | if policy == LINEAR_RETRY: 81 | return LinearRetryPolicy(seconds_to_sleep=5, max_retries=5) 82 | elif policy == CONFIGURABLE_RETRY: 83 | return ConfigurableRetryPolicy( 84 | retry_seconds_to_sleep_list=conf.retry_seconds_to_sleep_list(), 85 | max_retries=conf.configurable_retry_policy_max_retries(), 86 | ) 87 | else: 88 | raise BadUserConfigurationException( 89 | "Retry policy '{}' not supported".format(policy) 90 | ) 91 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_configurableretrypolicy.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.livyclientlib.configurableretrypolicy import ConfigurableRetryPolicy 2 | import sparkmagic.utils.configuration as conf 3 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 4 | 5 | 6 | def test_with_empty_list(): 7 | times = [] 8 | max_retries = 5 9 | policy = ConfigurableRetryPolicy(times, max_retries) 10 | 11 | assert 5 == policy.seconds_to_sleep(0) 12 | assert 5 == policy.seconds_to_sleep(4) 13 | assert 5 == policy.seconds_to_sleep(5) 14 | assert 5 == policy.seconds_to_sleep(6) 15 | 16 | # Check based on retry count 17 | assert True is policy.should_retry(500, False, 0) 18 | assert True is policy.should_retry(500, False, 4) 19 | assert True is 
policy.should_retry(500, False, 5) 20 | assert False is policy.should_retry(500, False, 6) 21 | 22 | # Check based on status code 23 | assert False is policy.should_retry(201, False, 0) 24 | assert False is policy.should_retry(201, False, 6) 25 | 26 | # Check based on error 27 | assert True is policy.should_retry(201, True, 0) 28 | assert True is policy.should_retry(201, True, 6) 29 | 30 | 31 | def test_with_one_element_list(): 32 | times = [2] 33 | max_retries = 5 34 | policy = ConfigurableRetryPolicy(times, max_retries) 35 | 36 | assert 2 == policy.seconds_to_sleep(0) 37 | assert 2 == policy.seconds_to_sleep(4) 38 | assert 2 == policy.seconds_to_sleep(5) 39 | assert 2 == policy.seconds_to_sleep(6) 40 | 41 | # Check based on retry count 42 | assert True is policy.should_retry(500, False, 0) 43 | assert True is policy.should_retry(500, False, 4) 44 | assert True is policy.should_retry(500, False, 5) 45 | assert False is policy.should_retry(500, False, 6) 46 | 47 | # Check based on status code 48 | assert False is policy.should_retry(201, False, 0) 49 | assert False is policy.should_retry(201, False, 6) 50 | 51 | # Check based on error 52 | assert True is policy.should_retry(201, True, 0) 53 | assert True is policy.should_retry(201, True, 6) 54 | 55 | 56 | def test_with_default_values(): 57 | times = conf.retry_seconds_to_sleep_list() 58 | max_retries = conf.configurable_retry_policy_max_retries() 59 | policy = ConfigurableRetryPolicy(times, max_retries) 60 | 61 | assert times[0] == policy.seconds_to_sleep(0) 62 | assert times[0] == policy.seconds_to_sleep(1) 63 | assert times[1] == policy.seconds_to_sleep(2) 64 | assert times[2] == policy.seconds_to_sleep(3) 65 | assert times[3] == policy.seconds_to_sleep(4) 66 | assert times[4] == policy.seconds_to_sleep(5) 67 | assert times[4] == policy.seconds_to_sleep(6) 68 | assert times[4] == policy.seconds_to_sleep(7) 69 | assert times[4] == policy.seconds_to_sleep(8) 70 | assert times[4] == policy.seconds_to_sleep(9) 71 | 72 | # Check based on retry count 73 | assert True is policy.should_retry(500, False, 0) 74 | assert True is policy.should_retry(500, False, 7) 75 | assert True is policy.should_retry(500, False, 8) 76 | assert False is policy.should_retry(500, False, 9) 77 | 78 | # Check based on status code 79 | assert False is policy.should_retry(201, False, 0) 80 | assert False is policy.should_retry(201, False, 9) 81 | 82 | # Check based on error 83 | assert True is policy.should_retry(201, True, 0) 84 | assert True is policy.should_retry(201, True, 9) 85 | 86 | 87 | def test_with_negative_values(): 88 | times = [0.1, -1] 89 | max_retries = 5 90 | 91 | try: 92 | policy = ConfigurableRetryPolicy(times, max_retries) 93 | assert False 94 | except BadUserConfigurationException: 95 | assert True 96 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from .encoding import Encoding 4 | from .autovizwidget import AutoVizWidget 5 | 6 | 7 | def infer_vegalite_type(data): 8 | """ 9 | From an array-like input, infer the correct vega typecode 10 | ('O', 'N', 'Q', or 'T') 11 | Parameters 12 | ---------- 13 | data: Numpy array or Pandas Series 14 | """ 15 | 16 | typ = pd.api.types.infer_dtype(data) 17 | 18 | if typ in [ 19 | "floating", 20 | "mixed-integer-float", 21 | "integer", 22 | "mixed-integer", 23 | "complex", 24 | ]: 25 | typecode = "Q" 26 | elif typ in 
["string", "bytes", "categorical", "boolean", "mixed", "unicode"]: 27 | typecode = "N" 28 | elif typ in [ 29 | "datetime", 30 | "datetime64", 31 | "timedelta", 32 | "timedelta64", 33 | "date", 34 | "time", 35 | "period", 36 | ]: 37 | typecode = "T" 38 | else: 39 | typecode = "N" 40 | 41 | return typecode 42 | 43 | 44 | def _validate_custom_order(order): 45 | assert len(order) == 4 46 | list_to_check = list(order) 47 | list_to_check.sort() 48 | assert list_to_check == ["N", "O", "Q", "T"] 49 | 50 | 51 | def _classify_data_by_type(data, order, skip=None): 52 | """Get O, N, Q, or T vegalite type for all columns in data except if in skip.""" 53 | if skip is None: 54 | skip = [] 55 | 56 | d = dict() 57 | for typ in order: 58 | d[typ] = [] 59 | 60 | for column_name in data: 61 | if column_name not in skip: 62 | typ = infer_vegalite_type(data[column_name]) 63 | d[typ].append(column_name) 64 | 65 | return d 66 | 67 | 68 | def select_x(data, order=None): 69 | """ 70 | Helper function that does a best effort of selecting an automatic x axis. 71 | Returns None if it cannot find x axis. 72 | """ 73 | if data is None: 74 | return None 75 | 76 | if len(data) < 1: 77 | return None 78 | 79 | if order is None: 80 | order = ["T", "O", "N", "Q"] 81 | else: 82 | _validate_custom_order(order) 83 | 84 | d = _classify_data_by_type(data, order) 85 | 86 | chosen_x = None 87 | for typ in order: 88 | if len(d[typ]) >= 1: 89 | chosen_x = d[typ][0] 90 | break 91 | 92 | return chosen_x 93 | 94 | 95 | def select_y(data, x_name, order=None, aggregator=None): 96 | """ 97 | Helper function that does a best effort of selecting an automatic y axis. 98 | It won't set the same axis that x is set to again. 99 | Returns None if it cannot find y axis. 100 | """ 101 | if data is None: 102 | return None 103 | 104 | if len(data) < 2: 105 | return None 106 | 107 | if x_name is None: 108 | return None 109 | 110 | if order is None: 111 | order = ["Q", "O", "N", "T"] 112 | else: 113 | _validate_custom_order(order) 114 | 115 | d = _classify_data_by_type(data, order, [x_name]) 116 | 117 | # Choose the first column found on the following order: Q, O, N, T 118 | chosen_y = None 119 | for typ in order: 120 | if len(d[typ]) >= 1: 121 | chosen_y = d[typ][0] 122 | break 123 | 124 | return chosen_y 125 | 126 | 127 | def display_dataframe(df): 128 | selected_x = select_x(df) 129 | selected_y = select_y(df, selected_x) 130 | encoding = Encoding( 131 | chart_type=Encoding.chart_type_table, 132 | x=selected_x, 133 | y=selected_y, 134 | y_aggregation=Encoding.y_agg_max, 135 | ) 136 | return AutoVizWidget(df, encoding) 137 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | import pytest 3 | 4 | import sparkmagic.utils.configuration as conf 5 | from sparkmagic.livyclientlib.exceptions import * 6 | 7 | 8 | self = None 9 | ipython_display = None 10 | logger = None 11 | 12 | 13 | def setup_function(): 14 | global self, ipython_display, logger 15 | self = MagicMock() 16 | ipython_display = self.ipython_display 17 | logger = self.logger 18 | conf.override_all({}) 19 | 20 | 21 | def test_handle_expected_exceptions(): 22 | mock_method = MagicMock() 23 | mock_method.__name__ = "MockMethod" 24 | decorated = handle_expected_exceptions(mock_method) 25 | assert decorated.__name__ == mock_method.__name__ 26 | 27 | result = decorated(self, 1, 2, 3) 28 | assert result == 
mock_method.return_value 29 | assert ipython_display.send_error.call_count == 0 30 | mock_method.assert_called_once_with(self, 1, 2, 3) 31 | 32 | 33 | def test_handle_expected_exceptions_handle(): 34 | conf.override_all({"all_errors_are_fatal": False}) 35 | mock_method = MagicMock(side_effect=LivyUnexpectedStatusException("ridiculous")) 36 | mock_method.__name__ = "MockMethod2" 37 | decorated = handle_expected_exceptions(mock_method) 38 | assert decorated.__name__ == mock_method.__name__ 39 | 40 | result = decorated(self, 1, kwarg="foo") 41 | assert result is None 42 | assert ipython_display.send_error.call_count == 1 43 | mock_method.assert_called_once_with(self, 1, kwarg="foo") 44 | 45 | 46 | def test_handle_expected_exceptions_throw(): 47 | with pytest.raises(ValueError): 48 | mock_method = MagicMock(side_effect=ValueError("HALP")) 49 | mock_method.__name__ = "mock_meth" 50 | decorated = handle_expected_exceptions(mock_method) 51 | assert decorated.__name__ == mock_method.__name__ 52 | 53 | _ = decorated(self, 1, kwarg="foo") 54 | 55 | 56 | def test_handle_expected_exceptions_throws_if_all_errors_fatal(): 57 | with pytest.raises(LivyUnexpectedStatusException): 58 | conf.override_all({"all_errors_are_fatal": True}) 59 | mock_method = MagicMock(side_effect=LivyUnexpectedStatusException("Oh no!")) 60 | mock_method.__name__ = "mock_meth" 61 | decorated = handle_expected_exceptions(mock_method) 62 | assert decorated.__name__ == mock_method.__name__ 63 | 64 | _ = decorated(self, 1, kwarg="foo") 65 | 66 | 67 | # test wrap with unexpected to true 68 | def test_wrap_unexpected_exceptions(): 69 | mock_method = MagicMock() 70 | mock_method.__name__ = "tos" 71 | decorated = wrap_unexpected_exceptions(mock_method) 72 | assert decorated.__name__ == mock_method.__name__ 73 | 74 | result = decorated(self, 0.0) 75 | assert result == mock_method.return_value 76 | assert ipython_display.send_error.call_count == 0 77 | mock_method.assert_called_once_with(self, 0.0) 78 | 79 | 80 | def test_wrap_unexpected_exceptions_handle(): 81 | mock_method = MagicMock(side_effect=ValueError("~~~~~~")) 82 | mock_method.__name__ = "tos" 83 | decorated = wrap_unexpected_exceptions(mock_method) 84 | assert decorated.__name__ == mock_method.__name__ 85 | 86 | result = decorated(self, "FOOBAR", FOOBAR="FOOBAR") 87 | assert result is None 88 | assert ipython_display.send_error.call_count == 1 89 | mock_method.assert_called_once_with(self, "FOOBAR", FOOBAR="FOOBAR") 90 | 91 | 92 | def test_wrap_unexpected_exceptions_throws_if_all_errors_fatal(): 93 | with pytest.raises(ValueError): 94 | conf.override_all({"all_errors_are_fatal": True}) 95 | mock_method = MagicMock(side_effect=ValueError("~~~~~~")) 96 | mock_method.__name__ = "tos" 97 | decorated = wrap_unexpected_exceptions(mock_method) 98 | assert decorated.__name__ == mock_method.__name__ 99 | 100 | _ = decorated(self, "FOOBAR", FOOBAR="FOOBAR") 101 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/addendpointwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | import importlib 4 | from sparkmagic.livyclientlib.endpoint import Endpoint 5 | import sparkmagic.utils.configuration as conf 6 | from sparkmagic.utils.constants import WIDGET_WIDTH 7 | from .abstractmenuwidget import AbstractMenuWidget 8 | 9 | 10 | class AddEndpointWidget(AbstractMenuWidget): 11 | def __init__( 12 | self, 13 | spark_controller, 14 | ipywidget_factory, 15 | ipython_display, 16 | endpoints, 17 | endpoints_dropdown_widget, 18 | refresh_method, 19 | ): 20 | # This is nested 21 | super(AddEndpointWidget, self).__init__( 22 | spark_controller, ipywidget_factory, ipython_display, True 23 | ) 24 | self.endpoints = endpoints 25 | self.endpoints_dropdown_widget = endpoints_dropdown_widget 26 | self.refresh_method = refresh_method 27 | 28 | # map auth class path string to the instance of the class. 29 | self.auth_instances = {} 30 | for auth in conf.authenticators().values(): 31 | module, class_name = (auth).rsplit(".", 1) 32 | events_handler_module = importlib.import_module(module) 33 | auth_class = getattr(events_handler_module, class_name) 34 | self.auth_instances[auth] = auth_class() 35 | 36 | dropdown_options = [(k, v) for k, v in conf.authenticators().items()] 37 | self.auth_type = self.ipywidget_factory.get_dropdown( 38 | options=dropdown_options, description="Auth type:" 39 | ) 40 | 41 | # combine all authentication instance's widgets into one list to pass to self.children. 42 | self.all_widgets = list() 43 | for _class, instance in self.auth_instances.items(): 44 | for widget in instance.widgets: 45 | if _class == self.auth_type.value: 46 | widget.layout.display = "flex" 47 | self.auth = instance 48 | else: 49 | widget.layout.display = "none" 50 | self.all_widgets.append(widget) 51 | 52 | # Submit widget 53 | self.submit_widget = self.ipywidget_factory.get_submit_button( 54 | description="Add endpoint" 55 | ) 56 | 57 | self.auth_type.on_trait_change(self._update_auth) 58 | 59 | self.children = ( 60 | [ 61 | self.ipywidget_factory.get_html(value="
", width=WIDGET_WIDTH), 62 | self.auth_type, 63 | ] 64 | + self.all_widgets 65 | + [ 66 | self.ipywidget_factory.get_html(value="
", width=WIDGET_WIDTH), 67 | self.submit_widget, 68 | ] 69 | ) 70 | 71 | for child in self.children: 72 | child.parent_widget = self 73 | self._update_auth() 74 | 75 | def run(self): 76 | self.auth.update_with_widget_values() 77 | if self.auth_type.label == "None": 78 | endpoint = Endpoint(self.auth.url, None) 79 | else: 80 | endpoint = Endpoint(self.auth.url, self.auth) 81 | self.endpoints[self.auth.url] = endpoint 82 | self.ipython_display.writeln("Added endpoint {}".format(self.auth.url)) 83 | try: 84 | # We need to call the refresh method because drop down in Tab 2 for endpoints wouldn't 85 | # refresh with the new value otherwise. 86 | self.refresh_method() 87 | except: 88 | self.endpoints.pop(self.auth.url, None) 89 | self.refresh_method() 90 | raise 91 | 92 | def _update_auth(self): 93 | """Create an instance of the chosen auth type maps to in the config 94 | file.""" 95 | for widget in self.auth.widgets: 96 | widget.layout.display = "none" 97 | self.auth = self.auth_instances.get(self.auth_type.value) 98 | for widget in self.auth.widgets: 99 | widget.layout.display = "flex" 100 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/sessionmanager.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | import atexit 4 | from sparkmagic.utils.sparklogger import SparkLog 5 | from sparkmagic.livyclientlib.exceptions import SessionManagementException 6 | from sparkmagic.utils.constants import MAGICS_LOGGER_NAME 7 | import sparkmagic.utils.configuration as conf 8 | 9 | 10 | class SessionManager(object): 11 | def __init__(self, ipython_display): 12 | self.logger = SparkLog("SessionManager") 13 | self.ipython_display = ipython_display 14 | 15 | self._sessions = dict() 16 | 17 | self._register_cleanup_on_exit() 18 | 19 | @property 20 | def sessions(self): 21 | return self._sessions 22 | 23 | def get_sessions_list(self): 24 | return list(self._sessions.keys()) 25 | 26 | def get_sessions_info(self): 27 | return [ 28 | "Name: {}\t{}".format(k, str(self._sessions[k])) 29 | for k in list(self._sessions.keys()) 30 | ] 31 | 32 | def add_session(self, name, session): 33 | if name in self._sessions: 34 | raise SessionManagementException( 35 | "Session with name '{}' already exists. Please delete the session" 36 | " first if you intend to replace it.".format(name) 37 | ) 38 | 39 | self._sessions[name] = session 40 | 41 | def get_any_session(self): 42 | number_of_sessions = len(self._sessions) 43 | if number_of_sessions == 1: 44 | key = self.get_sessions_list()[0] 45 | return self._sessions[key] 46 | elif number_of_sessions == 0: 47 | raise SessionManagementException( 48 | "You need to have at least 1 client created to execute commands." 49 | ) 50 | else: 51 | raise SessionManagementException( 52 | "Please specify the client to use. Possible sessions are {}".format( 53 | self.get_sessions_list() 54 | ) 55 | ) 56 | 57 | def get_session(self, name): 58 | if name in self._sessions: 59 | return self._sessions[name] 60 | raise SessionManagementException( 61 | "Could not find '{}' session in list of saved sessions. 
Possible sessions are {}".format( 62 | name, self.get_sessions_list() 63 | ) 64 | ) 65 | 66 | def get_session_id_for_client(self, name): 67 | if name in self.get_sessions_list(): 68 | return self._sessions[name].id 69 | return None 70 | 71 | def get_session_name_by_id_endpoint(self, id, endpoint): 72 | for name, session in self._sessions.items(): 73 | if session.id == int(id) and session.endpoint == endpoint: 74 | return name 75 | return None 76 | 77 | def delete_client(self, name): 78 | self._remove_session(name) 79 | 80 | def clean_up_all(self): 81 | for name in self.get_sessions_list(): 82 | self._remove_session(name) 83 | 84 | def _remove_session(self, name): 85 | if name in self.get_sessions_list(): 86 | self._sessions[name].delete() 87 | del self._sessions[name] 88 | else: 89 | raise SessionManagementException( 90 | "Could not find '{}' session in list of saved sessions. Possible sessions are {}".format( 91 | name, self.get_sessions_list() 92 | ) 93 | ) 94 | 95 | def _register_cleanup_on_exit(self): 96 | """ 97 | Stop the livy sessions before python process exits for any reason (if enabled in conf) 98 | """ 99 | if conf.cleanup_all_sessions_on_exit(): 100 | 101 | def cleanup_spark_sessions(): 102 | try: 103 | self.clean_up_all() 104 | except Exception as e: 105 | self.logger.error( 106 | "Error cleaning up sessions on exit: {}".format(e) 107 | ) 108 | pass 109 | 110 | atexit.register(cleanup_spark_sessions) 111 | self.ipython_display.writeln("Cleaning up livy sessions on exit is enabled") 112 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/sendpandasdftosparkcommand.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Jupyter Development Team. 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | from sparkmagic.livyclientlib.sendtosparkcommand import SendToSparkCommand 5 | from sparkmagic.livyclientlib.command import Command 6 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 7 | 8 | import sparkmagic.utils.configuration as conf 9 | 10 | import pandas as pd 11 | 12 | 13 | class SendPandasDfToSparkCommand(SendToSparkCommand): 14 | # convert unicode to utf8 or pyspark will mark data as corrupted(and deserialize incorrectly) 15 | _python_decode = """ 16 | import sys 17 | import json 18 | 19 | if sys.version_info.major == 2: 20 | def json_loads_byteified(json_text): 21 | return _byteify( 22 | json.loads(json_text, object_hook=_byteify), 23 | ignore_dicts=True 24 | ) 25 | else: 26 | def json_loads_byteified(json_text): 27 | return json.loads(json_text) 28 | 29 | def _byteify(data, ignore_dicts = False): 30 | if isinstance(data, unicode): 31 | return data.encode('utf-8') 32 | if isinstance(data, list): 33 | return [ _byteify(item, ignore_dicts=True) for item in data ] 34 | if isinstance(data, dict) and not ignore_dicts: 35 | return { 36 | _byteify(key, ignore_dicts=True): _byteify(value, ignore_dicts=True) 37 | for key, value in data.iteritems() 38 | } 39 | return data 40 | """ 41 | 42 | def __init__( 43 | self, input_variable_name, input_variable_value, output_variable_name, max_rows 44 | ): 45 | super(SendPandasDfToSparkCommand, self).__init__( 46 | input_variable_name, input_variable_value, output_variable_name 47 | ) 48 | self.max_rows = max_rows 49 | 50 | def _scala_command(self, input_variable_name, pandas_df, output_variable_name): 51 | self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df) 52 | pandas_json = self._get_dataframe_as_json(pandas_df) 53 | 54 | scala_code = ''' 55 | val rdd_json_array = spark.sparkContext.makeRDD("""{}""" :: Nil) 56 | val {} = spark.read.json(rdd_json_array)'''.format( 57 | pandas_json, output_variable_name 58 | ) 59 | 60 | return Command(scala_code) 61 | 62 | def _pyspark_command(self, input_variable_name, pandas_df, output_variable_name): 63 | self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df) 64 | 65 | pyspark_code = self._python_decode 66 | 67 | pandas_json = self._get_dataframe_as_json(pandas_df) 68 | 69 | pyspark_code += """ 70 | json_array = json_loads_byteified('{}') 71 | rdd_json_array = spark.sparkContext.parallelize(json_array) 72 | {} = spark.read.json(rdd_json_array)""".format( 73 | pandas_json, output_variable_name 74 | ) 75 | 76 | return Command(pyspark_code) 77 | 78 | def _r_command(self, input_variable_name, pandas_df, output_variable_name): 79 | self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df) 80 | pandas_json = self._get_dataframe_as_json(pandas_df) 81 | 82 | r_code = """ 83 | fileConn<-file("temporary_pandas_df_sparkmagics.txt") 84 | writeLines('{}', fileConn) 85 | close(fileConn) 86 | {} <- read.json("temporary_pandas_df_sparkmagics.txt") 87 | {}.persist() 88 | file.remove("temporary_pandas_df_sparkmagics.txt")""".format( 89 | pandas_json, output_variable_name, output_variable_name 90 | ) 91 | 92 | return Command(r_code) 93 | 94 | def _get_dataframe_as_json(self, pandas_df): 95 | return pandas_df.head(self.max_rows).to_json(orient="records") 96 | 97 | def _assert_input_is_pandas_dataframe( 98 | self, input_variable_name, input_variable_value 99 | ): 100 | if not isinstance(input_variable_value, pd.DataFrame): 101 | wrong_type = input_variable_value.__class__.__name__ 102 | raise BadUserDataException( 103 | "{} is not a Pandas DataFrame! 
Got {} instead.".format( 104 | input_variable_name, wrong_type 105 | ) 106 | ) 107 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/magicscontrollerwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.controllerwidget.abstractmenuwidget import AbstractMenuWidget 4 | from sparkmagic.controllerwidget.addendpointwidget import AddEndpointWidget 5 | from sparkmagic.controllerwidget.manageendpointwidget import ManageEndpointWidget 6 | from sparkmagic.controllerwidget.managesessionwidget import ManageSessionWidget 7 | from sparkmagic.controllerwidget.createsessionwidget import CreateSessionWidget 8 | from sparkmagic.livyclientlib.endpoint import Endpoint 9 | from sparkmagic.utils.constants import LANGS_SUPPORTED 10 | import sparkmagic.utils.configuration as conf 11 | from sparkmagic.utils.utils import Namespace, initialize_auth 12 | 13 | 14 | class MagicsControllerWidget(AbstractMenuWidget): 15 | def __init__( 16 | self, spark_controller, ipywidget_factory, ipython_display, endpoints=None 17 | ): 18 | super(MagicsControllerWidget, self).__init__( 19 | spark_controller, ipywidget_factory, ipython_display 20 | ) 21 | 22 | if endpoints is None: 23 | endpoints = { 24 | endpoint.url: endpoint for endpoint in self._get_default_endpoints() 25 | } 26 | self.endpoints = endpoints 27 | 28 | self._refresh() 29 | 30 | def run(self): 31 | pass 32 | 33 | @staticmethod 34 | def _get_default_endpoints(): 35 | default_endpoints = set() 36 | 37 | for kernel_type in LANGS_SUPPORTED: 38 | endpoint_config = getattr(conf, "kernel_%s_credentials" % kernel_type)() 39 | if ( 40 | all([p in endpoint_config for p in ["url", "password", "username"]]) 41 | and endpoint_config["url"] != "" 42 | ): 43 | user = endpoint_config["username"] 44 | passwd = endpoint_config["password"] 45 | args = Namespace( 46 | user=user, 47 | password=passwd, 48 | auth=endpoint_config.get("auth", None), 49 | url=endpoint_config.get("url", None), 50 | ) 51 | auth_instance = initialize_auth(args) 52 | 53 | default_endpoints.add( 54 | Endpoint( 55 | auth=auth_instance, 56 | url=endpoint_config["url"], 57 | implicitly_added=True, 58 | ) 59 | ) 60 | 61 | return default_endpoints 62 | 63 | def _refresh(self): 64 | dropdown_options = [(k, v) for k, v in self.endpoints.items()] 65 | self.endpoints_dropdown_widget = self.ipywidget_factory.get_dropdown( 66 | description="Endpoint:", options=dropdown_options 67 | ) 68 | 69 | self.manage_session = ManageSessionWidget( 70 | self.spark_controller, 71 | self.ipywidget_factory, 72 | self.ipython_display, 73 | self._refresh, 74 | ) 75 | self.create_session = CreateSessionWidget( 76 | self.spark_controller, 77 | self.ipywidget_factory, 78 | self.ipython_display, 79 | self.endpoints_dropdown_widget, 80 | self._refresh, 81 | ) 82 | self.add_endpoint = AddEndpointWidget( 83 | self.spark_controller, 84 | self.ipywidget_factory, 85 | self.ipython_display, 86 | self.endpoints, 87 | self.endpoints_dropdown_widget, 88 | self._refresh, 89 | ) 90 | self.manage_endpoint = ManageEndpointWidget( 91 | self.spark_controller, 92 | self.ipywidget_factory, 93 | self.ipython_display, 94 | self.endpoints, 95 | self._refresh, 96 | ) 97 | 98 | self.tabs = self.ipywidget_factory.get_tab( 99 | children=[ 100 | self.manage_session, 101 | self.create_session, 102 | self.add_endpoint, 103 | 
self.manage_endpoint, 104 | ] 105 | ) 106 | self.tabs.set_title(0, "Manage Sessions") 107 | self.tabs.set_title(1, "Create Session") 108 | self.tabs.set_title(2, "Add Endpoint") 109 | self.tabs.set_title(3, "Manage Endpoints") 110 | 111 | self.children = [self.tabs] 112 | 113 | for child in self.children: 114 | child.parent_widget = self 115 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from ..widget import utils as utils 4 | from ..widget.encoding import Encoding 5 | 6 | 7 | df = None 8 | encoding = None 9 | 10 | 11 | def setup_function(): 12 | global df, encoding 13 | 14 | records = [ 15 | { 16 | "buildingID": 0, 17 | "date": "6/1/13", 18 | "temp_diff": 12, 19 | "mystr": "alejandro", 20 | "mystr2": "1", 21 | }, 22 | { 23 | "buildingID": 1, 24 | "date": "6/1/13", 25 | "temp_diff": 0, 26 | "mystr": "alejandro", 27 | "mystr2": "1", 28 | }, 29 | { 30 | "buildingID": 2, 31 | "date": "6/1/14", 32 | "temp_diff": 11, 33 | "mystr": "alejandro", 34 | "mystr2": "1", 35 | }, 36 | { 37 | "buildingID": 0, 38 | "date": "6/1/15", 39 | "temp_diff": 5, 40 | "mystr": "alejandro", 41 | "mystr2": "1.0", 42 | }, 43 | { 44 | "buildingID": 1, 45 | "date": "6/1/16", 46 | "temp_diff": 19, 47 | "mystr": "alejandro", 48 | "mystr2": "1", 49 | }, 50 | { 51 | "buildingID": 2, 52 | "date": "6/1/17", 53 | "temp_diff": 32, 54 | "mystr": "alejandro", 55 | "mystr2": "1", 56 | }, 57 | ] 58 | df = pd.DataFrame(records) 59 | 60 | encoding = Encoding(chart_type="table", x="date", y="temp_diff") 61 | 62 | 63 | def teardown_function(): 64 | pass 65 | 66 | 67 | def test_on_render_viz(): 68 | df["date"] = pd.to_datetime(df["date"]) 69 | df["mystr2"] = pd.to_numeric(df["mystr2"]) 70 | 71 | assert utils.infer_vegalite_type(df["buildingID"]) == "Q" 72 | assert utils.infer_vegalite_type(df["date"]) == "T" 73 | assert utils.infer_vegalite_type(df["temp_diff"]) == "Q" 74 | assert utils.infer_vegalite_type(df["mystr"]) == "N" 75 | assert utils.infer_vegalite_type(df["mystr2"]) == "Q" 76 | 77 | 78 | def test_select_x(): 79 | assert utils.select_x(None) is None 80 | 81 | def _check(d, expected): 82 | x = utils.select_x(d) 83 | assert x == expected 84 | 85 | data = dict( 86 | col1=[1.0, 2.0, 3.0], # Q 87 | col2=["A", "B", "C"], # N 88 | col3=pd.date_range("2012", periods=3, freq="A"), 89 | ) # T 90 | _check(data, "col3") 91 | 92 | data = dict(col1=[1.0, 2.0, 3.0], col2=["A", "B", "C"]) # Q # N 93 | _check(data, "col2") 94 | 95 | data = dict(col1=[1.0, 2.0, 3.0]) # Q 96 | _check(data, "col1") 97 | 98 | # Custom order 99 | data = dict( 100 | col1=[1.0, 2.0, 3.0], # Q 101 | col2=["A", "B", "C"], # N 102 | col3=pd.date_range("2012", periods=3, freq="A"), # T 103 | col4=pd.date_range("2012", periods=3, freq="A"), 104 | ) # T 105 | selected_x = utils.select_x(data, ["N", "T", "Q", "O"]) 106 | assert selected_x == "col2" 107 | 108 | # Len < 1 109 | assert utils.select_x(dict()) is None 110 | 111 | 112 | def test_select_y(): 113 | def _check(d, expected): 114 | x = "col1" 115 | y = utils.select_y(d, x) 116 | assert y == expected 117 | 118 | data = dict( 119 | col1=[1.0, 2.0, 3.0], # Chosen X 120 | col2=["A", "B", "C"], # N 121 | col3=pd.date_range("2012", periods=3, freq="A"), # T 122 | col4=pd.date_range("2012", periods=3, freq="A"), # T 123 | col5=[1.0, 2.0, 3.0], 124 | ) # Q 125 | _check(data, "col5") 126 | 127 | data = dict( 128 | col1=[1.0, 2.0, 
3.0], # Chosen X 129 | col2=["A", "B", "C"], # N 130 | col3=pd.date_range("2012", periods=3, freq="A"), 131 | ) # T 132 | _check(data, "col2") 133 | 134 | data = dict( 135 | col1=[1.0, 2.0, 3.0], # Chosen X 136 | col2=pd.date_range("2012", periods=3, freq="A"), 137 | ) # T 138 | _check(data, "col2") 139 | 140 | # No data 141 | assert utils.select_y(None, "something") is None 142 | 143 | # Len < 2 144 | assert utils.select_y(dict(col1=[1.0, 2.0, 3.0]), "something") is None 145 | 146 | # No x 147 | assert utils.select_y(df, None) is None 148 | 149 | # Custom order 150 | data = dict( 151 | col1=[1.0, 2.0, 3.0], # Chosen X 152 | col2=["A", "B", "C"], # N 153 | col3=pd.date_range("2012", periods=3, freq="A"), # T 154 | col4=pd.date_range("2012", periods=3, freq="A"), # T 155 | col5=[1.0, 2.0, 3.0], # Q 156 | col6=[1.0, 2.0, 3.0], 157 | ) # Q 158 | selected_x = "col1" 159 | selected_y = utils.select_y(data, selected_x, ["N", "T", "Q", "O"]) 160 | assert selected_y == "col2" 161 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_livyreliablehttpclient.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | 3 | from sparkmagic.livyclientlib.livyreliablehttpclient import LivyReliableHttpClient 4 | from sparkmagic.livyclientlib.endpoint import Endpoint 5 | import sparkmagic.utils.configuration as conf 6 | import sparkmagic.utils.constants as constants 7 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 8 | from sparkmagic.livyclientlib.configurableretrypolicy import ConfigurableRetryPolicy 9 | from sparkmagic.livyclientlib.linearretrypolicy import LinearRetryPolicy 10 | 11 | 12 | def test_post_statement(): 13 | http_client = MagicMock() 14 | livy_client = LivyReliableHttpClient(http_client, None) 15 | data = {"adlfj": "sadflkjsdf"} 16 | out = livy_client.post_statement(100, data) 17 | assert out == http_client.post.return_value.json.return_value 18 | http_client.post.assert_called_once_with("/sessions/100/statements", [201], data) 19 | 20 | 21 | def test_get_statement(): 22 | http_client = MagicMock() 23 | livy_client = LivyReliableHttpClient(http_client, None) 24 | out = livy_client.get_statement(100, 4) 25 | assert out == http_client.get.return_value.json.return_value 26 | http_client.get.assert_called_once_with("/sessions/100/statements/4", [200]) 27 | 28 | 29 | def test_cancel_statement(): 30 | http_client = MagicMock() 31 | livy_client = LivyReliableHttpClient(http_client, None) 32 | out = livy_client.cancel_statement(100, 104) 33 | assert out == http_client.post.return_value.json.return_value 34 | http_client.post.assert_called_once_with( 35 | "/sessions/100/statements/104/cancel", [200], {} 36 | ) 37 | 38 | 39 | def test_get_sessions(): 40 | http_client = MagicMock() 41 | livy_client = LivyReliableHttpClient(http_client, None) 42 | out = livy_client.get_sessions() 43 | assert out == http_client.get.return_value.json.return_value 44 | http_client.get.assert_called_once_with("/sessions", [200]) 45 | 46 | 47 | def test_post_session(): 48 | http_client = MagicMock() 49 | livy_client = LivyReliableHttpClient(http_client, None) 50 | properties = {"adlfj": "sadflkjsdf", 1: [2, 3, 4, 5]} 51 | out = livy_client.post_session(properties) 52 | assert out == http_client.post.return_value.json.return_value 53 | http_client.post.assert_called_once_with("/sessions", [201], properties) 54 | 55 | 56 | def test_get_session(): 57 | http_client = 
MagicMock() 58 | livy_client = LivyReliableHttpClient(http_client, None) 59 | out = livy_client.get_session(4) 60 | assert out == http_client.get.return_value.json.return_value 61 | http_client.get.assert_called_once_with("/sessions/4", [200]) 62 | 63 | 64 | def test_delete_session(): 65 | http_client = MagicMock() 66 | livy_client = LivyReliableHttpClient(http_client, None) 67 | livy_client.delete_session(99) 68 | http_client.delete.assert_called_once_with("/sessions/99", [200, 404]) 69 | 70 | 71 | def test_get_all_session_logs(): 72 | http_client = MagicMock() 73 | livy_client = LivyReliableHttpClient(http_client, None) 74 | out = livy_client.get_all_session_logs(42) 75 | assert out == http_client.get.return_value.json.return_value 76 | http_client.get.assert_called_once_with("/sessions/42/log?from=0", [200]) 77 | 78 | 79 | def test_custom_headers(): 80 | custom_headers = {"header1": "value1"} 81 | overrides = {conf.custom_headers.__name__: custom_headers} 82 | conf.override_all(overrides) 83 | endpoint = Endpoint("http://url.com", None) 84 | client = LivyReliableHttpClient.from_endpoint(endpoint) 85 | headers = client.get_headers() 86 | assert len(headers) == 2 87 | assert "Content-Type" in headers 88 | assert "header1" in headers 89 | 90 | 91 | def test_retry_policy(): 92 | # Default is configurable retry 93 | times = conf.retry_seconds_to_sleep_list() 94 | max_retries = conf.configurable_retry_policy_max_retries() 95 | policy = LivyReliableHttpClient._get_retry_policy() 96 | assert type(policy) is ConfigurableRetryPolicy 97 | assert times == policy.retry_seconds_to_sleep_list 98 | assert max_retries == policy.max_retries 99 | 100 | # Configure to linear retry 101 | _override_policy(constants.LINEAR_RETRY) 102 | policy = LivyReliableHttpClient._get_retry_policy() 103 | assert type(policy) is LinearRetryPolicy 104 | assert 5 == policy.seconds_to_sleep(1) 105 | assert 5 == policy.max_retries 106 | 107 | # Configure to something invalid 108 | _override_policy("garbage") 109 | try: 110 | policy = LivyReliableHttpClient._get_retry_policy() 111 | assert False 112 | except BadUserConfigurationException: 113 | assert True 114 | 115 | 116 | def _override_policy(policy): 117 | overrides = {conf.retry_policy.__name__: policy} 118 | conf.override_all(overrides) 119 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License.
3 | 4 | import os 5 | 6 | HOME_PATH = os.environ.get("SPARKMAGIC_CONF_DIR", "~/.sparkmagic") 7 | CONFIG_FILE = os.environ.get("SPARKMAGIC_CONF_FILE", "config.json") 8 | 9 | SESSION_KIND_SPARK = "spark" 10 | SESSION_KIND_PYSPARK = "pyspark" 11 | SESSION_KIND_SPARKR = "sparkr" 12 | SESSION_KINDS_SUPPORTED = [ 13 | SESSION_KIND_SPARK, 14 | SESSION_KIND_PYSPARK, 15 | SESSION_KIND_SPARKR, 16 | ] 17 | 18 | LIBRARY_LOADED_EVENT = "notebookLoaded" 19 | CLUSTER_CHANGE_EVENT = "notebookClusterChange" 20 | SESSION_CREATION_START_EVENT = "notebookSessionCreationStart" 21 | SESSION_CREATION_END_EVENT = "notebookSessionCreationEnd" 22 | SESSION_DELETION_START_EVENT = "notebookSessionDeletionStart" 23 | SESSION_DELETION_END_EVENT = "notebookSessionDeletionEnd" 24 | STATEMENT_EXECUTION_START_EVENT = "notebookStatementExecutionStart" 25 | STATEMENT_EXECUTION_END_EVENT = "notebookStatementExecutionEnd" 26 | SQL_EXECUTION_START_EVENT = "notebookSqlExecutionStart" 27 | SQL_EXECUTION_END_EVENT = "notebookSqlExecutionEnd" 28 | MAGIC_EXECUTION_START_EVENT = "notebookMagicExecutionStart" 29 | MAGIC_EXECUTION_END_EVENT = "notebookMagicExecutionEnd" 30 | 31 | CLUSTER_DNS_NAME = "ClusterDnsName" 32 | SESSION_ID = "SessionId" 33 | SESSION_GUID = "SessionGuid" 34 | STATEMENT_ID = "StatementId" 35 | STATEMENT_GUID = "StatementGuid" 36 | SQL_GUID = "SqlGuid" 37 | MAGIC_NAME = "MagicName" 38 | MAGIC_GUID = "MagicGuid" 39 | LIVY_KIND = "LivyKind" 40 | STATUS = "Status" 41 | SUCCESS = "Success" 42 | EXCEPTION_TYPE = "ExceptionType" 43 | EXCEPTION_MESSAGE = "ExceptionMessage" 44 | SAMPLE_METHOD = "SampleMethod" 45 | MAX_ROWS = "MaxRows" 46 | SAMPLE_FRACTION = "SampleFraction" 47 | ERROR_MESSAGE = "ErrorMessage" 48 | STATUS_CODE = "StatusCode" 49 | 50 | CONTEXT_NAME_SPARK = "spark" 51 | CONTEXT_NAME_SQL = "sql" 52 | 53 | LANG_SCALA = "scala" 54 | LANG_PYTHON = "python" 55 | LANG_R = "r" 56 | LANGS_SUPPORTED = [LANG_SCALA, LANG_PYTHON, LANG_R] 57 | 58 | LONG_RANDOM_VARIABLE_NAME = "yQeKOYBsFgLWWGWZJu3y" 59 | 60 | WIDGET_WIDTH = "800px" 61 | 62 | MAGICS_LOGGER_NAME = "magicsLogger" 63 | 64 | # The list here https://livy.incubator.apache.org/docs/latest/rest-api.html 65 | # appears incomplete; full list is in 66 | # https://github.com/apache/incubator-livy/blob/master/core/src/main/scala/org/apache/livy/sessions/SessionState.scala: 67 | IDLE_SESSION_STATUS = "idle" 68 | ERROR_SESSION_STATUS = "error" 69 | DEAD_SESSION_STATUS = "dead" 70 | NOT_STARTED_SESSION_STATUS = "not_started" 71 | STARTING_SESSION_STATUS = "starting" 72 | BUSY_SESSION_STATUS = "busy" 73 | SUCCESS_SESSION_STATUS = "success" 74 | SHUT_DOWN_SESSION_STATUS = "shutting_down" 75 | RUNNING_SESSION_STATUS = "running" 76 | KILLED_SESSION_STATUS = "killed" 77 | RECOVERING_SESSION_STATUS = "recovering" 78 | 79 | POSSIBLE_SESSION_STATUS = [ 80 | NOT_STARTED_SESSION_STATUS, 81 | IDLE_SESSION_STATUS, 82 | STARTING_SESSION_STATUS, 83 | BUSY_SESSION_STATUS, 84 | ERROR_SESSION_STATUS, 85 | DEAD_SESSION_STATUS, 86 | SUCCESS_SESSION_STATUS, 87 | SHUT_DOWN_SESSION_STATUS, 88 | RUNNING_SESSION_STATUS, 89 | KILLED_SESSION_STATUS, 90 | RECOVERING_SESSION_STATUS, 91 | ] 92 | FINAL_STATUS = [ 93 | DEAD_SESSION_STATUS, 94 | ERROR_SESSION_STATUS, 95 | SUCCESS_SESSION_STATUS, 96 | KILLED_SESSION_STATUS, 97 | ] 98 | 99 | ERROR_STATEMENT_STATUS = "error" 100 | CANCELLED_STATEMENT_STATUS = "cancelled" 101 | AVAILABLE_STATEMENT_STATUS = "available" 102 | FINAL_STATEMENT_STATUS = [ 103 | ERROR_STATEMENT_STATUS, 104 | CANCELLED_STATEMENT_STATUS, 105 | AVAILABLE_STATEMENT_STATUS, 106 | ] 
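The statement-status constants above exist so callers can poll Livy until a statement stops changing state. A minimal sketch of that polling loop, assuming a hypothetical fetch_statement callable that returns Livy's statement JSON (the callable, the polling budget, and the "state" key access are illustrative assumptions, not sparkmagic code):

import time

from sparkmagic.utils.constants import FINAL_STATEMENT_STATUS


def wait_for_statement(fetch_statement, interval_seconds=1.0, max_polls=120):
    # Poll the hypothetical fetch_statement() until the statement reaches
    # one of the terminal states in FINAL_STATEMENT_STATUS above.
    for _ in range(max_polls):
        statement = fetch_statement()
        if statement["state"] in FINAL_STATEMENT_STATUS:
            return statement
        time.sleep(interval_seconds)
    raise TimeoutError("Statement did not reach a final state in time")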
107 | 108 | DELETE_SESSION_ACTION = "delete" 109 | START_SESSION_ACTION = "start" 110 | DO_NOTHING_ACTION = "nothing" 111 | 112 | INTERNAL_ERROR_MSG = ( 113 | "An internal error was encountered.\n" 114 | "Please file an issue at https://github.com/jupyter-incubator/sparkmagic\nError:\n{}" 115 | ) 116 | EXPECTED_ERROR_MSG = "An error was encountered:\n{}" 117 | 118 | YARN_RESOURCE_LIMIT_MSG = "Queue's AM resource limit exceeded." 119 | RESOURCE_LIMIT_WARNING = ( 120 | "Warning: The Spark session does not have enough YARN resources to start. {}" 121 | ) 122 | COMMAND_INTERRUPTED_MSG = "Interrupted by user" 123 | COMMAND_CANCELLATION_FAILED_MSG = ( 124 | "Interrupted by user but Livy failed to cancel the Spark statement. " 125 | "The Livy session might have become unusable." 126 | ) 127 | 128 | LIVY_HEARTBEAT_TIMEOUT_PARAM = "heartbeatTimeoutInSecond" 129 | LIVY_KIND_PARAM = "kind" 130 | 131 | NO_AUTH = "None" 132 | AUTH_KERBEROS = "Kerberos" 133 | AUTH_BASIC = "Basic_Access" 134 | 135 | CONFIGURABLE_RETRY = "configurable" 136 | LINEAR_RETRY = "linear" 137 | 138 | MIMETYPE_IMAGE_PNG = "image/png" 139 | MIMETYPE_TEXT_HTML = "text/html" 140 | MIMETYPE_TEXT_PLAIN = "text/plain" 141 | MIMETYPE_APPLICATION_JSON = "application/json" 142 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_pd_data_coerce.py: -------------------------------------------------------------------------------- 1 | from pandas.testing import assert_frame_equal 2 | import pandas as pd 3 | 4 | from sparkmagic.utils.utils import coerce_pandas_df_to_numeric_datetime 5 | 6 | 7 | def test_no_coercing(): 8 | records = [ 9 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 10 | {"buildingID": 1, "date": "random", "temp_diff": "0adsf"}, 11 | ] 12 | desired_df = pd.DataFrame(records) 13 | 14 | df = pd.DataFrame(records) 15 | coerce_pandas_df_to_numeric_datetime(df) 16 | 17 | assert_frame_equal(desired_df, df) 18 | 19 | 20 | def test_date_coercing(): 21 | records = [ 22 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 23 | {"buildingID": 1, "date": "6/1/13", "temp_diff": "0adsf"}, 24 | ] 25 | desired_df = pd.DataFrame(records) 26 | desired_df["date"] = pd.to_datetime(desired_df["date"]) 27 | 28 | df = pd.DataFrame(records) 29 | coerce_pandas_df_to_numeric_datetime(df) 30 | 31 | assert_frame_equal(desired_df, df) 32 | 33 | 34 | def test_date_coercing_none_values(): 35 | records = [ 36 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 37 | {"buildingID": 1, "date": None, "temp_diff": "0adsf"}, 38 | ] 39 | desired_df = pd.DataFrame(records) 40 | desired_df["date"] = pd.to_datetime(desired_df["date"]) 41 | 42 | df = pd.DataFrame(records) 43 | coerce_pandas_df_to_numeric_datetime(df) 44 | 45 | assert_frame_equal(desired_df, df) 46 | 47 | 48 | def test_date_none_values_and_no_coercing(): 49 | records = [ 50 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 51 | {"buildingID": 1, "date": None, "temp_diff": "0adsf"}, 52 | {"buildingID": 1, "date": "adsf", "temp_diff": "0adsf"}, 53 | ] 54 | desired_df = pd.DataFrame(records) 55 | 56 | df = pd.DataFrame(records) 57 | coerce_pandas_df_to_numeric_datetime(df) 58 | 59 | assert_frame_equal(desired_df, df) 60 | 61 | 62 | def test_numeric_coercing(): 63 | records = [ 64 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 65 | {"buildingID": 1, "date": "adsf", "temp_diff": "0"}, 66 | ] 67 | desired_df = pd.DataFrame(records) 68 | desired_df["temp_diff"] = pd.to_numeric(desired_df["temp_diff"]) 69 | 
70 | df = pd.DataFrame(records) 71 | coerce_pandas_df_to_numeric_datetime(df) 72 | 73 | assert_frame_equal(desired_df, df) 74 | 75 | 76 | def test_numeric_coercing_none_values(): 77 | records = [ 78 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 79 | {"buildingID": 1, "date": "asdf", "temp_diff": None}, 80 | ] 81 | desired_df = pd.DataFrame(records) 82 | desired_df["temp_diff"] = pd.to_numeric(desired_df["temp_diff"]) 83 | 84 | df = pd.DataFrame(records) 85 | coerce_pandas_df_to_numeric_datetime(df) 86 | 87 | assert_frame_equal(desired_df, df) 88 | 89 | 90 | def test_numeric_none_values_and_no_coercing(): 91 | records = [ 92 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 93 | {"buildingID": 1, "date": "asdf", "temp_diff": None}, 94 | {"buildingID": 1, "date": "adsf", "temp_diff": "0asdf"}, 95 | ] 96 | desired_df = pd.DataFrame(records) 97 | 98 | df = pd.DataFrame(records) 99 | coerce_pandas_df_to_numeric_datetime(df) 100 | 101 | assert_frame_equal(desired_df, df) 102 | 103 | 104 | def test_df_dict_does_not_throw(): 105 | json_str = """ 106 | [{ 107 | "id": 580320, 108 | "name": "COUSIN'S GRILL", 109 | "results": "Fail", 110 | "violations": "37. TOILET area.", 111 | "words": ["37.", 112 | "toilet", 113 | "area."], 114 | "features": { 115 | "type": 0, 116 | "size": 262144, 117 | "indices": [0, 118 | 45, 119 | 97], 120 | "values": [7.0, 121 | 5.0, 122 | 1.0] 123 | }, 124 | "rawPrediction": { 125 | "type": 1, 126 | "values": [3.640841752791392, 127 | -3.640841752791392] 128 | }, 129 | "probability": { 130 | "type": 1, 131 | "values": [0.974440185187647, 132 | 0.025559814812352966] 133 | }, 134 | "prediction": 0.0 135 | }] 136 | """ 137 | df = pd.read_json(json_str) 138 | coerce_pandas_df_to_numeric_datetime(df) 139 | 140 | 141 | def test_overflow_coercing(): 142 | records = [{"_c0": "12345678901"}] 143 | desired_df = pd.DataFrame(records) 144 | desired_df["_c0"] = pd.to_numeric(desired_df["_c0"]) 145 | df = pd.DataFrame(records) 146 | coerce_pandas_df_to_numeric_datetime(df) 147 | assert_frame_equal(desired_df, df) 148 | 149 | 150 | def test_all_null_columns(): 151 | records = [{"_c0": "12345", "nulla": None}, {"_c0": "12345", "nulla": None}] 152 | desired_df = pd.DataFrame(records) 153 | desired_df["_c0"] = pd.to_numeric(desired_df["_c0"]) 154 | df = pd.DataFrame(records) 155 | coerce_pandas_df_to_numeric_datetime(df) 156 | assert_frame_equal(desired_df, df) 157 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_encodingwidget.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock, call 2 | from ipywidgets import Widget 3 | import pandas as pd 4 | 5 | from ..widget.encodingwidget import EncodingWidget 6 | from ..widget.encoding import Encoding 7 | 8 | 9 | df = None 10 | encoding = None 11 | ipywidget_factory = None 12 | change_hook = None 13 | 14 | 15 | def setup_function(): 16 | global df, encoding, ipywidget_factory, change_hook 17 | 18 | records = [ 19 | {"buildingID": 0, "date": "6/1/13", "temp_diff": 12, "\u263A": True}, 20 | {"buildingID": 1, "date": "6/1/13", "temp_diff": 0, "\u263A": True}, 21 | {"buildingID": 2, "date": "6/1/14", "temp_diff": 11, "\u263A": True}, 22 | {"buildingID": 0, "date": "6/1/15", "temp_diff": 5, "\u263A": True}, 23 | {"buildingID": 1, "date": "6/1/16", "temp_diff": 19, "\u263A": True}, 24 | {"buildingID": 2, "date": "6/1/17", "temp_diff": 32, "\u263A": True}, 25 | ] 26 | df = pd.DataFrame(records) 27 | 28 | 
encoding = Encoding(chart_type="table", x="date", y="temp_diff") 29 | 30 | ipywidget_factory = MagicMock() 31 | ipywidget_factory.get_vbox.return_value = MagicMock(spec=Widget) 32 | 33 | change_hook = MagicMock() 34 | 35 | 36 | def teardown_function(): 37 | pass 38 | 39 | 40 | def test_encoding_with_all_none_doesnt_throw(): 41 | records = [ 42 | {"buildingID": 0, "date": "6/1/13", "temp_diff": 12}, 43 | {"buildingID": 1, "date": "6/1/13", "temp_diff": 0}, 44 | {"buildingID": 2, "date": "6/1/14", "temp_diff": 11}, 45 | {"buildingID": 0, "date": "6/1/15", "temp_diff": 5}, 46 | {"buildingID": 1, "date": "6/1/16", "temp_diff": 19}, 47 | {"buildingID": 2, "date": "6/1/17", "temp_diff": 32}, 48 | ] 49 | df = pd.DataFrame(records) 50 | 51 | encoding = Encoding() 52 | 53 | ipywidget_factory = MagicMock() 54 | ipywidget_factory.get_vbox.return_value = MagicMock(spec=Widget) 55 | 56 | EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 57 | 58 | assert ( 59 | call( 60 | description="X", 61 | value=None, 62 | options=[ 63 | ("-", None), 64 | ("buildingID", "buildingID"), 65 | ("date", "date"), 66 | ("temp_diff", "temp_diff"), 67 | ], 68 | ) 69 | in ipywidget_factory.get_dropdown.mock_calls 70 | ) 71 | assert ( 72 | call( 73 | description="Y", 74 | value=None, 75 | options=[ 76 | ("-", None), 77 | ("buildingID", "buildingID"), 78 | ("date", "date"), 79 | ("temp_diff", "temp_diff"), 80 | ], 81 | ) 82 | in ipywidget_factory.get_dropdown.mock_calls 83 | ) 84 | assert ( 85 | call( 86 | description="Func.", 87 | value="none", 88 | options=[ 89 | ("-", "None"), 90 | ("Avg", "Avg"), 91 | ("Min", "Min"), 92 | ("Max", "Max"), 93 | ("Sum", "Sum"), 94 | ("Count", "Count"), 95 | ], 96 | ) 97 | in ipywidget_factory.get_dropdown.mock_calls 98 | ) 99 | 100 | 101 | def test_value_for_aggregation(): 102 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 103 | 104 | assert widget._get_value_for_aggregation(None) == "none" 105 | assert widget._get_value_for_aggregation("avg") == "avg" 106 | 107 | 108 | def test_x_changed_callback(): 109 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 110 | 111 | widget._x_changed_callback("name", "old", "new") 112 | 113 | assert encoding.x == "new" 114 | assert change_hook.call_count == 1 115 | 116 | 117 | def test_y_changed_callback(): 118 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 119 | 120 | widget._y_changed_callback("name", "old", "new") 121 | 122 | assert encoding.y == "new" 123 | assert change_hook.call_count == 1 124 | 125 | 126 | def test_y_agg__changed_callback(): 127 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 128 | 129 | widget._y_agg_changed_callback("name", "old", "new") 130 | 131 | assert encoding.y_aggregation == "new" 132 | assert change_hook.call_count == 1 133 | 134 | 135 | def test_log_x_changed_callback(): 136 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 137 | 138 | widget._logarithmic_x_callback("name", "old", "new") 139 | 140 | assert encoding.logarithmic_x_axis == "new" 141 | assert change_hook.call_count == 1 142 | 143 | 144 | def test_log_y_changed_callback(): 145 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 146 | 147 | widget._logarithmic_y_callback("name", "old", "new") 148 | 149 | assert encoding.logarithmic_y_axis == "new" 150 | assert change_hook.call_count == 1 151 | 
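The callback tests above all pin down one contract: a change handler writes the new dropdown value onto the shared Encoding object, then fires the change hook exactly once so the chart re-renders. A stripped-down sketch of that contract using stand-in classes (illustrative only; the real wiring lives in autovizwidget's EncodingWidget):

class FakeEncoding(object):
    # Stand-in for autovizwidget.widget.encoding.Encoding: mutable chart state.
    x = None


class EncodingBinding(object):
    # Mimics the update-then-notify pattern exercised by the tests above.
    def __init__(self, encoding, change_hook):
        self.encoding = encoding
        self.change_hook = change_hook

    def x_changed_callback(self, name, old, new):
        self.encoding.x = new  # mutate the shared encoding first...
        self.change_hook()  # ...then notify listeners exactly once


calls = []
binding = EncodingBinding(FakeEncoding(), lambda: calls.append(1))
binding.x_changed_callback("name", "old", "new")
assert binding.encoding.x == "new" and len(calls) == 1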
-------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_sendstringtosparkcommand.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import pytest 3 | from mock import MagicMock 4 | 5 | from sparkmagic.livyclientlib.sendstringtosparkcommand import SendStringToSparkCommand 6 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 7 | from sparkmagic.livyclientlib.command import Command 8 | import sparkmagic.utils.constants as constants 9 | 10 | 11 | def test_send_to_scala(): 12 | input_variable_name = "input" 13 | input_variable_value = "value" 14 | output_variable_name = "output" 15 | sparkcommand = SendStringToSparkCommand( 16 | input_variable_name, input_variable_value, output_variable_name 17 | ) 18 | sparkcommand._scala_command = MagicMock(return_value=MagicMock()) 19 | sparkcommand.to_command( 20 | constants.SESSION_KIND_SPARK, 21 | input_variable_name, 22 | input_variable_value, 23 | output_variable_name, 24 | ) 25 | sparkcommand._scala_command.assert_called_with( 26 | input_variable_name, input_variable_value, output_variable_name 27 | ) 28 | 29 | 30 | def test_send_to_r(): 31 | input_variable_name = "input" 32 | input_variable_value = "value" 33 | output_variable_name = "output" 34 | sparkcommand = SendStringToSparkCommand( 35 | input_variable_name, input_variable_value, output_variable_name 36 | ) 37 | sparkcommand._r_command = MagicMock(return_value=MagicMock()) 38 | sparkcommand.to_command( 39 | constants.SESSION_KIND_SPARKR, 40 | input_variable_name, 41 | input_variable_value, 42 | output_variable_name, 43 | ) 44 | sparkcommand._r_command.assert_called_with( 45 | input_variable_name, input_variable_value, output_variable_name 46 | ) 47 | 48 | 49 | def test_send_to_pyspark(): 50 | input_variable_name = "input" 51 | input_variable_value = "value" 52 | output_variable_name = "output" 53 | sparkcommand = SendStringToSparkCommand( 54 | input_variable_name, input_variable_value, output_variable_name 55 | ) 56 | sparkcommand._pyspark_command = MagicMock(return_value=MagicMock()) 57 | sparkcommand.to_command( 58 | constants.SESSION_KIND_PYSPARK, 59 | input_variable_name, 60 | input_variable_value, 61 | output_variable_name, 62 | ) 63 | sparkcommand._pyspark_command.assert_called_with( 64 | input_variable_name, input_variable_value, output_variable_name 65 | ) 66 | 67 | 68 | def test_to_command_invalid(): 69 | input_variable_name = "input" 70 | input_variable_value = 42 71 | output_variable_name = "output" 72 | sparkcommand = SendStringToSparkCommand( 73 | input_variable_name, input_variable_value, output_variable_name 74 | ) 75 | with pytest.raises( 76 | BadUserDataException, 77 | ): 78 | sparkcommand.to_command( 79 | "invalid", 80 | input_variable_name, 81 | input_variable_value, 82 | output_variable_name, 83 | ) 84 | 85 | 86 | def test_should_raise_when_input_aint_a_string(): 87 | input_variable_name = "input" 88 | input_variable_value = 42 89 | output_variable_name = "output" 90 | sparkcommand = SendStringToSparkCommand( 91 | input_variable_name, input_variable_value, output_variable_name 92 | ) 93 | with pytest.raises( 94 | BadUserDataException, 95 | ): 96 | sparkcommand.to_command( 97 | "spark", 98 | input_variable_name, 99 | input_variable_value, 100 | output_variable_name, 101 | ) 102 | 103 | 104 | def test_should_create_a_valid_scala_expression(): 105 | input_variable_name = "input" 106 | input_variable_value = "value" 107 | output_variable_name = 
"output" 108 | sparkcommand = SendStringToSparkCommand( 109 | input_variable_name, input_variable_value, output_variable_name 110 | ) 111 | assert sparkcommand._scala_command( 112 | input_variable_name, input_variable_value, output_variable_name 113 | ) == Command('var {} = """{}"""'.format(output_variable_name, input_variable_value)) 114 | 115 | 116 | def test_should_create_a_valid_python_expression(): 117 | input_variable_name = "input" 118 | input_variable_value = "value" 119 | output_variable_name = "output" 120 | sparkcommand = SendStringToSparkCommand( 121 | input_variable_name, input_variable_value, output_variable_name 122 | ) 123 | assert sparkcommand._pyspark_command( 124 | input_variable_name, input_variable_value, output_variable_name 125 | ) == Command("{} = {}".format(output_variable_name, repr(input_variable_value))) 126 | 127 | 128 | def test_should_create_a_valid_r_expression(): 129 | input_variable_name = "input" 130 | input_variable_value = "value" 131 | output_variable_name = "output" 132 | sparkcommand = SendStringToSparkCommand( 133 | input_variable_name, input_variable_value, output_variable_name 134 | ) 135 | assert sparkcommand._r_command( 136 | input_variable_name, input_variable_value, output_variable_name 137 | ) == Command( 138 | """assign("{}","{}")""".format(output_variable_name, input_variable_value) 139 | ) 140 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 2 | from IPython.core.error import UsageError 3 | from IPython.core.magic_arguments import parse_argstring 4 | import numpy as np 5 | import pandas as pd 6 | import json 7 | import importlib 8 | from collections import OrderedDict 9 | 10 | import sparkmagic.utils.configuration as conf 11 | import sparkmagic.utils.constants as constants 12 | from sparkmagic.livyclientlib.exceptions import ( 13 | BadUserDataException, 14 | DataFrameParseException, 15 | BadUserConfigurationException, 16 | ) 17 | 18 | 19 | def get_coerce_value(coerce): 20 | if coerce is not None: 21 | coerce = coerce.lower() in ("yes", "true", "t", "y", "1") 22 | return coerce 23 | 24 | 25 | def parse_argstring_or_throw(magic_func, argstring, parse_argstring=parse_argstring): 26 | """An alternative to the parse_argstring method from IPython.core.magic_arguments. 
27 | Catches IPython.core.error.UsageError and propagates it as a 28 | livyclientlib.exceptions.BadUserDataException.""" 29 | try: 30 | return parse_argstring(magic_func, argstring) 31 | except UsageError as e: 32 | raise BadUserDataException(str(e)) 33 | 34 | 35 | def coerce_pandas_df_to_numeric_datetime(df): 36 | for column_name in df.columns: 37 | coerced = False 38 | 39 | if df[column_name].isnull().all(): 40 | continue 41 | 42 | if not coerced and df[column_name].dtype == np.dtype("object"): 43 | try: 44 | df[column_name] = pd.to_datetime(df[column_name], errors="raise") 45 | coerced = True 46 | except (ValueError, TypeError, OverflowError): 47 | pass 48 | 49 | if not coerced and df[column_name].dtype == np.dtype("object"): 50 | try: 51 | df[column_name] = pd.to_numeric(df[column_name], errors="raise") 52 | coerced = True 53 | except (ValueError, TypeError): 54 | pass 55 | 56 | 57 | def records_to_dataframe(records_text, kind, coerce=None): 58 | if records_text in ["", "[]"]: 59 | strings = [] 60 | else: 61 | strings = records_text.strip().split("\n") 62 | try: 63 | data_array = [ 64 | json.JSONDecoder(object_pairs_hook=OrderedDict).decode(s) for s in strings 65 | ] 66 | 67 | if kind == constants.SESSION_KIND_SPARKR and len(data_array) > 0: 68 | data_array = data_array[0] 69 | 70 | df = pd.DataFrame(data_array) 71 | 72 | if len(data_array) > 0: 73 | # This will assign the columns in the right order. If we simply did 74 | # df = pd.DataFrame(data_array, columns=data_array[0].keys()) 75 | # in the code defining df, above, we could get an issue where the first element 76 | # has some columns as null, and thus would drop the columns from the df altogether. 77 | # Refer to https://github.com/jupyter-incubator/sparkmagic/issues/346 for 78 | # more details. 79 | for data in data_array: 80 | if len(data.keys()) == len(df.columns): 81 | df = df[list(data.keys())] 82 | break 83 | 84 | if coerce is None: 85 | coerce = conf.coerce_dataframe() 86 | if coerce: 87 | coerce_pandas_df_to_numeric_datetime(df) 88 | 89 | return df 90 | except ValueError: 91 | raise DataFrameParseException( 92 | "Cannot parse object as JSON: '{}'".format(strings) 93 | ) 94 | 95 | 96 | def get_sessions_info_html(info_sessions, current_session_id): 97 | html = ( 98 | """<table> 99 | <tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>User</th><th>Current session?</th></tr>""" 100 | + "".join( 101 | [session.get_row_html(current_session_id) for session in info_sessions] 102 | ) 103 | + "</table>"
104 | ) 105 | 106 | return html 107 | 108 | 109 | def initialize_auth(args): 110 | """Creates an authenticator class instance for the given auth type 111 | 112 | Args: 113 | args (IPython.core.magics.namespace): The namespace object that is created from 114 | parsing %spark magic command 115 | 116 | Returns: 117 | An instance of a valid Authenticator or None if args.auth is 'None' 118 | 119 | Raises: 120 | sparkmagic.livyclientlib.BadUserConfigurationException: if args.auth is not a valid 121 | authenticator class. 122 | """ 123 | if args.auth is None: 124 | auth = conf.get_auth_value(args.user, args.password) 125 | else: 126 | auth = args.auth 127 | if auth == constants.NO_AUTH: 128 | return None 129 | else: 130 | full_class = conf.authenticators().get(auth) 131 | if full_class is None: 132 | raise BadUserConfigurationException("Auth '{}' not supported".format(auth)) 133 | module, class_name = (full_class).rsplit(".", 1) 134 | events_handler_module = importlib.import_module(module) 135 | auth_class = getattr(events_handler_module, class_name) 136 | return auth_class(args) 137 | 138 | 139 | class Namespace: 140 | """Namespace to initialize authenticator class with""" 141 | 142 | def __init__(self, **kwargs): 143 | self.__dict__.update(kwargs) 144 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/reliablehttpclient.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | import json 4 | from time import sleep 5 | import requests 6 | import sparkmagic.utils.configuration as conf 7 | from sparkmagic.utils.sparklogger import SparkLog 8 | from .exceptions import HttpClientException, HttpSessionAdapterConfigException 9 | import importlib 10 | 11 | 12 | class ReliableHttpClient(object): 13 | """Http client that is reliable in its requests. Uses requests library.""" 14 | 15 | def __init__(self, endpoint, headers, retry_policy): 16 | self._endpoint = endpoint 17 | self._headers = headers 18 | self._retry_policy = retry_policy 19 | self._auth = self._endpoint.auth 20 | self._session = requests.Session() 21 | self.logger = SparkLog("ReliableHttpClient") 22 | self.verify_ssl = not conf.ignore_ssl_errors() 23 | if not self.verify_ssl: 24 | self.logger.debug( 25 | "ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
26 | ) 27 | requests.packages.urllib3.disable_warnings() 28 | self._set_http_session_config() 29 | 30 | def _set_http_session_config(self): 31 | http_session_config = conf.http_session_config() 32 | if http_session_config and http_session_config.get("adapters"): 33 | self._set_http_session_adapters(http_session_config["adapters"]) 34 | 35 | def _set_http_session_adapters(self, adapters): 36 | for adapter in adapters: 37 | full_class = adapter.get("adapter") 38 | adapter_prefix = adapter.get("prefix") 39 | if full_class is None or adapter_prefix is None: 40 | raise HttpSessionAdapterConfigException( 41 | "Invalid http session adapter config, prefix: {} or class: {} " 42 | "not defined correctly".format(adapter_prefix, full_class) 43 | ) 44 | module, class_name = full_class.rsplit(".", 1) 45 | adapter_module = importlib.import_module(module) 46 | adapter_class = getattr(adapter_module, class_name) 47 | self._session.mount(adapter_prefix, adapter_class()) 48 | 49 | def get_headers(self): 50 | return self._headers 51 | 52 | def compose_url(self, relative_url): 53 | r_u = "/{}".format(relative_url.rstrip("/").lstrip("/")) 54 | return self._endpoint.url + r_u 55 | 56 | def get(self, relative_url, accepted_status_codes): 57 | """Sends a get request. Returns a response.""" 58 | return self._send_request( 59 | relative_url, accepted_status_codes, self._session.get 60 | ) 61 | 62 | def post(self, relative_url, accepted_status_codes, data): 63 | """Sends a post request. Returns a response.""" 64 | return self._send_request( 65 | relative_url, accepted_status_codes, self._session.post, data 66 | ) 67 | 68 | def delete(self, relative_url, accepted_status_codes): 69 | """Sends a delete request. Returns a response.""" 70 | return self._send_request( 71 | relative_url, accepted_status_codes, self._session.delete 72 | ) 73 | 74 | def _send_request(self, relative_url, accepted_status_codes, function, data=None): 75 | return self._send_request_helper( 76 | self.compose_url(relative_url), accepted_status_codes, function, data, 0 77 | ) 78 | 79 | def _send_request_helper( 80 | self, url, accepted_status_codes, function, data, retry_count 81 | ): 82 | while True: 83 | try: 84 | if data is None: 85 | r = function( 86 | url, 87 | headers=self._headers, 88 | auth=self._auth, 89 | verify=self.verify_ssl, 90 | ) 91 | else: 92 | r = function( 93 | url, 94 | headers=self._headers, 95 | auth=self._auth, 96 | data=json.dumps(data), 97 | verify=self.verify_ssl, 98 | ) 99 | except requests.exceptions.RequestException as e: 100 | error = True 101 | r = None 102 | status = None 103 | text = None 104 | 105 | self.logger.error("Request to '{}' failed with '{}'".format(url, e)) 106 | else: 107 | error = False 108 | status = r.status_code 109 | text = r.text 110 | 111 | if error or status not in accepted_status_codes: 112 | if self._retry_policy.should_retry(status, error, retry_count): 113 | sleep(self._retry_policy.seconds_to_sleep(retry_count)) 114 | retry_count += 1 115 | continue 116 | 117 | if error: 118 | raise HttpClientException( 119 | "Error sending http request and maximum retry encountered." 
120 | ) 121 | else: 122 | raise HttpClientException( 123 | "Invalid status code '{}' from {} with error payload: {}".format( 124 | status, url, text 125 | ) 126 | ) 127 | return r 128 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_sessionmanager.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import pytest 3 | from mock import MagicMock, PropertyMock 4 | 5 | import sparkmagic.utils.configuration as conf 6 | from sparkmagic.livyclientlib.exceptions import SessionManagementException 7 | from sparkmagic.livyclientlib.sessionmanager import SessionManager 8 | 9 | 10 | def test_get_client_throws_when_client_not_exists(): 11 | with pytest.raises(SessionManagementException): 12 | manager = get_session_manager() 13 | manager.get_session("name") 14 | 15 | 16 | def test_get_client(): 17 | client = MagicMock() 18 | manager = get_session_manager() 19 | 20 | manager.add_session("name", client) 21 | 22 | assert client == manager.get_session("name") 23 | 24 | 25 | def test_delete_client(): 26 | with pytest.raises(SessionManagementException): 27 | client = MagicMock() 28 | manager = get_session_manager() 29 | 30 | manager.add_session("name", client) 31 | manager.delete_client("name") 32 | 33 | manager.get_session("name") 34 | 35 | 36 | def test_delete_client_throws_when_client_not_exists(): 37 | with pytest.raises(SessionManagementException): 38 | manager = get_session_manager() 39 | 40 | manager.delete_client("name") 41 | 42 | 43 | def test_add_client_throws_when_client_exists(): 44 | with pytest.raises(SessionManagementException): 45 | client = MagicMock() 46 | manager = get_session_manager() 47 | 48 | manager.add_session("name", client) 49 | manager.add_session("name", client) 50 | 51 | 52 | def test_client_names_returned(): 53 | client = MagicMock() 54 | manager = get_session_manager() 55 | 56 | manager.add_session("name0", client) 57 | manager.add_session("name1", client) 58 | 59 | assert {"name0", "name1"} == set(manager.get_sessions_list()) 60 | 61 | 62 | def test_get_any_client(): 63 | client = MagicMock() 64 | manager = get_session_manager() 65 | 66 | manager.add_session("name", client) 67 | 68 | assert client == manager.get_any_session() 69 | 70 | 71 | def test_get_any_client_raises_exception_with_no_client(): 72 | with pytest.raises(SessionManagementException): 73 | manager = get_session_manager() 74 | 75 | manager.get_any_session() 76 | 77 | 78 | def test_get_any_client_raises_exception_with_two_clients(): 79 | with pytest.raises(SessionManagementException): 80 | client = MagicMock() 81 | manager = get_session_manager() 82 | manager.add_session("name0", client) 83 | manager.add_session("name1", client) 84 | 85 | manager.get_any_session() 86 | 87 | 88 | def test_clean_up(): 89 | client0 = MagicMock() 90 | client1 = MagicMock() 91 | manager = get_session_manager() 92 | manager.add_session("name0", client0) 93 | manager.add_session("name1", client1) 94 | 95 | manager.clean_up_all() 96 | 97 | client0.delete.assert_called_once_with() 98 | client1.delete.assert_called_once_with() 99 | 100 | 101 | def test_cleanup_all_sessions_on_exit(): 102 | conf.override(conf.cleanup_all_sessions_on_exit.__name__, True) 103 | client0 = MagicMock() 104 | client1 = MagicMock() 105 | manager = get_session_manager() 106 | manager.add_session("name0", client0) 107 | manager.add_session("name1", client1) 108 | 109 | atexit._run_exitfuncs() 110 | 111 | client0.delete.assert_called_once_with() 
112 | client1.delete.assert_called_once_with() 113 | manager.ipython_display.writeln.assert_called_once_with( 114 | "Cleaning up livy sessions on exit is enabled" 115 | ) 116 | 117 | 118 | def test_cleanup_all_sessions_on_exit_fails(): 119 | """Cleanup on exit is best effort only. 120 | 121 | When cleanup fails, exception is caught and error is logged. 122 | """ 123 | conf.override(conf.cleanup_all_sessions_on_exit.__name__, True) 124 | client0 = MagicMock() 125 | client1 = MagicMock() 126 | client0.delete.side_effect = Exception("Mocked exception for client1.delete") 127 | manager = get_session_manager() 128 | manager.add_session("name0", client0) 129 | manager.add_session("name1", client1) 130 | 131 | atexit._run_exitfuncs() 132 | 133 | client0.delete.assert_called_once_with() 134 | client1.delete.assert_not_called() 135 | 136 | 137 | def test_get_session_id_for_client(): 138 | manager = get_session_manager() 139 | manager.get_sessions_list = MagicMock(return_value=["name"]) 140 | manager._sessions["name"] = MagicMock() 141 | 142 | id = manager.get_session_id_for_client("name") 143 | 144 | assert id is not None 145 | 146 | 147 | def test_get_session_name_by_id_endpoint(): 148 | manager = get_session_manager() 149 | id_to_search = "0" 150 | endpoint_to_search = "endpoint" 151 | name_to_search = "name" 152 | 153 | name = manager.get_session_name_by_id_endpoint(id_to_search, endpoint_to_search) 154 | assert None == name 155 | 156 | session = MagicMock() 157 | type(session).id = PropertyMock(return_value=int(id_to_search)) 158 | session.endpoint = endpoint_to_search 159 | 160 | manager.add_session(name_to_search, session) 161 | name = manager.get_session_name_by_id_endpoint(id_to_search, endpoint_to_search) 162 | assert name_to_search == name 163 | 164 | 165 | def test_get_session_id_for_client_not_there(): 166 | manager = get_session_manager() 167 | manager.get_sessions_list = MagicMock(return_value=[]) 168 | 169 | id = manager.get_session_id_for_client("name") 170 | 171 | assert id is None 172 | 173 | 174 | def get_session_manager(): 175 | ipython_display = MagicMock() 176 | return SessionManager(ipython_display) 177 | --------------------------------------------------------------------------------
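The two atexit tests above pin down the shape of cleanup at interpreter exit: it is registered through the standard atexit module and is best effort as a whole rather than per session, since a failure on one delete aborts the loop (client1.delete is never reached) while the exception is still caught and logged so shutdown proceeds. A sketch of that shape; the class, method, and logger names are illustrative, not copied from sessionmanager.py:

import atexit


class CleanupOnExit(object):
    # Registers a best-effort exit hook mirroring the behavior the tests
    # above assert against SessionManager.
    def __init__(self, sessions, logger):
        self._sessions = sessions
        self._logger = logger
        atexit.register(self._cleanup_all_sessions_on_exit)

    def _cleanup_all_sessions_on_exit(self):
        try:
            for session in self._sessions.values():
                session.delete()  # the first failure aborts the whole loop...
        except Exception as e:
            # ...but is swallowed here so interpreter shutdown is undisturbed
            self._logger.error("Error cleaning up sessions on exit: {}".format(e))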