├── config.json ├── sparkmagic ├── sparkmagic │ ├── auth │ │ ├── __init__.py │ │ ├── kerberos.py │ │ ├── customauth.py │ │ └── basic.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_endpoint.py │ │ ├── test_kernels.py │ │ ├── test_heartbeatthread.py │ │ ├── test_usercodeparser.py │ │ ├── test_configurableretrypolicy.py │ │ ├── test_exceptions.py │ │ ├── test_livyreliablehttpclient.py │ │ ├── test_pd_data_coerce.py │ │ ├── test_sendstringtosparkcommand.py │ │ └── test_sessionmanager.py │ ├── utils │ │ ├── __init__.py │ │ ├── sparklogger.py │ │ ├── constants.py │ │ └── utils.py │ ├── livyclientlib │ │ ├── __init__.py │ │ ├── linearretrypolicy.py │ │ ├── endpoint.py │ │ ├── configurableretrypolicy.py │ │ ├── sendstringtosparkcommand.py │ │ ├── sendtosparkcommand.py │ │ ├── livyreliablehttpclient.py │ │ ├── sessionmanager.py │ │ ├── sendpandasdftosparkcommand.py │ │ └── reliablehttpclient.py │ ├── controllerwidget │ │ ├── __init__.py │ │ ├── abstractmenuwidget.py │ │ ├── createsessionwidget.py │ │ ├── managesessionwidget.py │ │ ├── addendpointwidget.py │ │ └── magicscontrollerwidget.py │ ├── serverextension │ │ └── __init__.py │ ├── kernels │ │ ├── wrapperkernel │ │ │ ├── __init__.py │ │ │ └── usercodeparser.py │ │ ├── pysparkkernel │ │ │ ├── __init__.py │ │ │ ├── kernel.json │ │ │ ├── kernel.js │ │ │ └── pysparkkernel.py │ │ ├── sparkkernel │ │ │ ├── __init__.py │ │ │ ├── kernel.json │ │ │ ├── kernel.js │ │ │ └── sparkkernel.py │ │ ├── sparkrkernel │ │ │ ├── __init__.py │ │ │ ├── kernel.json │ │ │ ├── kernel.js │ │ │ └── sparkrkernel.py │ │ └── __init__.py │ ├── magics │ │ └── __init__.py │ └── __init__.py ├── setup.cfg ├── MANIFEST.in ├── requirements.txt ├── example_config.json └── setup.py ├── autovizwidget ├── autovizwidget │ ├── tests │ │ ├── __init__.py │ │ ├── test_plotlygraphrenderer.py │ │ ├── test_sparkevents.py │ │ ├── test_utils.py │ │ └── test_encodingwidget.py │ ├── utils │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── events.py │ │ └── configuration.py │ ├── widget │ │ ├── __init__.py │ │ ├── invalidencodingerror.py │ │ ├── encoding.py │ │ └── utils.py │ ├── plotlygraphs │ │ ├── __init__.py │ │ ├── scattergraph.py │ │ ├── bargraph.py │ │ ├── linegraph.py │ │ ├── areagraph.py │ │ ├── datagraph.py │ │ ├── graphrenderer.py │ │ └── piegraph.py │ └── __init__.py ├── MANIFEST.in ├── setup.cfg ├── README.md ├── requirements.txt ├── examples │ ├── Capture0.PNG │ └── Capture1.PNG ├── setup.py └── LICENSE.md ├── hdijupyterutils ├── hdijupyterutils │ ├── tests │ │ ├── __init__.py │ │ ├── test_ipythondisplay.py │ │ ├── test_events.py │ │ ├── test_filesystemreaderwriter.py │ │ ├── test_configuration.py │ │ └── test_logger.py │ ├── __init__.py │ ├── guid.py │ ├── constants.py │ ├── eventshandler.py │ ├── events.py │ ├── utils.py │ ├── ipythondisplay.py │ ├── filehandler.py │ ├── filesystemreaderwriter.py │ ├── log.py │ ├── ipywidgetfactory.py │ └── configuration.py ├── MANIFEST.in ├── setup.cfg ├── requirements.txt ├── README.md ├── setup.py └── LICENSE.md ├── screenshots ├── help.png ├── autoviz.png ├── diagram.png ├── matplotlib.png └── sparkcontext.png ├── examples └── images │ ├── cleanup.PNG │ ├── widget.PNG │ ├── addendpoint.PNG │ ├── addsession.PNG │ └── addsession_s.PNG ├── helm ├── charts │ └── sparkmagic-0.1.0.tgz ├── values.yaml ├── templates │ ├── jupyter-service.yaml │ ├── sparkmagic-service.yaml │ ├── sparkmagic-deployment.yaml │ └── jupyter-deployment.yaml └── Chart.yaml ├── .git-blame-ignore-revs ├── .github ├── workflows │ ├── lint.yaml │ ├── tests.yml │ ├── release.yml │ 
├── publish.yml │ └── docker_build.yml ├── dependabot.yml ├── pull_request_template.md └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── RELEASING.md ├── SECURITY.md ├── docker-compose.yml ├── .bumpversion.cfg ├── pyproject.toml ├── .gitignore ├── Dockerfile.jupyter ├── Dockerfile.spark ├── .vscode └── tasks.json ├── LICENSE.md └── CODE_OF_CONDUCT.md /config.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/serverextension/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autovizwidget/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/wrapperkernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hdijupyterutils/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | 3 | 
-------------------------------------------------------------------------------- /autovizwidget/autovizwidget/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.23.0" 2 | -------------------------------------------------------------------------------- /autovizwidget/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /sparkmagic/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.23.0" 2 | -------------------------------------------------------------------------------- /hdijupyterutils/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.kernels.kernelmagics import * 2 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/magics/__init__.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.magics.remotesparkmagics import * 2 | -------------------------------------------------------------------------------- /autovizwidget/README.md: -------------------------------------------------------------------------------- 1 | # autovizwidget 2 | 3 | An Auto-Visualization library for pandas dataframes -------------------------------------------------------------------------------- /screenshots/help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/help.png -------------------------------------------------------------------------------- /sparkmagic/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include sparkmagic/kernels *.js *.json 2 | include requirements.txt 3 | -------------------------------------------------------------------------------- /screenshots/autoviz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/autoviz.png 
-------------------------------------------------------------------------------- /screenshots/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/diagram.png -------------------------------------------------------------------------------- /examples/images/cleanup.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/cleanup.PNG -------------------------------------------------------------------------------- /examples/images/widget.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/widget.PNG -------------------------------------------------------------------------------- /screenshots/matplotlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/matplotlib.png -------------------------------------------------------------------------------- /screenshots/sparkcontext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/screenshots/sparkcontext.png -------------------------------------------------------------------------------- /autovizwidget/requirements.txt: -------------------------------------------------------------------------------- 1 | plotly>=3 2 | ipywidgets>5.0.0 3 | hdijupyterutils>=0.6 4 | notebook>=4.2 5 | pandas<3.0.0 6 | -------------------------------------------------------------------------------- /examples/images/addendpoint.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/addendpoint.PNG -------------------------------------------------------------------------------- /examples/images/addsession.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/addsession.PNG -------------------------------------------------------------------------------- /examples/images/addsession_s.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/examples/images/addsession_s.PNG -------------------------------------------------------------------------------- /helm/charts/sparkmagic-0.1.0.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/helm/charts/sparkmagic-0.1.0.tgz -------------------------------------------------------------------------------- /autovizwidget/examples/Capture0.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/autovizwidget/examples/Capture0.PNG -------------------------------------------------------------------------------- /autovizwidget/examples/Capture1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter-incubator/sparkmagic/HEAD/autovizwidget/examples/Capture1.PNG 
-------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # .git-blame-ignore-revs 2 | # Re-formatted entire code base with black 3 | 7ebf0753485c931db4135953dcd0864b4d089ed5 4 | 5 | -------------------------------------------------------------------------------- /hdijupyterutils/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython>=4.1.2 2 | ipywidgets>5.0.0 3 | ipykernel>=4.2.2 4 | jupyter>=1 5 | pandas<3.0.0 6 | numpy>=1.16.5 7 | notebook>=4.2 8 | -------------------------------------------------------------------------------- /hdijupyterutils/README.md: -------------------------------------------------------------------------------- 1 | # Hdi Jupyter Utils 2 | 3 | Project with useful classes/methods for all projects created by the HDInsight team at Microsoft around Jupyter. 4 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/guid.py: -------------------------------------------------------------------------------- 1 | from .utils import generate_uuid 2 | 3 | 4 | class ObjectWithGuid(object): 5 | def __init__(self): 6 | self.guid = generate_uuid() 7 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/constants.py: -------------------------------------------------------------------------------- 1 | HOME_PATH = "~/.autovizwidget" 2 | CONFIG_FILE = "config.json" 3 | 4 | GRAPH_RENDER_EVENT = "notebookGraphRender" 5 | GRAPH_TYPE = "GraphType" 6 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: psf/black@stable 11 | -------------------------------------------------------------------------------- /sparkmagic/requirements.txt: -------------------------------------------------------------------------------- 1 | hdijupyterutils>=0.6 2 | autovizwidget>=0.6 3 | ipython>=4.1.2 4 | pandas<3.0.0 5 | numpy 6 | requests 7 | ipykernel>=4.2.2 8 | ipywidgets>5.0.0 9 | notebook>=4.2 10 | tornado>=4 11 | requests_kerberos>=0.8.0 12 | nest_asyncio>1.5.5 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/invalidencodingerror.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | 5 | class InvalidEncodingError(Exception): 6 | """An exception for encodings you can't work with.""" 7 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "python", 4 | "-m", 5 | "sparkmagic.kernels.sparkkernel.sparkkernel", 6 | "-f", 7 | "{connection_file}" 8 | ], 9 | "display_name": "Spark", 10 | "language": "scala" 11 | } 12 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "python", 4 | "-m", 5 | "sparkmagic.kernels.sparkrkernel.sparkrkernel", 6 | "-f", 7 | "{connection_file}" 8 | ], 9 | "display_name": "SparkR", 10 | "language": "r" 11 | } 12 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/constants.py: -------------------------------------------------------------------------------- 1 | LOGGING_CONFIG_CLASS_NAME = "hdijupyterutils.filehandler.MagicsFileHandler" 2 | 3 | EVENTS_HANDLER_CLASS_NAME = "hdijupyterutils.eventshandler.EventsHandler" 4 | INSTANCE_ID = "InstanceId" 5 | TIMESTAMP = "Timestamp" 6 | EVENT_NAME = "EventName" 7 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "python", 4 | "-m", 5 | "sparkmagic.kernels.pysparkkernel.pysparkkernel", 6 | "-f", 7 | "{connection_file}" 8 | ], 9 | "display_name": "PySpark", 10 | "language": "python" 11 | } 12 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/kernel.js: -------------------------------------------------------------------------------- 1 | define(['base/js/namespace'], function(IPython){ 2 | var onload = function() { 3 | IPython.CodeCell.config_defaults.highlight_modes['magic_text/x-sql'] = {'reg':[/^%%sql/]}; 4 | } 5 | 6 | return { onload: onload } 7 | }) -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/kernel.js: -------------------------------------------------------------------------------- 1 | define(["base/js/namespace"], function(IPython) { 2 | var onload = function() { 3 | IPython.CodeCell.config_defaults.highlight_modes["magic_text/x-sql"] = { 4 | reg: [/^%%sql/] 5 | }; 6 | }; 7 | 8 | return { onload: onload }; 9 | }); 10 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | # How to release 2 | 3 | 1. Make sure `CHANGELOG.md` is up-to-date with all changes since last release and available on `master` branch. 4 | 2. 
Go to Actions -> Release workflow -> Run workflow -> Select `patch|minor|major` depending on the changes you want to release, set `master` branch as the desired branch 5 | -------------------------------------------------------------------------------- /helm/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | replicas: 1 4 | image: 5 | repository: ghcr.io/ljubon/sparkmagic/jupyter 6 | tag: latest 7 | service: 8 | port: 8888 9 | 10 | sparkmagic: 11 | image: 12 | repository: ghcr.io/ljubon/sparkmagic/sparkmagic-livy 13 | tag: latest 14 | service: 15 | port: 8998 16 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.23.0" 2 | 3 | from sparkmagic.serverextension.handlers import ( 4 | load_jupyter_server_extension, 5 | ) # noqa: #501 6 | 7 | 8 | def _jupyter_server_extension_paths(): 9 | return [{"module": "sparkmagic"}] 10 | 11 | 12 | def _jupyter_nbextension_paths(): 13 | return [] 14 | -------------------------------------------------------------------------------- /helm/templates/jupyter-service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: jupyter-notebook 6 | spec: 7 | selector: 8 | app: jupyter-notebook 9 | ports: 10 | - protocol: TCP 11 | port: {{ .Values.jupyter.service.port }} 12 | targetPort: {{ .Values.jupyter.service.port }} 13 | type: LoadBalancer 14 | -------------------------------------------------------------------------------- /helm/templates/sparkmagic-service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: sparkmagic 6 | spec: 7 | selector: 8 | app: sparkmagic 9 | ports: 10 | - protocol: TCP 11 | port: {{ .Values.sparkmagic.service.port }} 12 | targetPort: {{ .Values.sparkmagic.service.port }} 13 | type: LoadBalancer 14 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | 8 | # Additional configuration for Python using pip 9 | - package-ecosystem: "pip" 10 | directory: "/" 11 | schedule: 12 | interval: "daily" 13 | open-pull-requests-limit: 10 14 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/scattergraph.py: -------------------------------------------------------------------------------- 1 | from plotly.graph_objs import Scatter 2 | 3 | from .graphbase import GraphBase 4 | 5 | 6 | class ScatterGraph(GraphBase): 7 | def _get_data(self, df, encoding): 8 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 9 | return [Scatter(x=x_values, y=y_values, mode="markers")] 10 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | All IPython and Jupyter security issues are handled via security@ipython.org. 6 | You can find more information on the Jupyter website.
https://jupyter.org/security 7 | 8 | ## Tidelift 9 | 10 | You can also report security concerns for autovizwidget and sparkmagic via the [Tidelift platform](https://tidelift.com/security). 11 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/kernel.js: -------------------------------------------------------------------------------- 1 | define(["base/js/namespace"], function(IPython) { 2 | var onload = function() { 3 | IPython.CodeCell.config_defaults.highlight_modes["magic_text/x-sql"] = { 4 | reg: [/^%%sql/] 5 | }; 6 | IPython.CodeCell.config_defaults.highlight_modes["magic_text/x-python"] = { 7 | reg: [/^%%local/] 8 | }; 9 | }; 10 | 11 | return { onload: onload }; 12 | }); 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/bargraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.graph_objs import Bar 5 | 6 | from .graphbase import GraphBase 7 | 8 | 9 | class BarGraph(GraphBase): 10 | def _get_data(self, df, encoding): 11 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 12 | return [Bar(x=x_values, y=y_values)] 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/linegraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.graph_objs import Scatter 5 | 6 | from .graphbase import GraphBase 7 | 8 | 9 | class LineGraph(GraphBase): 10 | def _get_data(self, df, encoding): 11 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 12 | return [Scatter(x=x_values, y=y_values)] 13 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/sparklogger.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 2 | from hdijupyterutils.log import Log 3 | 4 | import sparkmagic.utils.configuration as conf 5 | from sparkmagic.utils.constants import MAGICS_LOGGER_NAME 6 | 7 | 8 | class SparkLog(Log): 9 | def __init__(self, class_name): 10 | super(SparkLog, self).__init__( 11 | MAGICS_LOGGER_NAME, conf.logging_config(), class_name 12 | ) 13 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/areagraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.graph_objs import Scatter 5 | 6 | from .graphbase import GraphBase 7 | 8 | 9 | class AreaGraph(GraphBase): 10 | def _get_data(self, df, encoding): 11 | x_values, y_values = GraphBase._get_x_y_values(df, encoding) 12 | return [Scatter(x=x_values, y=y_values, fill="tonexty")] 13 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | spark: 4 | image: jupyter/sparkmagic-livy 5 | build: 6 | context: .
7 | dockerfile: Dockerfile.spark 8 | hostname: spark 9 | ports: 10 | - "8998:8998" 11 | jupyter: 12 | image: jupyter/sparkmagic 13 | build: 14 | context: . 15 | dockerfile: Dockerfile.jupyter 16 | args: 17 | dev_mode: "false" 18 | links: 19 | - spark 20 | ports: 21 | - "8888:8888" 22 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/eventshandler.py: -------------------------------------------------------------------------------- 1 | from .log import Log 2 | 3 | 4 | class EventsHandler(object): 5 | def __init__(self, logger_name, logging_config): 6 | self.logger = Log(logger_name, logging_config, "EventsHandler") 7 | 8 | def handle_event(self, kwargs_list): 9 | """ 10 | Storing the Event details using the logger. 11 | """ 12 | event_line = ",".join("{}: {}".format(key, arg) for key, arg in kwargs_list) 13 | self.logger.info(event_line) 14 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | 4 | ### Checklist 5 | - [ ] Wrote a description of my changes above 6 | - [ ] Formatted my code with [`black`](https://black.readthedocs.io/en/stable/index.html) 7 | - [ ] Added a bullet point for my changes to the top of the `CHANGELOG.md` file 8 | - [ ] Added or modified unit tests to reflect my changes 9 | - [ ] Manually tested with a notebook 10 | - [ ] If adding a feature, there is an example notebook and/or documentation in the `README.md` file 11 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/events.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import importlib 3 | 4 | from hdijupyterutils.constants import INSTANCE_ID 5 | from hdijupyterutils.utils import get_instance_id 6 | 7 | 8 | class Events(object): 9 | def __init__(self, handler): 10 | self.handler = handler 11 | 12 | @staticmethod 13 | def get_utc_date_time(): 14 | return datetime.utcnow() 15 | 16 | def send_to_handler(self, kwargs_list): 17 | kwargs_list = [(INSTANCE_ID, get_instance_id())] + kwargs_list 18 | 19 | assert len(kwargs_list) <= 12 20 | 21 | self.handler.handle_event(kwargs_list) 22 | -------------------------------------------------------------------------------- /helm/templates/sparkmagic-deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: sparkmagic 6 | spec: 7 | replicas: {{ .Values.sparkmagic.replicas }} 8 | selector: 9 | matchLabels: 10 | app: sparkmagic 11 | template: 12 | metadata: 13 | labels: 14 | app: sparkmagic 15 | spec: 16 | containers: 17 | - name: sparkmagic 18 | image: "{{ .Values.sparkmagic.image.repository }}:{{ .Values.sparkmagic.image.tag }}" 19 | ports: 20 | - name: http 21 | containerPort: {{ .Values.sparkmagic.service.port }} 22 | protocol: TCP -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.23.0 3 | commit = True 4 | tag = True 5 | tag_name = {new_version} 6 | message = "Bump version: {current_version} → {new_version}" 7 | 8 | [bumpversion:file:sparkmagic/sparkmagic/__init__.py] 9 | search = __version__ = "{current_version}" 10 | 
replace = __version__ = "{new_version}" 11 | 12 | [bumpversion:file:autovizwidget/autovizwidget/__init__.py] 13 | search = __version__ = "{current_version}" 14 | replace = __version__ = "{new_version}" 15 | 16 | [bumpversion:file:hdijupyterutils/hdijupyterutils/__init__.py] 17 | search = __version__ = "{current_version}" 18 | replace = __version__ = "{new_version}" 19 | -------------------------------------------------------------------------------- /helm/templates/jupyter-deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: jupyter-notebook 6 | spec: 7 | replicas: {{ .Values.sparkmagic.replicas }} 8 | selector: 9 | matchLabels: 10 | app: jupyter-notebook 11 | template: 12 | metadata: 13 | labels: 14 | app: jupyter-notebook 15 | spec: 16 | containers: 17 | - name: jupyter-notebook 18 | image: "{{ .Values.jupyter.image.repository }}:{{ .Values.jupyter.image.tag }}" 19 | ports: 20 | - name: http 21 | containerPort: {{ .Values.jupyter.service.port }} 22 | protocol: TCP 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Versions:** 23 | - SparkMagic 24 | - Livy (if you know it) 25 | - Spark 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/utils.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 
2 | 3 | import os 4 | import uuid 5 | 6 | 7 | first_run = True 8 | instance_id = None 9 | 10 | 11 | def expand_path(path): 12 | return os.path.expanduser(path) 13 | 14 | 15 | def join_paths(p1, p2): 16 | return os.path.join(p1, p2) 17 | 18 | 19 | def generate_uuid(): 20 | return uuid.uuid4() 21 | 22 | 23 | def get_instance_id(): 24 | global first_run, instance_id 25 | 26 | if first_run: 27 | first_run = False 28 | instance_id = generate_uuid() 29 | 30 | if instance_id is None: 31 | raise ValueError("Tried to return empty instance ID.") 32 | 33 | return instance_id 34 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_ipythondisplay.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from hdijupyterutils.ipythondisplay import IpythonDisplay 3 | from mock import MagicMock 4 | import sys 5 | 6 | 7 | def test_stdout_flush(): 8 | ipython_shell = MagicMock() 9 | ipython_display = IpythonDisplay() 10 | ipython_display._ipython_shell = ipython_shell 11 | sys.stdout = MagicMock() 12 | 13 | ipython_display.write("Testing Stdout Flush è") 14 | assert sys.stdout.flush.call_count == 1 15 | 16 | 17 | def test_stderr_flush(): 18 | ipython_shell = MagicMock() 19 | ipython_display = IpythonDisplay() 20 | ipython_display._ipython_shell = ipython_shell 21 | sys.stderr = MagicMock() 22 | 23 | ipython_display.send_error("Testing Stderr Flush è") 24 | assert sys.stderr.flush.call_count == 1 25 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "development" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Devin Stein "] 6 | readme = "README.md" 7 | 8 | packages = [ 9 | { include = "hdijupyterutils", from = "./hdijupyterutils" }, 10 | { include = "autovizwidget", from = "./autovizwidget" }, 11 | { include = "sparkmagic", from = "./sparkmagic" }, 12 | ] 13 | 14 | [tool.poetry.dependencies] 15 | python = "^3.8" 16 | hdijupyterutils = {path = "./hdijupyterutils", develop = true } 17 | autovizwidget = {path = "./autovizwidget", develop = true } 18 | sparkmagic = {path = "./sparkmagic", develop = true } 19 | numpy = "^1.24.4" 20 | pandas = "^2.0.3" 21 | pytest = "^8.3.3" 22 | mock = "^5.1.0" 23 | 24 | 25 | [build-system] 26 | requires = ["poetry-core"] 27 | build-backend = "poetry.core.masonry.api" 28 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/ipythondisplay.py: -------------------------------------------------------------------------------- 1 | from IPython.display import display, HTML 2 | from IPython import get_ipython 3 | import sys 4 | 5 | 6 | class IpythonDisplay(object): 7 | def __init__(self): 8 | self._ipython_shell = get_ipython() 9 | 10 | def display(self, to_display): 11 | display(to_display) 12 | 13 | def html(self, to_display): 14 | self.display(HTML(to_display)) 15 | 16 | def stderr_flush(self): 17 | sys.stderr.flush() 18 | 19 | def stdout_flush(self): 20 | sys.stdout.flush() 21 | 22 | def write(self, msg): 23 | sys.stdout.write(msg) 24 | self.stdout_flush() 25 | 26 | def writeln(self, msg): 27 | self.write("{}\n".format(msg)) 28 | 29 | def send_error(self, error): 30 | sys.stderr.write("{}\n".format(error)) 31 | self.stderr_flush() 32 | -------------------------------------------------------------------------------- 
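A minimal usage sketch for the `IpythonDisplay` class above. It only uses methods defined in that file; the one assumption is that it runs inside an IPython/Jupyter session, so `get_ipython()` returns a live shell:

```python
# Sketch: write to stdout/stderr with an immediate flush after each message
# (so output appears promptly in a notebook), and render rich HTML output.
from hdijupyterutils.ipythondisplay import IpythonDisplay

display = IpythonDisplay()
display.writeln("Session started")       # stdout, flushed right away
display.html("<b>3 rows returned</b>")   # rendered as an HTML output cell
display.send_error("Session timed out")  # stderr, flushed right away
```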
/sparkmagic/sparkmagic/livyclientlib/linearretrypolicy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | 5 | class LinearRetryPolicy(object): 6 | """Retry policy that always returns the same number of seconds to sleep between calls, 7 | takes all status codes 500 or above to be retriable, and retries a given maximum number of times. 8 | """ 9 | 10 | def __init__(self, seconds_to_sleep, max_retries): 11 | self._seconds_to_sleep = seconds_to_sleep 12 | self.max_retries = max_retries 13 | 14 | def should_retry(self, status_code, error, retry_count): 15 | if None in (status_code, retry_count): 16 | return False 17 | return (status_code >= 500 and retry_count <= self.max_retries) or error 18 | 19 | def seconds_to_sleep(self, retry_count): 20 | return self._seconds_to_sleep 21 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/events.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import importlib 3 | from hdijupyterutils.constants import EVENT_NAME, TIMESTAMP 4 | from hdijupyterutils.events import Events 5 | 6 | from .constants import GRAPH_TYPE, GRAPH_RENDER_EVENT 7 | from . import configuration as conf 8 | 9 | 10 | class AutoVizEvents(Events): 11 | def __init__(self): 12 | handler = conf.events_handler() 13 | self.emit = handler is not None 14 | super(AutoVizEvents, self).__init__(handler) 15 | 16 | def emit_graph_render_event(self, graph_type): 17 | event_name = GRAPH_RENDER_EVENT 18 | time_stamp = self.get_utc_date_time() 19 | 20 | kwargs_list = [ 21 | (EVENT_NAME, event_name), 22 | (TIMESTAMP, time_stamp), 23 | (GRAPH_TYPE, graph_type), 24 | ] 25 | 26 | if self.emit: 27 | self.send_to_handler(kwargs_list) 28 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/utils/configuration.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 
2 | from hdijupyterutils.constants import ( 3 | EVENTS_HANDLER_CLASS_NAME, 4 | LOGGING_CONFIG_CLASS_NAME, 5 | ) 6 | from hdijupyterutils.utils import join_paths 7 | from hdijupyterutils.configuration import override as _override 8 | from hdijupyterutils.configuration import override_all as _override_all 9 | from hdijupyterutils.configuration import with_override 10 | 11 | from .constants import HOME_PATH, CONFIG_FILE 12 | 13 | 14 | d = {} 15 | path = join_paths(HOME_PATH, CONFIG_FILE) 16 | 17 | 18 | def override(config, value): 19 | _override(d, path, config, value) 20 | 21 | 22 | def override_all(obj): 23 | _override_all(d, obj) 24 | 25 | 26 | _with_override = with_override(d, path) 27 | 28 | # Configs 29 | 30 | 31 | @_with_override 32 | def events_handler(): 33 | return None 34 | 35 | 36 | @_with_override 37 | def max_slices_pie_graph(): 38 | return 100 39 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/endpoint.py: -------------------------------------------------------------------------------- 1 | from .exceptions import BadUserDataException 2 | 3 | 4 | class Endpoint(object): 5 | def __init__(self, url, auth, implicitly_added=False): 6 | if not url: 7 | raise BadUserDataException("URL must not be empty") 8 | 9 | self.url = url.rstrip("/") 10 | self.auth = auth 11 | # implicitly_added is set to True only if the endpoint wasn't configured manually by the user through 12 | # a widget, but was instead implicitly defined as an endpoint to a wrapper kernel in the configuration 13 | # JSON file. 14 | self.implicitly_added = implicitly_added 15 | 16 | def __eq__(self, other): 17 | if type(other) is not Endpoint: 18 | return False 19 | return self.url == other.url and self.auth == other.auth 20 | 21 | def __hash__(self): 22 | return hash((self.url, self.auth)) 23 | 24 | def __ne__(self, other): 25 | return not self == other 26 | 27 | def __str__(self): 28 | return "Endpoint({})".format(self.url) 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # Notebook Checkpoints 60 | .ipynb_checkpoints 61 | 62 | .idea/* 63 | MANIFEST 64 | .vscode/* 65 | !.vscode/tasks.json 66 | 67 | venv/* 68 | */.idea/* 69 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/filehandler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .utils import join_paths, get_instance_id 4 | from .filesystemreaderwriter import FileSystemReaderWriter 5 | 6 | 7 | class MagicsFileHandler(logging.FileHandler): 8 | """The default logging handler used by the magics; this behavior can be overridden by modifying the config file""" 9 | 10 | def __init__(self, **kwargs): 11 | # Simply invokes the behavior of the superclass, but sets the filename keyword argument if it's not already set. 12 | if "filename" in kwargs: 13 | super(MagicsFileHandler, self).__init__(**kwargs) 14 | else: 15 | magics_home_path = kwargs.pop("home_path") 16 | logs_folder_name = "logs" 17 | log_file_name = "log_{}.log".format(get_instance_id()) 18 | directory = FileSystemReaderWriter( 19 | join_paths(magics_home_path, logs_folder_name) 20 | ) 21 | directory.ensure_path_exists() 22 | super(MagicsFileHandler, self).__init__( 23 | filename=join_paths(directory.path, log_file_name), **kwargs 24 | ) 25 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/kerberos.py: -------------------------------------------------------------------------------- 1 | """Class for implementing a Kerberos authenticator for SparkMagic""" 2 | 3 | from requests_kerberos import HTTPKerberosAuth 4 | import sparkmagic.utils.configuration as conf 5 | from .customauth import Authenticator 6 | 7 | 8 | class Kerberos(HTTPKerberosAuth, Authenticator): 9 | """Kerberos authenticator for SparkMagic""" 10 | 11 | def __init__(self, parsed_attributes=None): 12 | """Initializes the Authenticator with the attributes in the attributes 13 | parsed from a %spark magic command if applicable, or with default values 14 | otherwise. 15 | 16 | Args: 17 | self, 18 | parsed_attributes (IPython.core.magics.namespace): The namespace object that 19 | is created from parsing %spark magic command. 
20 | """ 21 | HTTPKerberosAuth.__init__(self, **conf.kerberos_auth_configuration()) 22 | Authenticator.__init__(self, parsed_attributes) 23 | 24 | def __call__(self, request): 25 | return HTTPKerberosAuth.__call__(self, request) 26 | 27 | def __hash__(self): 28 | return hash((self.url, self.__class__.__name__)) 29 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_endpoint.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 2 | from sparkmagic.livyclientlib.endpoint import Endpoint 3 | from sparkmagic.auth.basic import Basic 4 | from sparkmagic.auth.kerberos import Kerberos 5 | 6 | 7 | def test_equality(): 8 | basic_auth1 = Basic() 9 | basic_auth2 = Basic() 10 | kerberos_auth1 = Kerberos() 11 | kerberos_auth2 = Kerberos() 12 | assert Endpoint("http://url.com", basic_auth1) == Endpoint( 13 | "http://url.com", basic_auth2 14 | ) 15 | assert Endpoint("http://url.com", kerberos_auth1) == Endpoint( 16 | "http://url.com", kerberos_auth2 17 | ) 18 | 19 | 20 | def test_inequality(): 21 | basic_auth1 = Basic() 22 | basic_auth2 = Basic() 23 | basic_auth1.username = "user" 24 | basic_auth2.username = "different_user" 25 | assert Endpoint("http://url.com", basic_auth1) != Endpoint( 26 | "http://url.com", basic_auth2 27 | ) 28 | 29 | 30 | def test_invalid_url(): 31 | basic_auth = Basic() 32 | try: 33 | endpoint = Endpoint(None, basic_auth) 34 | assert False 35 | except BadUserDataException: 36 | assert True 37 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/filesystemreaderwriter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | import os 4 | 5 | 6 | class FileSystemReaderWriter(object): 7 | def __init__(self, path): 8 | from .utils import expand_path 9 | 10 | assert path is not None 11 | self.path = expand_path(path) 12 | 13 | def ensure_path_exists(self): 14 | self._ensure_path_exists(self.path) 15 | 16 | def ensure_file_exists(self): 17 | self._ensure_path_exists(os.path.dirname(self.path)) 18 | if not os.path.exists(self.path): 19 | open(self.path, "w").close() 20 | 21 | def read_lines(self): 22 | if os.path.isfile(self.path): 23 | with open(self.path, "r") as f: 24 | return f.readlines() 25 | else: 26 | return "" 27 | 28 | def overwrite_with_line(self, line): 29 | with open(self.path, "w+") as f: 30 | f.writelines(line) 31 | 32 | def _ensure_path_exists(self, path): 33 | try: 34 | os.makedirs(path) 35 | except OSError: 36 | if not os.path.isdir(path): 37 | raise 38 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_events.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from mock import MagicMock 3 | 4 | from hdijupyterutils.events import Events 5 | from hdijupyterutils.utils import generate_uuid 6 | from hdijupyterutils.constants import INSTANCE_ID, TIMESTAMP 7 | from hdijupyterutils.utils import get_instance_id 8 | 9 | 10 | def setup_function(): 11 | global events, guid1, guid2, guid3, time_stamp 12 | 13 | events = Events(MagicMock()) 14 | events.get_utc_date_time = MagicMock() 15 | time_stamp = events.get_utc_date_time() 16 | guid1 = generate_uuid() 17 | guid2 = generate_uuid() 18 | guid3 = generate_uuid() 19 | 20 | 21 | def teardown_function(): 22 | pass 23 | 24 | 25 | def test_send_to_handler(): 26 | kwargs_list = [(TIMESTAMP, time_stamp)] 27 | expected_kwargs_list = [(INSTANCE_ID, get_instance_id())] + kwargs_list 28 | 29 | events.send_to_handler(kwargs_list) 30 | 31 | events.handler.handle_event.assert_called_once_with(expected_kwargs_list) 32 | 33 | 34 | def test_send_to_handler_asserts_less_than_12(): 35 | with pytest.raises(AssertionError): 36 | kwargs_list = [(TIMESTAMP, time_stamp)] * 13 37 | events.send_to_handler(kwargs_list) 38 | assert False 39 | -------------------------------------------------------------------------------- /helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v2 3 | name: sparkmagic 4 | description: A Helm chart for Kubernetes 5 | 6 | # A chart can be either an 'application' or a 'library' chart. 7 | # 8 | # Application charts are a collection of templates that can be packaged into versioned archives 9 | # to be deployed. 10 | # 11 | # Library charts provide useful utilities or functions for the chart developer. They're included as 12 | # a dependency of application charts to inject those utilities and functions into the rendering 13 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 14 | type: application 15 | 16 | # This is the chart version. This version number should be incremented each time you make changes 17 | # to the chart and its templates, including the app version. 18 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 19 | version: 0.1.0 20 | 21 | # This is the version number of the application being deployed. This version number should be 22 | # incremented each time you make changes to the application. Versions are not expected to 23 | # follow Semantic Versioning. 
They should reflect the version the application is using. 24 | # It is recommended to use it with quotes. 25 | appVersion: "0.21.0" 26 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkrkernel/sparkrkernel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.utils.constants import LANG_R 4 | from sparkmagic.kernels.wrapperkernel.sparkkernelbase import SparkKernelBase 5 | 6 | 7 | class SparkRKernel(SparkKernelBase): 8 | def __init__(self, **kwargs): 9 | implementation = "SparkR" 10 | implementation_version = "1.0" 11 | language = LANG_R 12 | language_version = "0.1" 13 | language_info = { 14 | "name": "sparkR", 15 | "mimetype": "text/x-rsrc", 16 | "codemirror_mode": "text/x-rsrc", 17 | "file_extension": ".r", 18 | "pygments_lexer": "r", 19 | } 20 | 21 | session_language = LANG_R 22 | 23 | super(SparkRKernel, self).__init__( 24 | implementation, 25 | implementation_version, 26 | language, 27 | language_version, 28 | language_info, 29 | session_language, 30 | **kwargs 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | from ipykernel.kernelapp import IPKernelApp 36 | 37 | IPKernelApp.launch_instance(kernel_class=SparkRKernel) 38 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/sparkkernel/sparkkernel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.utils.constants import LANG_SCALA 4 | from sparkmagic.kernels.wrapperkernel.sparkkernelbase import SparkKernelBase 5 | 6 | 7 | class SparkKernel(SparkKernelBase): 8 | def __init__(self, **kwargs): 9 | implementation = "Spark" 10 | implementation_version = "1.0" 11 | language = LANG_SCALA 12 | language_version = "0.1" 13 | language_info = { 14 | "name": "scala", 15 | "mimetype": "text/x-scala", 16 | "codemirror_mode": "text/x-scala", 17 | "file_extension": ".sc", 18 | "pygments_lexer": "scala", 19 | } 20 | 21 | session_language = LANG_SCALA 22 | 23 | super(SparkKernel, self).__init__( 24 | implementation, 25 | implementation_version, 26 | language, 27 | language_version, 28 | language_info, 29 | session_language, 30 | **kwargs 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | from ipykernel.kernelapp import IPKernelApp 36 | 37 | IPKernelApp.launch_instance(kernel_class=SparkKernel) 38 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/pysparkkernel/pysparkkernel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | from sparkmagic.utils.constants import LANG_PYTHON 4 | from sparkmagic.kernels.wrapperkernel.sparkkernelbase import SparkKernelBase 5 | 6 | 7 | class PySparkKernel(SparkKernelBase): 8 | def __init__(self, **kwargs): 9 | implementation = "PySpark" 10 | implementation_version = "1.0" 11 | language = LANG_PYTHON 12 | language_version = "0.1" 13 | language_info = { 14 | "name": "pyspark", 15 | "mimetype": "text/x-python", 16 | "codemirror_mode": {"name": "python", "version": 3}, 17 | "file_extension": ".py", 18 | "pygments_lexer": "python3", 19 | } 20 | 21 | session_language = LANG_PYTHON 22 | 23 | super(PySparkKernel, self).__init__( 24 | implementation, 25 | implementation_version, 26 | language, 27 | language_version, 28 | language_info, 29 | session_language, 30 | **kwargs 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | from ipykernel.kernelapp import IPKernelApp 36 | 37 | IPKernelApp.launch_instance(kernel_class=PySparkKernel) 38 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run Unit Tests 2 | 3 | on: 4 | push: {} 5 | pull_request: {} 6 | schedule: 7 | # Run daily 8 | - cron: "6 4 * * *" 9 | workflow_call: 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install system dependencies 25 | run: | 26 | sudo apt-get install -y libkrb5-dev 27 | - name: Install package dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install pytest mock 31 | pip install -r hdijupyterutils/requirements.txt -e hdijupyterutils 32 | pip install -r autovizwidget/requirements.txt -e autovizwidget 33 | pip install -r sparkmagic/requirements.txt -e sparkmagic 34 | - name: Run hdijupyterutils tests 35 | run: | 36 | pytest hdijupyterutils 37 | - name: Run autovizwidget tests 38 | run: | 39 | pytest autovizwidget 40 | - name: Run sparkmagic tests 41 | run: | 42 | mkdir ~/.sparkmagic 43 | pytest sparkmagic 44 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/abstractmenuwidget.py: -------------------------------------------------------------------------------- 1 | from ipywidgets import Box 2 | 3 | from hdijupyterutils.ipythondisplay import IpythonDisplay 4 | from hdijupyterutils.ipywidgetfactory import IpyWidgetFactory 5 | 6 | 7 | class AbstractMenuWidget(Box): 8 | def __init__( 9 | self, 10 | spark_controller, 11 | ipywidget_factory=None, 12 | ipython_display=None, 13 | nested_widget_mode=False, 14 | testing=False, 15 | **kwargs 16 | ): 17 | kwargs["orientation"] = "vertical" 18 | 19 | if not testing: 20 | super(AbstractMenuWidget, self).__init__((), **kwargs) 21 | 22 | self.spark_controller = spark_controller 23 | 24 | if ipywidget_factory is None: 25 | ipywidget_factory = IpyWidgetFactory() 26 | self.ipywidget_factory = ipywidget_factory 27 | 28 | if ipython_display is None: 29 | ipython_display = IpythonDisplay() 30 | self.ipython_display = ipython_display 31 | 32 | self.children = [] 33 | 34 | if not nested_widget_mode: 35 | self._repr_html_() 36 | 37 | def _repr_html_(self): 38 | for child in self.children: 39 | self.ipython_display.display(child) 40 
| return "" 41 | 42 | def hide_all(self): 43 | for child in self.children: 44 | child.visible = False 45 | 46 | def run(self): 47 | raise NotImplementedError("Concrete menu widget must define run") 48 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_plotlygraphrenderer.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | 3 | from ..plotlygraphs.graphrenderer import GraphRenderer 4 | from ..widget.encoding import Encoding 5 | 6 | 7 | def test_support_all_graph_types(): 8 | renderer = GraphRenderer() 9 | 10 | for chart_type in Encoding.supported_chart_types: 11 | graph = renderer._get_graph(chart_type) 12 | assert graph is not None 13 | getattr(graph, "render") 14 | getattr(graph, "display_x") 15 | getattr(graph, "display_y") 16 | getattr(graph, "display_logarithmic_x_axis") 17 | getattr(graph, "display_logarithmic_y_axis") 18 | 19 | 20 | def test_display_controls(): 21 | renderer = GraphRenderer() 22 | 23 | GraphRenderer.display_x = MagicMock(return_value=True) 24 | GraphRenderer.display_y = MagicMock(return_value=True) 25 | assert renderer.display_controls(Encoding.chart_type_line) 26 | 27 | GraphRenderer.display_x = MagicMock(return_value=True) 28 | GraphRenderer.display_y = MagicMock(return_value=False) 29 | assert renderer.display_controls(Encoding.chart_type_line) 30 | 31 | GraphRenderer.display_x = MagicMock(return_value=False) 32 | GraphRenderer.display_y = MagicMock(return_value=True) 33 | assert renderer.display_controls(Encoding.chart_type_line) 34 | 35 | GraphRenderer.display_x = MagicMock(return_value=False) 36 | GraphRenderer.display_y = MagicMock(return_value=False) 37 | assert not renderer.display_controls(Encoding.chart_type_line) 38 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_filesystemreaderwriter.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from hdijupyterutils.filesystemreaderwriter import FileSystemReaderWriter 4 | 5 | 6 | def test_read(): 7 | path = "test" 8 | if os.path.isfile(path): 9 | os.remove(path) 10 | 11 | expected_lines = ["a\n", "b"] 12 | rw = FileSystemReaderWriter(path) 13 | with open("test", "w") as f: 14 | f.writelines(expected_lines) 15 | 16 | read_lines = rw.read_lines() 17 | assert expected_lines == read_lines 18 | 19 | os.remove(path) 20 | 21 | 22 | def test_write_non_existent_file(): 23 | path = "test" 24 | if os.path.isfile(path): 25 | os.remove(path) 26 | 27 | expected_line = "hi" 28 | 29 | rw = FileSystemReaderWriter(path) 30 | rw.overwrite_with_line(expected_line) 31 | 32 | with open("test", "r") as f: 33 | lines = f.readlines() 34 | assert len(lines) == 1 35 | assert lines[0] == expected_line 36 | 37 | os.remove(path) 38 | 39 | 40 | def test_overwrite_existent_file(): 41 | path = "test" 42 | if os.path.isfile(path): 43 | os.remove(path) 44 | 45 | with open("test", "w") as f: 46 | f.writelines(["ab"]) 47 | 48 | expected_line = "hi" 49 | 50 | rw = FileSystemReaderWriter(path) 51 | rw.overwrite_with_line(expected_line) 52 | 53 | with open("test", "r") as f: 54 | lines = f.readlines() 55 | assert len(lines) == 1 56 | assert lines[0] == expected_line 57 | 58 | os.remove(path) 59 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/configurableretrypolicy.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from .linearretrypolicy import LinearRetryPolicy 5 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 6 | 7 | 8 | class ConfigurableRetryPolicy(LinearRetryPolicy): 9 | """Retry policy that returns a configurable number of seconds to sleep 10 | between calls, takes all status codes 500 or above to be retriable, and 11 | retries a given maximum number of times. 12 | 13 | If the retry count exceeds the number of items in the list, the last 14 | item in the list is always returned. 15 | """ 16 | 17 | def __init__(self, retry_seconds_to_sleep_list, max_retries): 18 | super(ConfigurableRetryPolicy, self).__init__(-1, max_retries) 19 | 20 | # If the user configured an empty list, make this behave as a 21 | # Linear Retry Policy by assigning a list of 1 element. 22 | if len(retry_seconds_to_sleep_list) == 0: 23 | retry_seconds_to_sleep_list = [5] 24 | elif not all(n > 0 for n in retry_seconds_to_sleep_list): 25 | raise BadUserConfigurationException( 26 | "All items in the list in your config need to be positive for configurable retry policy" 27 | ) 28 | 29 | self.retry_seconds_to_sleep_list = retry_seconds_to_sleep_list 30 | self._max_index = len(self.retry_seconds_to_sleep_list) - 1 31 | 32 | def seconds_to_sleep(self, retry_count): 33 | index = max(retry_count - 1, 0) 34 | if index > self._max_index: 35 | index = self._max_index 36 | 37 | return self.retry_seconds_to_sleep_list[index] 38 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_sparkevents.py: -------------------------------------------------------------------------------- 1 | from hdijupyterutils.constants import INSTANCE_ID, EVENT_NAME, TIMESTAMP 2 | from hdijupyterutils.utils import get_instance_id 3 | from mock import MagicMock 4 | 5 | from autovizwidget.utils.events import AutoVizEvents 6 | from autovizwidget.utils.constants import GRAPH_RENDER_EVENT, GRAPH_TYPE 7 | import autovizwidget.utils.configuration as conf 8 | 9 | 10 | def setup_function(): 11 | global events, time_stamp 12 | 13 | events = AutoVizEvents() 14 | events.handler = MagicMock() 15 | events.get_utc_date_time = MagicMock() 16 | time_stamp = events.get_utc_date_time() 17 | 18 | 19 | def teardown_function(): 20 | conf.override_all({}) 21 | 22 | 23 | def test_not_emit_graph_render_event_when_not_registered(): 24 | event_name = GRAPH_RENDER_EVENT 25 | graph_type = "Bar" 26 | 27 | kwargs_list = [ 28 | (INSTANCE_ID, get_instance_id()), 29 | (EVENT_NAME, event_name), 30 | (TIMESTAMP, time_stamp), 31 | (GRAPH_TYPE, graph_type), 32 | ] 33 | 34 | events.emit_graph_render_event(graph_type) 35 | 36 | events.get_utc_date_time.assert_called_with() 37 | assert not events.handler.handle_event.called 38 | 39 | 40 | def test_emit_graph_render_event_when_registered(): 41 | conf.override(conf.events_handler.__name__, events.handler) 42 | event_name = GRAPH_RENDER_EVENT 43 | graph_type = "Bar" 44 | 45 | kwargs_list = [ 46 | (INSTANCE_ID, get_instance_id()), 47 | (EVENT_NAME, event_name), 48 | (TIMESTAMP, time_stamp), 49 | (GRAPH_TYPE, graph_type), 50 | ] 51 | 52 | events.emit_graph_render_event(graph_type) 53 | 54 | events.get_utc_date_time.assert_called_with() 55 | events.handler.handle_event.assert_called_once_with(kwargs_list) 56 |
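As a quick illustration of the sleep schedule that the ConfigurableRetryPolicy above produces, here is a minimal sketch; the sleep list and retry budget are arbitrary example values, not defaults:

from sparkmagic.livyclientlib.configurableretrypolicy import ConfigurableRetryPolicy

# Sleep 0.2s after the first failed attempt, 0.5s after the second, 1s after
# the third; any later retry is clamped to the last item in the list.
policy = ConfigurableRetryPolicy(retry_seconds_to_sleep_list=[0.2, 0.5, 1], max_retries=8)

assert policy.seconds_to_sleep(1) == 0.2
assert policy.seconds_to_sleep(3) == 1
assert policy.seconds_to_sleep(10) == 1  # retry_count past the end of the list: last item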
-------------------------------------------------------------------------------- /Dockerfile.jupyter: -------------------------------------------------------------------------------- 1 | FROM jupyter/base-notebook 2 | 3 | ARG dev_mode=false 4 | 5 | USER root 6 | 7 | # This is needed because requests-kerberos fails to install on debian due to missing linux headers 8 | RUN conda install requests-kerberos -y 9 | 10 | USER $NB_USER 11 | 12 | RUN pip install --upgrade pip 13 | RUN pip install --upgrade --ignore-installed setuptools 14 | 15 | COPY examples /home/jovyan/work 16 | 17 | # Install sparkmagic - if DEV_MODE is set, use the one in the host directory. 18 | # Otherwise, just install from pip. 19 | COPY hdijupyterutils hdijupyterutils/ 20 | COPY autovizwidget autovizwidget/ 21 | COPY sparkmagic sparkmagic/ 22 | 23 | USER root 24 | RUN chown -R $NB_USER . 25 | 26 | USER $NB_USER 27 | RUN if [ "$dev_mode" = "true" ]; then \ 28 | cd hdijupyterutils && pip install -e . && cd ../ && \ 29 | cd autovizwidget && pip install -e . && cd ../ && \ 30 | cd sparkmagic && pip install -e . && cd ../ ; \ 31 | else pip install sparkmagic ; fi 32 | 33 | 34 | RUN mkdir /home/$NB_USER/.sparkmagic 35 | COPY sparkmagic/example_config.json /home/$NB_USER/.sparkmagic/config.json 36 | RUN sed -i 's/localhost/spark/g' /home/$NB_USER/.sparkmagic/config.json 37 | RUN pip install ipywidgets 38 | RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkkernel 39 | RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/pysparkkernel 40 | RUN jupyter-kernelspec install --user $(pip show sparkmagic | grep Location | cut -d" " -f2)/sparkmagic/kernels/sparkrkernel 41 | RUN jupyter server extension enable --py sparkmagic 42 | 43 | USER root 44 | RUN chown $NB_USER /home/$NB_USER/.sparkmagic/config.json 45 | 46 | CMD ["start-notebook.sh", "--NotebookApp.iopub_data_rate_limit=1000000000"] 47 | 48 | USER $NB_USER 49 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/kernels/wrapperkernel/usercodeparser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from sparkmagic.kernels.kernelmagics import KernelMagics 5 | 6 | 7 | class UserCodeParser(object): 8 | # A list of the names of all magics that are cell magics, but which have no cell body input. 9 | # For example, the %%info magic has no cell body input, i.e. 
it is incorrect to call 10 | # %%info 11 | # some_input 12 | _magics_with_no_cell_body = [ 13 | i.__name__ 14 | for i in [ 15 | KernelMagics.info, 16 | KernelMagics.logs, 17 | KernelMagics.cleanup, 18 | KernelMagics.delete, 19 | KernelMagics.help, 20 | KernelMagics.spark, 21 | KernelMagics.send_to_spark, 22 | ] 23 | ] 24 | 25 | def get_code_to_run(self, code): 26 | try: 27 | all_but_first_line = code.split(None, 1)[1] 28 | except IndexError: 29 | all_but_first_line = "" 30 | 31 | if code.startswith("%%local") or code.startswith("%local"): 32 | return all_but_first_line 33 | elif any(code.startswith("%%" + s) for s in self._magics_with_no_cell_body): 34 | return "{}\n ".format(code) 35 | elif any(code.startswith("%" + s) for s in self._magics_with_no_cell_body): 36 | return "%{}\n ".format(code) 37 | elif code.startswith("%%") or code.startswith("%"): 38 | # If they use other line magics: 39 | # %autosave 40 | # my spark code 41 | # my spark code would be run locally and there might be an error. 42 | return code 43 | elif not code: 44 | return code 45 | else: 46 | return "%%spark\n{}".format(code) 47 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_configuration.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | 3 | from hdijupyterutils.configuration import override, override_all, with_override 4 | 5 | 6 | # This is a sample implementation of how a module would use the config methods. 7 | # We'll use these three functions to test it works. 8 | d = {} 9 | path = "~/.testing/config.json" 10 | original_value = 0 11 | 12 | 13 | def module_override(config, value): 14 | global d, path 15 | override(d, path, config, value) 16 | 17 | 18 | def module_override_all(obj): 19 | global d 20 | override_all(d, obj) 21 | 22 | 23 | # Configs 24 | @with_override(d, path) 25 | def my_config(): 26 | global original_value 27 | return original_value 28 | 29 | 30 | @with_override(d, path) 31 | def my_config_2(): 32 | global original_value 33 | return original_value 34 | 35 | 36 | # Test helper functions 37 | def setup_function(): 38 | module_override_all({}) 39 | 40 | 41 | def teardown_function(): 42 | module_override_all({}) 43 | 44 | 45 | # Unit tests begin 46 | def test_original_value_without_overrides(): 47 | assert original_value == my_config() 48 | 49 | 50 | def test_original_value_with_overrides(): 51 | new_value = 2 52 | module_override(my_config.__name__, new_value) 53 | assert new_value == my_config() 54 | 55 | 56 | def test_original_values_when_others_override(): 57 | new_value = 2 58 | module_override(my_config.__name__, new_value) 59 | assert new_value == my_config() 60 | assert original_value == my_config_2() 61 | 62 | 63 | def test_resetting_values_when_others_override(): 64 | new_value = 2 65 | module_override(my_config.__name__, new_value) 66 | assert new_value == my_config() 67 | assert original_value == my_config_2() 68 | 69 | # Reset 70 | module_override_all({}) 71 | assert original_value == my_config() 72 | assert original_value == my_config_2() 73 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/datagraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | import pandas as pd 5 | from hdijupyterutils.ipythondisplay import IpythonDisplay 6 | 7 | 8 | class DataGraph(object): 9 | """This does not use the table version of plotly because it freezes up the browser for >60 rows. Instead, we use 10 | the pandas df HTML representation.""" 11 | 12 | def __init__(self, display=None): 13 | if display is None: 14 | self.display = IpythonDisplay() 15 | else: 16 | self.display = display 17 | 18 | def render(self, df, encoding, output): 19 | with output: 20 | max_rows = pd.get_option("display.max_rows") 21 | max_cols = pd.get_option("display.max_columns") 22 | show_dimensions = pd.get_option("display.show_dimensions") 23 | 24 | # This will hide the index column for pandas df. 25 | self.display.html( 26 | """ 27 | <style type="text/css"> 28 | table.dataframe.hideme tbody th { 29 | display: none; 30 | } 31 | table.dataframe.hideme thead th:first-child { 32 | display: none; 33 | } 34 | </style> 35 | """ 36 | ) 37 | self.display.html( 38 | df.to_html( 39 | max_rows=max_rows, 40 | max_cols=max_cols, 41 | show_dimensions=show_dimensions, 42 | notebook=True, 43 | classes="hideme", 44 | ) 45 | ) 46 | 47 | @staticmethod 48 | def display_logarithmic_x_axis(): 49 | return False 50 | 51 | @staticmethod 52 | def display_logarithmic_y_axis(): 53 | return False 54 | 55 | @staticmethod 56 | def display_x(): 57 | return False 58 | 59 | @staticmethod 60 | def display_y(): 61 | return False 62 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/log.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 2 | 3 | from __future__ import print_function 4 | import logging 5 | import logging.config 6 | 7 | from .constants import LOGGING_CONFIG_CLASS_NAME 8 | 9 | 10 | class Log(object): 11 | """Logger for magics. A small wrapper class around the configured logger described in the configuration file.""" 12 | 13 | def __init__(self, logger_name, logging_config, caller_name): 14 | logging.config.dictConfig(logging_config) 15 | 16 | assert caller_name is not None 17 | self._caller_name = caller_name 18 | self.logger_name = logger_name 19 | self._getLogger() 20 | 21 | def debug(self, message): 22 | self.logger.debug(self._transform_log_message(message)) 23 | 24 | def error(self, message): 25 | self.logger.error(self._transform_log_message(message)) 26 | 27 | def info(self, message): 28 | self.logger.info(self._transform_log_message(message)) 29 | 30 | def _getLogger(self): 31 | self.logger = logging.getLogger(self.logger_name) 32 | 33 | def _transform_log_message(self, message): 34 | return "{}\t{}".format(self._caller_name, message) 35 | 36 | 37 | def logging_config(): 38 | return { 39 | "version": 1, 40 | "formatters": { 41 | "magicsFormatter": { 42 | "format": "%(asctime)s\t%(levelname)s\t%(message)s", 43 | "datefmt": "", 44 | } 45 | }, 46 | "handlers": { 47 | "magicsHandler": { 48 | "class": LOGGING_CONFIG_CLASS_NAME, 49 | "formatter": "magicsFormatter", 50 | "home_path": "~/.hdijupyterutils", 51 | } 52 | }, 53 | "loggers": { 54 | "magicsLogger": { 55 | "handlers": ["magicsHandler"], 56 | "level": "DEBUG", 57 | "propagate": 0, 58 | } 59 | }, 60 | } 61 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Release 3 | 4 | on: 5 | workflow_dispatch: 6 | inputs: 7 | type: 8 | type: choice 9 | description: Select the type of version bump 10 | # https://github.com/jaumann/github-bumpversion-action/tree/v0.0.7/?tab=readme-ov-file#bumping 11 | options: 12 | - 
major 13 | - minor 14 | - patch 15 | default: 'patch' 16 | jobs: 17 | release: 18 | # Do not run this job for pull requests where both branches are from the same repo. 19 | # Jobs that depend on this one will be skipped too. 20 | # This prevents duplicate CI runs for our own pull requests, whilst preserving the ability to 21 | # run the CI for each branch push to a fork, and for each pull request originating from a fork. 22 | if: github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.id != github.event.pull_request.base.repo.id 23 | permissions: 24 | contents: write 25 | packages: write 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Check out code 29 | uses: actions/checkout@v4 30 | with: 31 | persist-credentials: true 32 | fetch-depth: 0 33 | ref: master 34 | 35 | - name: Bump version and create tag 36 | uses: jasonamyers/github-bumpversion-action@v1.0.5 37 | id: bump 38 | env: 39 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 40 | DEFAULT_BUMP: ${{ inputs.type }} 41 | 42 | - name: Push tag 43 | run: git push origin master --tags 44 | env: 45 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | - name: Create GitHub Release with changelog 48 | id: create_release 49 | uses: ncipollo/release-action@v1.14.0 50 | with: 51 | name: ${{ steps.bump.outputs.new_ver }} 52 | bodyFile: CHANGELOG.md 53 | tag: ${{ steps.bump.outputs.new_ver }} 54 | env: 55 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 56 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/ipywidgetfactory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from ipywidgets import ( 5 | VBox, 6 | Output, 7 | Button, 8 | HTML, 9 | HBox, 10 | Dropdown, 11 | Checkbox, 12 | ToggleButtons, 13 | Text, 14 | Textarea, 15 | Tab, 16 | Password, 17 | ) 18 | 19 | 20 | class IpyWidgetFactory(object): 21 | """This class exists solely for unit testing purposes.""" 22 | 23 | @staticmethod 24 | def get_vbox(**kwargs): 25 | return VBox(**kwargs) 26 | 27 | @staticmethod 28 | def get_output(**kwargs): 29 | return Output(**kwargs) 30 | 31 | @staticmethod 32 | def get_button(**kwargs): 33 | return Button(**kwargs) 34 | 35 | @staticmethod 36 | def get_html(value, **kwargs): 37 | return HTML(value, **kwargs) 38 | 39 | @staticmethod 40 | def get_hbox(**kwargs): 41 | return HBox(**kwargs) 42 | 43 | @staticmethod 44 | def get_dropdown(**kwargs): 45 | return Dropdown(**kwargs) 46 | 47 | @staticmethod 48 | def get_checkbox(**kwargs): 49 | return Checkbox(**kwargs) 50 | 51 | @staticmethod 52 | def get_toggle_buttons(**kwargs): 53 | return ToggleButtons(**kwargs) 54 | 55 | @staticmethod 56 | def get_text(**kwargs): 57 | return Text(**kwargs) 58 | 59 | @staticmethod 60 | def get_password(**kwargs): 61 | return Password(**kwargs) 62 | 63 | @staticmethod 64 | def get_text_area(**kwargs): 65 | return Textarea(**kwargs) 66 | 67 | @staticmethod 68 | def get_submit_button(**kwargs): 69 | return SubmitButton(**kwargs) 70 | 71 | @staticmethod 72 | def get_tab(**kwargs): 73 | return Tab(**kwargs) 74 | 75 | 76 | class SubmitButton(Button): 77 | def __init__(self, **kwargs): 78 | super(SubmitButton, self).__init__(**kwargs) 79 | self.on_click(self.submit_clicked) 80 | 81 | def submit_clicked(self, button): 82 | self.parent_widget.run() 83 | -------------------------------------------------------------------------------- 
/sparkmagic/sparkmagic/livyclientlib/sendstringtosparkcommand.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Jupyter Development Team. 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from sparkmagic.livyclientlib.sendtosparkcommand import SendToSparkCommand 5 | from sparkmagic.livyclientlib.command import Command 6 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 7 | 8 | 9 | class SendStringToSparkCommand(SendToSparkCommand): 10 | def _scala_command( 11 | self, input_variable_name, input_variable_value, output_variable_name 12 | ): 13 | self._assert_input_is_string_type(input_variable_name, input_variable_value) 14 | scala_code = 'var {} = """{}"""'.format( 15 | output_variable_name, input_variable_value 16 | ) 17 | return Command(scala_code) 18 | 19 | def _pyspark_command( 20 | self, input_variable_name, input_variable_value, output_variable_name 21 | ): 22 | self._assert_input_is_string_type(input_variable_name, input_variable_value) 23 | pyspark_code = "{} = {}".format( 24 | output_variable_name, repr(input_variable_value) 25 | ) 26 | return Command(pyspark_code) 27 | 28 | def _r_command( 29 | self, input_variable_name, input_variable_value, output_variable_name 30 | ): 31 | self._assert_input_is_string_type(input_variable_name, input_variable_value) 32 | escaped_input_variable_value = input_variable_value.replace( 33 | "\\", "\\\\" 34 | ).replace('"', '\\"') 35 | r_code = """assign("{}","{}")""".format( 36 | output_variable_name, escaped_input_variable_value 37 | ) 38 | return Command(r_code) 39 | 40 | def _assert_input_is_string_type(self, input_variable_name, input_variable_value): 41 | if not isinstance(input_variable_value, str): 42 | wrong_type = input_variable_value.__class__.__name__ 43 | raise BadUserDataException( 44 | "{} is not a str! Got {} instead".format( 45 | input_variable_name, wrong_type 46 | ) 47 | ) 48 | -------------------------------------------------------------------------------- /hdijupyterutils/setup.py: -------------------------------------------------------------------------------- 1 | DESCRIPTION = "HdiJupyterUtils: Utils for Jupyter projects from HDInsight team" 2 | NAME = "hdijupyterutils" 3 | PACKAGES = ["hdijupyterutils"] 4 | AUTHOR = "Jupyter Development Team" 5 | AUTHOR_EMAIL = "jupyter@googlegroups.org" 6 | URL = "https://github.com/jupyter-incubator/sparkmagic" 7 | DOWNLOAD_URL = "https://github.com/jupyter-incubator/sparkmagic" 8 | LICENSE = "BSD 3-clause" 9 | 10 | import io 11 | import os 12 | import re 13 | 14 | from distutils.core import setup 15 | 16 | 17 | def read(path, encoding="utf-8"): 18 | path = os.path.join(os.path.dirname(__file__), path) 19 | with io.open(path, encoding=encoding) as fp: 20 | return fp.read() 21 | 22 | 23 | # read requirements.txt and convert to install_requires format 24 | def requirements(path): 25 | lines = read(path).splitlines() 26 | # remove comments and empty lines 27 | lines = [line for line in lines if not line.startswith("#") and line] 28 | return lines 29 | 30 | 31 | def version(path): 32 | """Obtain the package version from a python file e.g. pkg/__init__.py. 33 | 34 | See the Python packaging guide on single-sourcing the package version.
35 | """ 36 | version_file = read(path) 37 | version_match = re.search( 38 | r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M 39 | ) 40 | if version_match: 41 | return version_match.group(1) 42 | raise RuntimeError("Unable to find version string.") 43 | 44 | 45 | VERSION = version("hdijupyterutils/__init__.py") 46 | 47 | setup( 48 | name=NAME, 49 | version=VERSION, 50 | description=DESCRIPTION, 51 | author=AUTHOR, 52 | author_email=AUTHOR_EMAIL, 53 | url=URL, 54 | download_url=DOWNLOAD_URL, 55 | license=LICENSE, 56 | packages=PACKAGES, 57 | classifiers=[ 58 | "Development Status :: 4 - Beta", 59 | "Environment :: Console", 60 | "Intended Audience :: Science/Research", 61 | "License :: OSI Approved :: BSD License", 62 | "Natural Language :: English", 63 | "Programming Language :: Python :: 3.7", 64 | ], 65 | install_requires=requirements("requirements.txt"), 66 | ) 67 | -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/tests/test_logger.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import logging 3 | 4 | from hdijupyterutils.log import Log, logging_config 5 | 6 | 7 | def get_logging_config(): 8 | return logging_config() 9 | 10 | 11 | def test_log_init(): 12 | logging_config = get_logging_config() 13 | logger = Log("name", logging_config, "something") 14 | assert isinstance(logger.logger, logging.Logger) 15 | 16 | 17 | # A MockLogger class with debug and error methods that store the most recent level + message in an 18 | # instance variable. 19 | class MockLogger(object): 20 | def __init__(self): 21 | self.level = self.message = None 22 | 23 | def debug(self, message): 24 | self.level, self.message = "DEBUG", message 25 | 26 | def error(self, message): 27 | self.level, self.message = "ERROR", message 28 | 29 | def info(self, message): 30 | self.level, self.message = "INFO", message 31 | 32 | 33 | class MockLog(Log): 34 | def __init__(self, name): 35 | logging_config = get_logging_config() 36 | super(MockLog, self).__init__(name, logging_config, name) 37 | 38 | def _getLogger(self): 39 | self.logger = MockLogger() 40 | 41 | 42 | def test_log_returnvalue(): 43 | logger = MockLog("test2") 44 | assert isinstance(logger.logger, MockLogger) 45 | mock = logger.logger 46 | logger.debug("word1") 47 | assert mock.level == "DEBUG" 48 | assert mock.message == "test2\tword1" 49 | logger.error("word2") 50 | assert mock.level == "ERROR" 51 | assert mock.message == "test2\tword2" 52 | logger.info("word3") 53 | assert mock.level == "INFO" 54 | assert mock.message == "test2\tword3" 55 | 56 | 57 | def test_log_unicode(): 58 | logger = MockLog("test2") 59 | assert isinstance(logger.logger, MockLogger) 60 | mock = logger.logger 61 | logger.debug("word1è") 62 | assert mock.level == "DEBUG" 63 | assert mock.message == "test2\tword1è" 64 | logger.error("word2è") 65 | assert mock.level == "ERROR" 66 | assert mock.message == "test2\tword2è" 67 | logger.info("word3è") 68 | assert mock.level == "INFO" 69 | assert mock.message == "test2\tword3è" 70 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/customauth.py: -------------------------------------------------------------------------------- 1 | """Base class for implementing an authentication provider for SparkMagic""" 2 | 3 | from hdijupyterutils.ipywidgetfactory import IpyWidgetFactory 4 | from sparkmagic.utils.constants import WIDGET_WIDTH 5 | 6 | 7 | class 
Authenticator(object): 8 | """Base Authenticator for all Sparkmagic authentication providers.""" 9 | 10 | def __init__(self, parsed_attributes=None): 11 | """Initializes the Authenticator with the attributes 12 | parsed from a %spark magic command if applicable, or with default values 13 | otherwise. 14 | 15 | Args: 16 | self, 17 | parsed_attributes (IPython.core.magics.namespace): The namespace object that 18 | is created from parsing the %spark magic command. 19 | """ 20 | if parsed_attributes is not None: 21 | self.url = parsed_attributes.url 22 | else: 23 | self.url = "http://example.com/livy" 24 | self.widgets = self.get_widgets(WIDGET_WIDTH) 25 | 26 | def get_widgets(self, widget_width): 27 | """Creates and returns an address widget. 28 | 29 | Args: 30 | widget_width (str): The width of all widgets to be created. 31 | 32 | Returns: 33 | Sequence[hdijupyterutils.ipywidgetfactory.IpyWidgetFactory]: list of widgets 34 | """ 35 | ipywidget_factory = IpyWidgetFactory() 36 | 37 | self.address_widget = ipywidget_factory.get_text( 38 | description="Address:", value="http://example.com/livy", width=widget_width 39 | ) 40 | widgets = [self.address_widget] 41 | return widgets 42 | 43 | def update_with_widget_values(self): 44 | """Updates url to be the value in the address widget.""" 45 | self.url = self.address_widget.value 46 | 47 | def __call__(self, request): 48 | """subclasses should override""" 49 | return None 50 | 51 | def __eq__(self, other): 52 | if not isinstance(other, Authenticator): 53 | return False 54 | return self.url == other.url 55 | 56 | def __hash__(self): 57 | return hash((self.url, self.__class__.__name__)) 58 | -------------------------------------------------------------------------------- /Dockerfile.spark: -------------------------------------------------------------------------------- 1 | # Pin to Spark 2.x for Scala 2.11 (https://issues.apache.org/jira/browse/LIVY-423) 2 | FROM datamechanics/spark:2.4.7-hadoop-3.1.0-java-8-scala-2.11-python-3.7-latest 3 | 4 | # Use root user for development. This shouldn't be used in production.
5 | USER 0 6 | 7 | # ---------- 8 | # Setup Python and Livy/Spark Deps 9 | # 10 | # Install GCP keys to avoid error: 11 | # "GPG error: https://packages.cloud.google.com/apt cloud-sdk InRelease: The following signatures couldn't be verified because the public key is not available: NO_PUBKEY B53DC80D13EDEF05" 12 | RUN apt-get update && apt-get install -y apt-transport-https ca-certificates gnupg 13 | RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list 14 | RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - 15 | # Livy Requires: 16 | # - mvn (from maven package or maven3 tarball) 17 | # - openjdk-8-jdk (or Oracle JDK 8) 18 | # - Python 2.7+ 19 | # - R 3.x 20 | RUN apt-get update && apt-get install -yq --no-install-recommends --force-yes \ 21 | curl \ 22 | git \ 23 | python3 python3-setuptools python3-venv python3-pip \ 24 | r-base \ 25 | r-base-core && \ 26 | rm -rf /var/lib/apt/lists/* 27 | 28 | ENV PYTHONHASHSEED 0 29 | ENV PYTHONIOENCODING UTF-8 30 | ENV PIP_DISABLE_PIP_VERSION_CHECK 1 31 | 32 | # ---------- 33 | # Build Livy 34 | # ---------- 35 | ARG LIVY_VERSION=0.7.1-incubating 36 | ENV LIVY_HOME /usr/livy 37 | ENV LIVY_CONF_DIR "${LIVY_HOME}/conf" 38 | ENV LIVY_PORT 8998 39 | 40 | RUN curl --progress-bar -L --retry 3 \ 41 | "http://archive.apache.org/dist/incubator/livy/${LIVY_VERSION}/apache-livy-${LIVY_VERSION}-bin.zip" \ 42 | -o "./apache-livy-${LIVY_VERSION}-bin.zip" \ 43 | && unzip -qq "./apache-livy-${LIVY_VERSION}-bin.zip" -d /usr \ 44 | && mv "/usr/apache-livy-${LIVY_VERSION}-bin" "${LIVY_HOME}" \ 45 | && rm -rf "./apache-livy-${LIVY_VERSION}-bin.zip" \ 46 | && mkdir "${LIVY_HOME}/logs" \ 47 | && chown -R root:root "${LIVY_HOME}" 48 | 49 | EXPOSE 8998 50 | 51 | HEALTHCHECK CMD curl -f "http://host.docker.internal:${LIVY_PORT}/" || exit 1 52 | 53 | CMD ${LIVY_HOME}/bin/livy-server 54 | -------------------------------------------------------------------------------- /autovizwidget/setup.py: -------------------------------------------------------------------------------- 1 | DESCRIPTION = "AutoVizWidget: An Auto-Visualization library for pandas dataframes" 2 | NAME = "autovizwidget" 3 | PACKAGES = [ 4 | "autovizwidget", 5 | "autovizwidget/plotlygraphs", 6 | "autovizwidget/widget", 7 | "autovizwidget/utils", 8 | ] 9 | AUTHOR = "Jupyter Development Team" 10 | AUTHOR_EMAIL = "jupyter@googlegroups.org" 11 | URL = "https://github.com/jupyter-incubator/sparkmagic" 12 | DOWNLOAD_URL = "https://github.com/jupyter-incubator/sparkmagic" 13 | LICENSE = "BSD 3-clause" 14 | 15 | import io 16 | import os 17 | import re 18 | 19 | from distutils.core import setup 20 | 21 | 22 | def read(path, encoding="utf-8"): 23 | path = os.path.join(os.path.dirname(__file__), path) 24 | with io.open(path, encoding=encoding) as fp: 25 | return fp.read() 26 | 27 | 28 | # read requirements.txt and convert to install_requires format 29 | def requirements(path): 30 | lines = read(path).splitlines() 31 | # remove comments and empty lines 32 | lines = [line for line in lines if not line.startswith("#") and line] 33 | return lines 34 | 35 | 36 | def version(path): 37 | """Obtain the package version from a python file e.g. pkg/__init__.py. 38 | 39 | See the Python packaging guide on single-sourcing the package version.
40 | """ 41 | version_file = read(path) 42 | version_match = re.search( 43 | r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M 44 | ) 45 | if version_match: 46 | return version_match.group(1) 47 | raise RuntimeError("Unable to find version string.") 48 | 49 | 50 | VERSION = version("autovizwidget/__init__.py") 51 | 52 | 53 | setup( 54 | name=NAME, 55 | version=VERSION, 56 | description=DESCRIPTION, 57 | author=AUTHOR, 58 | author_email=AUTHOR_EMAIL, 59 | url=URL, 60 | download_url=DOWNLOAD_URL, 61 | license=LICENSE, 62 | packages=PACKAGES, 63 | classifiers=[ 64 | "Development Status :: 4 - Beta", 65 | "Environment :: Console", 66 | "Intended Audience :: Science/Research", 67 | "License :: OSI Approved :: BSD License", 68 | "Natural Language :: English", 69 | "Programming Language :: Python :: 3.7", 70 | ], 71 | install_requires=requirements("requirements.txt"), 72 | ) 73 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/graphrenderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from plotly.offline import init_notebook_mode 5 | import plotly as p 6 | 7 | from .datagraph import DataGraph 8 | from .piegraph import PieGraph 9 | from .linegraph import LineGraph 10 | from .areagraph import AreaGraph 11 | from .bargraph import BarGraph 12 | from .scattergraph import ScatterGraph 13 | from ..widget.encoding import Encoding 14 | 15 | 16 | class GraphRenderer(object): 17 | @staticmethod 18 | def render(df, encoding, output): 19 | with output: 20 | init_notebook_mode() 21 | 22 | GraphRenderer._get_graph(encoding.chart_type).render(df, encoding, output) 23 | 24 | @staticmethod 25 | def display_x(chart_type): 26 | return GraphRenderer._get_graph(chart_type).display_x() 27 | 28 | @staticmethod 29 | def display_y(chart_type): 30 | return GraphRenderer._get_graph(chart_type).display_y() 31 | 32 | @staticmethod 33 | def display_logarithmic_x_axis(chart_type): 34 | return GraphRenderer._get_graph(chart_type).display_logarithmic_x_axis() 35 | 36 | @staticmethod 37 | def display_logarithmic_y_axis(chart_type): 38 | return GraphRenderer._get_graph(chart_type).display_logarithmic_y_axis() 39 | 40 | @staticmethod 41 | def display_controls(chart_type): 42 | display_x = GraphRenderer.display_x(chart_type) 43 | display_y = GraphRenderer.display_y(chart_type) 44 | return display_x or display_y 45 | 46 | @staticmethod 47 | def _get_graph(chart_type): 48 | if chart_type == Encoding.chart_type_scatter: 49 | graph = ScatterGraph() 50 | elif chart_type == Encoding.chart_type_line: 51 | graph = LineGraph() 52 | elif chart_type == Encoding.chart_type_area: 53 | graph = AreaGraph() 54 | elif chart_type == Encoding.chart_type_bar: 55 | graph = BarGraph() 56 | elif chart_type == Encoding.chart_type_pie: 57 | graph = PieGraph() 58 | elif chart_type == Encoding.chart_type_table: 59 | graph = DataGraph() 60 | else: 61 | raise ValueError("Cannot display chart of type {}".format(chart_type)) 62 | 63 | return graph 64 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_kernels.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.utils.constants import LANG_PYTHON, LANG_SCALA, LANG_R 2 | from sparkmagic.kernels.sparkkernel.sparkkernel import SparkKernel 3 | from 
sparkmagic.kernels.pysparkkernel.pysparkkernel import PySparkKernel 4 | from sparkmagic.kernels.sparkrkernel.sparkrkernel import SparkRKernel 5 | 6 | 7 | class TestPySparkKernel(PySparkKernel): 8 | def __init__(self): 9 | kwargs = {"testing": True} 10 | super(TestPySparkKernel, self).__init__(**kwargs) 11 | 12 | 13 | class TestSparkKernel(SparkKernel): 14 | def __init__(self): 15 | kwargs = {"testing": True} 16 | super(TestSparkKernel, self).__init__(**kwargs) 17 | 18 | 19 | class TestSparkRKernel(SparkRKernel): 20 | def __init__(self): 21 | kwargs = {"testing": True} 22 | super(TestSparkRKernel, self).__init__(**kwargs) 23 | 24 | 25 | def test_pyspark_kernel_configs(): 26 | kernel = TestPySparkKernel() 27 | assert kernel.session_language == LANG_PYTHON 28 | 29 | assert kernel.implementation == "PySpark" 30 | assert kernel.language == LANG_PYTHON 31 | assert kernel.language_version == "0.1" 32 | assert kernel.language_info == { 33 | "name": "pyspark", 34 | "mimetype": "text/x-python", 35 | "codemirror_mode": {"name": "python", "version": 3}, 36 | "file_extension": ".py", 37 | "pygments_lexer": "python3", 38 | } 39 | 40 | 41 | def test_spark_kernel_configs(): 42 | kernel = TestSparkKernel() 43 | 44 | assert kernel.session_language == LANG_SCALA 45 | 46 | assert kernel.implementation == "Spark" 47 | assert kernel.language == LANG_SCALA 48 | assert kernel.language_version == "0.1" 49 | assert kernel.language_info == { 50 | "name": "scala", 51 | "mimetype": "text/x-scala", 52 | "pygments_lexer": "scala", 53 | "file_extension": ".sc", 54 | "codemirror_mode": "text/x-scala", 55 | } 56 | 57 | 58 | def test_sparkr_kernel_configs(): 59 | kernel = TestSparkRKernel() 60 | 61 | assert kernel.session_language == LANG_R 62 | 63 | assert kernel.implementation == "SparkR" 64 | assert kernel.language == LANG_R 65 | assert kernel.language_version == "0.1" 66 | assert kernel.language_info == { 67 | "name": "sparkR", 68 | "mimetype": "text/x-rsrc", 69 | "pygments_lexer": "r", 70 | "file_extension": ".r", 71 | "codemirror_mode": "text/x-rsrc", 72 | } 73 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_heartbeatthread.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | from time import sleep 3 | 4 | from sparkmagic.livyclientlib.livysession import _HeartbeatThread 5 | 6 | 7 | def test_create_thread(): 8 | session = MagicMock() 9 | refresh_seconds = 1 10 | retry_seconds = 2 11 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds) 12 | 13 | assert heartbeat_thread.livy_session == session 14 | assert heartbeat_thread.refresh_seconds == refresh_seconds 15 | assert heartbeat_thread.retry_seconds == retry_seconds 16 | 17 | 18 | def test_run_once(): 19 | session = MagicMock() 20 | refresh_seconds = 0.1 21 | retry_seconds = 2 22 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds, 1) 23 | 24 | heartbeat_thread.start() 25 | sleep(0.15) 26 | heartbeat_thread.stop() 27 | 28 | session.refresh_status_and_info.assert_called_once_with() 29 | assert heartbeat_thread.livy_session is None 30 | 31 | 32 | def test_run_stops(): 33 | session = MagicMock() 34 | refresh_seconds = 0.01 35 | retry_seconds = 2 36 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds) 37 | 38 | heartbeat_thread.start() 39 | sleep(0.1) 40 | heartbeat_thread.stop() 41 | 42 | assert session.refresh_status_and_info.called 43 | assert
heartbeat_thread.livy_session is None 44 | 45 | 46 | def test_run_retries(): 47 | msg = "oh noes!" 48 | session = MagicMock() 49 | session.refresh_status_and_info = MagicMock(side_effect=ValueError(msg)) 50 | refresh_seconds = 0.1 51 | retry_seconds = 0.1 52 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds, 1) 53 | 54 | heartbeat_thread.start() 55 | sleep(0.15) 56 | heartbeat_thread.stop() 57 | 58 | session.refresh_status_and_info.assert_called_once_with() 59 | session.logger.error.assert_called_once_with(msg) 60 | assert heartbeat_thread.livy_session is None 61 | 62 | 63 | def test_run_retries_stops(): 64 | msg = "oh noes!" 65 | session = MagicMock() 66 | session.refresh_status_and_info = MagicMock(side_effect=ValueError(msg)) 67 | refresh_seconds = 0.01 68 | retry_seconds = 0.01 69 | heartbeat_thread = _HeartbeatThread(session, refresh_seconds, retry_seconds) 70 | 71 | heartbeat_thread.start() 72 | sleep(0.1) 73 | heartbeat_thread.stop() 74 | 75 | assert session.refresh_status_and_info.called 76 | assert session.logger.error.called 77 | assert heartbeat_thread.livy_session is None 78 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | 5 | class Encoding(object): 6 | chart_type_scatter = "Scatter" 7 | chart_type_line = "Line" 8 | chart_type_area = "Area" 9 | chart_type_bar = "Bar" 10 | chart_type_pie = "Pie" 11 | chart_type_table = "Table" 12 | supported_chart_types = [ 13 | chart_type_line, 14 | chart_type_area, 15 | chart_type_bar, 16 | chart_type_pie, 17 | chart_type_table, 18 | ] 19 | 20 | y_agg_avg = "Avg" 21 | y_agg_min = "Min" 22 | y_agg_max = "Max" 23 | y_agg_sum = "Sum" 24 | y_agg_none = "None" 25 | y_agg_count = "Count" 26 | supported_y_agg = [ 27 | y_agg_avg, 28 | y_agg_min, 29 | y_agg_max, 30 | y_agg_sum, 31 | y_agg_none, 32 | y_agg_count, 33 | ] 34 | 35 | def __init__( 36 | self, 37 | chart_type=None, 38 | x=None, 39 | y=None, 40 | y_aggregation=None, 41 | logarithmic_x_axis=False, 42 | logarithmic_y_axis=False, 43 | ): 44 | self._chart_type = chart_type 45 | self._x = x 46 | self._y = y 47 | self._y_aggregation = y_aggregation 48 | self._logarithmic_x_axis = logarithmic_x_axis 49 | self._logarithmic_y_axis = logarithmic_y_axis 50 | 51 | @property 52 | def chart_type(self): 53 | return self._chart_type 54 | 55 | @chart_type.setter 56 | def chart_type(self, value): 57 | self._chart_type = value 58 | 59 | @property 60 | def x(self): 61 | return self._x 62 | 63 | @x.setter 64 | def x(self, value): 65 | self._x = value 66 | 67 | @property 68 | def y(self): 69 | return self._y 70 | 71 | @y.setter 72 | def y(self, value): 73 | self._y = value 74 | 75 | @property 76 | def y_aggregation(self): 77 | return self._y_aggregation 78 | 79 | @y_aggregation.setter 80 | def y_aggregation(self, value): 81 | self._y_aggregation = value 82 | 83 | @property 84 | def logarithmic_x_axis(self): 85 | return self._logarithmic_x_axis 86 | 87 | @logarithmic_x_axis.setter 88 | def logarithmic_x_axis(self, value): 89 | self._logarithmic_x_axis = value 90 | 91 | @property 92 | def logarithmic_y_axis(self): 93 | return self._logarithmic_y_axis 94 | 95 | @logarithmic_y_axis.setter 96 | def logarithmic_y_axis(self, value): 97 | self._logarithmic_y_axis = value 98 | 
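A minimal sketch of how the Encoding class above is typically used together with GraphRenderer (defined earlier in this tree); the column names "country" and "sales" are invented for the example:

from autovizwidget.widget.encoding import Encoding
from autovizwidget.plotlygraphs.graphrenderer import GraphRenderer

# Describe a bar chart of "sales" summed per "country".
encoding = Encoding(
    chart_type=Encoding.chart_type_bar,
    x="country",
    y="sales",
    y_aggregation=Encoding.y_agg_sum,
)

# DataGraph (the Table renderer) reports display_x()/display_y() as False,
# so the widget shows no axis controls for tables.
assert not GraphRenderer.display_controls(Encoding.chart_type_table)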
-------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // vscode tasks to easily run things. 3 | // Ctrl + Shift + B to bring tasks up and select one 4 | // See https://go.microsoft.com/fwlink/?LinkId=733558 5 | // for the documentation about the tasks.json format 6 | "version": "0.1.0", 7 | "command": "cmd", 8 | "isShellCommand": true, 9 | "showOutput": "silent", 10 | "args": ["/C"], 11 | 12 | "tasks": [ 13 | { 14 | "taskName": "go to github", 15 | "suppressTaskName": true, 16 | "isBuildCommand": true, 17 | "args": ["start https://github.com/jupyter-incubator/sparkmagic"] 18 | }, 19 | { 20 | "taskName": "docker build", 21 | "suppressTaskName": true, 22 | "isBuildCommand": true, 23 | "args": ["docker-compose build"] 24 | }, 25 | { 26 | "taskName": "docker-compose up", 27 | "suppressTaskName": true, 28 | "isBuildCommand": false, 29 | "isTestCommand": true, 30 | "args": ["docker-compose up -d && start http://localhost:8888"] 31 | }, 32 | { 33 | "taskName": "docker-compose down", 34 | "suppressTaskName": true, 35 | "isBuildCommand": false, 36 | "isTestCommand": true, 37 | "args": ["docker-compose down"] 38 | }, 39 | { 40 | "taskName": "test all", 41 | "suppressTaskName": true, 42 | "isBuildCommand": false, 43 | "isTestCommand": true, 44 | "args": ["nosetests autovizwidget hdijupyterutils sparkmagic"] 45 | }, 46 | { 47 | "taskName": "test autovizwidget", 48 | "suppressTaskName": true, 49 | "isBuildCommand": false, 50 | "isTestCommand": true, 51 | "args": ["nosetests autovizwidget"] 52 | }, 53 | { 54 | "taskName": "test hdijupyterutils", 55 | "suppressTaskName": true, 56 | "isBuildCommand": false, 57 | "isTestCommand": true, 58 | "args": ["nosetests hdijupyterutils"] 59 | }, 60 | { 61 | "taskName": "test sparkmagic", 62 | "suppressTaskName": true, 63 | "isBuildCommand": false, 64 | "isTestCommand": true, 65 | "args": ["nosetests sparkmagic"] 66 | }, 67 | { 68 | "taskName": "test file", 69 | "suppressTaskName": true, 70 | "isBuildCommand": false, 71 | "isTestCommand": true, 72 | "args": ["nosetests --nocapture ${file}"] 73 | } 74 | ] 75 | } -------------------------------------------------------------------------------- /hdijupyterutils/hdijupyterutils/configuration.py: -------------------------------------------------------------------------------- 1 | """Utility to read configs from file.""" 2 | 3 | # Distributed under the terms of the Modified BSD License. 4 | import json 5 | import sys 6 | 7 | from .utils import join_paths 8 | from .filesystemreaderwriter import FileSystemReaderWriter 9 | 10 | 11 | def with_override(overrides, path, fsrw_class=None): 12 | """A decorator which first initializes the overridden configurations, 13 | then checks the global overridden defaults for the given configuration, 14 | calling the function to get the default result otherwise.""" 15 | 16 | def ret(f): 17 | def wrapped_f(*args): 18 | # Can access overrides and path here 19 | _initialize(overrides, path, fsrw_class) 20 | name = f.__name__ 21 | if name in overrides: 22 | return overrides[name] 23 | else: 24 | return f(*args) 25 | 26 | # Hack! We do this so that we can query the .__name__ of the function 27 | # later to get the name of the configuration dynamically, e.g.
for unit tests 28 | wrapped_f.__name__ = f.__name__ 29 | return wrapped_f 30 | 31 | return ret 32 | 33 | 34 | def override(overrides, path, config, value, fsrw_class=None): 35 | """Given a string representing a configuration and a value for that configuration, 36 | override the configuration. Initialize the overridden configuration beforehand.""" 37 | _initialize(overrides, path, fsrw_class) 38 | overrides[config] = value 39 | 40 | 41 | def override_all(overrides, new_overrides): 42 | """Given a dictionary representing the overridden defaults for this 43 | configuration, initialize the global configuration.""" 44 | overrides.clear() 45 | overrides.update(new_overrides) 46 | 47 | 48 | def _initialize(overrides, path, fsrw_class): 49 | """Checks if the configuration is initialized. If it isn't, initializes the 50 | overrides object by reading from the configuration 51 | file, overwriting the current set of overrides if there is one.""" 52 | if not overrides: 53 | new_overrides = _load(path, fsrw_class) 54 | override_all(overrides, new_overrides) 55 | 56 | 57 | def _load(path, fsrw_class=None): 58 | """Returns a dictionary of configuration by reading from the configuration 59 | file.""" 60 | if fsrw_class is None: 61 | fsrw_class = FileSystemReaderWriter 62 | 63 | config_file = fsrw_class(path) 64 | config_file.ensure_file_exists() 65 | config_text = config_file.read_lines() 66 | line = "".join(config_text).strip() 67 | 68 | if line == "": 69 | overrides = {} 70 | else: 71 | overrides = json.loads(line) 72 | return overrides 73 | -------------------------------------------------------------------------------- /sparkmagic/example_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel_python_credentials" : { 3 | "username": "", 4 | "password": "", 5 | "url": "http://localhost:8998", 6 | "auth": "None" 7 | }, 8 | 9 | "kernel_scala_credentials" : { 10 | "username": "", 11 | "password": "", 12 | "url": "http://localhost:8998", 13 | "auth": "None" 14 | }, 15 | "kernel_r_credentials": { 16 | "username": "", 17 | "password": "", 18 | "url": "http://localhost:8998" 19 | }, 20 | 21 | "logging_config": { 22 | "version": 1, 23 | "formatters": { 24 | "magicsFormatter": { 25 | "format": "%(asctime)s\t%(levelname)s\t%(message)s", 26 | "datefmt": "" 27 | } 28 | }, 29 | "handlers": { 30 | "magicsHandler": { 31 | "class": "hdijupyterutils.filehandler.MagicsFileHandler", 32 | "formatter": "magicsFormatter", 33 | "home_path": "~/.sparkmagic" 34 | } 35 | }, 36 | "loggers": { 37 | "magicsLogger": { 38 | "handlers": ["magicsHandler"], 39 | "level": "DEBUG", 40 | "propagate": 0 41 | } 42 | } 43 | }, 44 | "authenticators": { 45 | "Kerberos": "sparkmagic.auth.kerberos.Kerberos", 46 | "None": "sparkmagic.auth.customauth.Authenticator", 47 | "Basic_Access": "sparkmagic.auth.basic.Basic" 48 | }, 49 | 50 | "wait_for_idle_timeout_seconds": 15, 51 | "livy_session_startup_timeout_seconds": 60, 52 | 53 | "http_session_config": { 54 | "adapters": 55 | [ 56 | { 57 | "prefix": "http://", 58 | "adapter": "requests.adapters.HTTPAdapter" 59 | } 60 | ] 61 | }, 62 | 63 | "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.", 64 | 65 | "ignore_ssl_errors": false, 66 | 67 | "session_configs": { 68 | "driverMemory": "1000M",
69 | "executorCores": 2 70 | }, 71 | "session_configs_defaults": { 72 | "conf": { 73 | "spark.sql.catalog.spark_catalog.type": "hive" 74 | } 75 | }, 76 | "use_auto_viz": true, 77 | "coerce_dataframe": true, 78 | "max_results_sql": 2500, 79 | "pyspark_dataframe_encoding": "utf-8", 80 | 81 | "heartbeat_refresh_seconds": 30, 82 | "livy_server_heartbeat_timeout_seconds": 0, 83 | "heartbeat_retry_seconds": 10, 84 | 85 | "server_extension_default_kernel_name": "pysparkkernel", 86 | "custom_headers": {}, 87 | 88 | "retry_policy": "configurable", 89 | "retry_seconds_to_sleep_list": [0.2, 0.5, 1, 3, 5], 90 | "configurable_retry_policy_max_retries": 8 91 | } 92 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Packages 2 | 3 | on: 4 | release: 5 | types: [released] 6 | 7 | jobs: 8 | tests: 9 | name: "Tests" 10 | uses: "./.github/workflows/tests.yml" 11 | 12 | build: 13 | needs: [tests] 14 | if: ${{ success() }} 15 | env: 16 | PYPI_TOKEN_AUTOVIZWIDGET: ${{ secrets.PYPI_TOKEN_AUTOVIZWIDGET }} 17 | PYPI_TOKEN_HDIJUPYTERUTILS: ${{ secrets.PYPI_TOKEN_HDIJUPYTERUTILS }} 18 | PYPI_TOKEN_SPARKMAGIC: ${{ secrets.PYPI_TOKEN_SPARKMAGIC }} 19 | 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: 3.8 28 | 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | 34 | - name: Setup PyPI 35 | run: | 36 | cat > ~/.pypirc <<- EOF 37 | [distutils] 38 | index-servers = 39 | pypi 40 | pypitest 41 | 42 | [autovizwidget] 43 | username=__token__ 44 | password=$PYPI_TOKEN_AUTOVIZWIDGET 45 | 46 | [hdijupyterutils] 47 | username=__token__ 48 | password=$PYPI_TOKEN_HDIJUPYTERUTILS 49 | 50 | [sparkmagic] 51 | username=__token__ 52 | password=$PYPI_TOKEN_SPARKMAGIC 53 | 54 | EOF 55 | 56 | - name: Build hdijupyterutils dist 57 | run: | 58 | cd hdijupyterutils 59 | python -m build 60 | cd .. 61 | 62 | - name: Publish hdijupyterutils to PyPI 63 | uses: pypa/gh-action-pypi-publish@release/v1 64 | with: 65 | user: __token__ 66 | password: ${{ secrets.PYPI_TOKEN_HDIJUPYTERUTILS }} 67 | packages_dir: hdijupyterutils/dist/ 68 | verbose: true 69 | 70 | - name: Build autovizwidget dist 71 | run: | 72 | cd autovizwidget 73 | python -m build 74 | cd .. 75 | 76 | - name: Publish autovizwidget to PyPI 77 | uses: pypa/gh-action-pypi-publish@release/v1 78 | with: 79 | user: __token__ 80 | password: ${{ secrets.PYPI_TOKEN_AUTOVIZWIDGET }} 81 | packages_dir: autovizwidget/dist/ 82 | verbose: true 83 | 84 | - name: Build sparkmagic dist 85 | run: | 86 | cd sparkmagic 87 | python -m build 88 | cd .. 89 | 90 | - name: Publish sparkmagic to PyPI 91 | uses: pypa/gh-action-pypi-publish@release/v1 92 | with: 93 | user: __token__ 94 | password: ${{ secrets.PYPI_TOKEN_SPARKMAGIC }} 95 | packages_dir: sparkmagic/dist/ 96 | verbose: true 97 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/sendtosparkcommand.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Jupyter Development Team. 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | from sparkmagic.livyclientlib.command import Command 5 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 6 | import sparkmagic.utils.constants as constants 7 | 8 | from abc import abstractmethod 9 | 10 | 11 | class SendToSparkCommand(Command): 12 | def __init__( 13 | self, 14 | input_variable_name, 15 | input_variable_value, 16 | output_variable_name, 17 | spark_events=None, 18 | ): 19 | super(SendToSparkCommand, self).__init__("", spark_events) 20 | self.input_variable_name = input_variable_name 21 | self.input_variable_value = input_variable_value 22 | self.output_variable_name = output_variable_name 23 | 24 | def execute(self, session): 25 | try: 26 | command = self.to_command( 27 | session.kind, 28 | self.input_variable_name, 29 | self.input_variable_value, 30 | self.output_variable_name, 31 | ) 32 | return command.execute(session) 33 | except Exception as e: 34 | raise e 35 | 36 | def to_command( 37 | self, kind, input_variable_name, input_variable_value, output_variable_name 38 | ): 39 | if kind == constants.SESSION_KIND_PYSPARK: 40 | return self._pyspark_command( 41 | input_variable_name, input_variable_value, output_variable_name 42 | ) 43 | elif kind == constants.SESSION_KIND_SPARK: 44 | return self._scala_command( 45 | input_variable_name, input_variable_value, output_variable_name 46 | ) 47 | elif kind == constants.SESSION_KIND_SPARKR: 48 | return self._r_command( 49 | input_variable_name, input_variable_value, output_variable_name 50 | ) 51 | else: 52 | raise BadUserDataException("Kind '{}' is not supported.".format(kind)) 53 | 54 | @abstractmethod 55 | def _scala_command( 56 | self, input_variable_name, input_variable_value, output_variable_name 57 | ): 58 | raise NotImplementedError # override and provide a proper implementation in a subclass! 59 | 60 | @abstractmethod 61 | def _pyspark_command( 62 | self, input_variable_name, input_variable_value, output_variable_name 63 | ): 64 | raise NotImplementedError # override and provide a proper implementation in a subclass! 65 | 66 | @abstractmethod 67 | def _r_command( 68 | self, input_variable_name, input_variable_value, output_variable_name 69 | ): 70 | raise NotImplementedError # override and provide a proper implementation in a subclass!
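For reference, SendStringToSparkCommand (earlier in this tree) is the concrete subclass that fills in these three per-language hooks. A minimal sketch of how it is driven (the variable names and values are invented for the example):

import sparkmagic.utils.constants as constants
from sparkmagic.livyclientlib.sendstringtosparkcommand import SendStringToSparkCommand

# Ship the local string "hello" to the remote session as the variable `greeting`.
command = SendStringToSparkCommand(
    input_variable_name="greeting",
    input_variable_value="hello",
    output_variable_name="greeting",
)

# to_command() dispatches on the session kind; for a PySpark session the
# generated remote code is the assignment  greeting = 'hello'
pyspark_command = command.to_command(
    constants.SESSION_KIND_PYSPARK, "greeting", "hello", "greeting"
)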
71 | -------------------------------------------------------------------------------- /.github/workflows/docker_build.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker Image 2 | 3 | on: 4 | push: {} 5 | pull_request: {} 6 | schedule: 7 | # Run daily 8 | - cron: "6 4 * * *" 9 | 10 | jobs: 11 | docker: 12 | runs-on: ubuntu-latest 13 | env: 14 | REPOSITORY_URL: ghcr.io 15 | SPARKMAGIC_IMAGE_NAME: ${{ github.repository }}/jupyter:latest 16 | SPARKMAGIC_DOCKERFILE: Dockerfile.jupyter 17 | JUPYTER_IMAGE_NAME: ${{ github.repository }}/sparkmagic-livy:latest 18 | JUPYTER_DOCKERFILE: Dockerfile.spark 19 | permissions: 20 | contents: read 21 | packages: write 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | 26 | - name: Set up QEMU 27 | uses: docker/setup-qemu-action@v3 28 | 29 | - name: Set up Docker Buildx 30 | uses: docker/setup-buildx-action@v3 31 | 32 | - name: Login to GitHub Container Registry 33 | uses: docker/login-action@v3 34 | with: 35 | registry: ${{ env.REPOSITORY_URL }} 36 | username: ${{ github.repository_owner }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | 39 | - name: Build and push with latest release - SparkMagic 40 | uses: docker/build-push-action@v6 41 | if: github.event_name == 'schedule' 42 | with: 43 | context: . 44 | file: ${{ env.SPARKMAGIC_DOCKERFILE }} 45 | platforms: linux/amd64,linux/arm64 46 | push: true 47 | tags: | 48 | ${{ env.REPOSITORY_URL }}/${{ env.SPARKMAGIC_IMAGE_NAME }} 49 | 50 | - name: Build and push with latest release - Jupyter 51 | uses: docker/build-push-action@v6 52 | if: github.event_name == 'schedule' 53 | with: 54 | context: . 55 | file: ${{ env.JUPYTER_DOCKERFILE }} 56 | platforms: linux/amd64,linux/arm64 57 | push: true 58 | tags: | 59 | ${{ env.REPOSITORY_URL }}/${{ env.JUPYTER_IMAGE_NAME }} 60 | 61 | - name: Build and push with local packages - SparkMagic 62 | uses: docker/build-push-action@v6 63 | if: github.event_name == 'push' 64 | with: 65 | context: . 66 | file: ${{ env.SPARKMAGIC_DOCKERFILE }} 67 | platforms: linux/amd64,linux/arm64 68 | push: true 69 | build-args: | 70 | dev_mode="true" 71 | tags: | 72 | ${{ env.REPOSITORY_URL }}/${{ env.SPARKMAGIC_IMAGE_NAME }} 73 | 74 | - name: Build and push with local packages - Jupyter 75 | uses: docker/build-push-action@v6 76 | if: github.event_name == 'push' 77 | with: 78 | context: .
79 | file: ${{ env.JUPYTER_DOCKERFILE }} 80 | platforms: linux/amd64,linux/arm64 81 | push: true 82 | build-args: | 83 | dev_mode="true" 84 | tags: | 85 | ${{ env.REPOSITORY_URL }}/${{ env.JUPYTER_IMAGE_NAME }} 86 | -------------------------------------------------------------------------------- /sparkmagic/setup.py: -------------------------------------------------------------------------------- 1 | DESCRIPTION = "SparkMagic: Spark execution via Livy" 2 | NAME = "sparkmagic" 3 | PACKAGES = [ 4 | "sparkmagic", 5 | "sparkmagic/controllerwidget", 6 | "sparkmagic/kernels", 7 | "sparkmagic/livyclientlib", 8 | "sparkmagic/auth", 9 | "sparkmagic/magics", 10 | "sparkmagic/kernels/pysparkkernel", 11 | "sparkmagic/kernels/sparkkernel", 12 | "sparkmagic/kernels/sparkrkernel", 13 | "sparkmagic/kernels/wrapperkernel", 14 | "sparkmagic/utils", 15 | "sparkmagic/serverextension", 16 | ] 17 | AUTHOR = "Jupyter Development Team" 18 | AUTHOR_EMAIL = "jupyter@googlegroups.org" 19 | URL = "https://github.com/jupyter-incubator/sparkmagic" 20 | DOWNLOAD_URL = "https://github.com/jupyter-incubator/sparkmagic" 21 | LICENSE = "BSD 3-clause" 22 | 23 | import io 24 | import os 25 | import re 26 | 27 | from distutils.core import setup 28 | 29 | 30 | def read(path, encoding="utf-8"): 31 | path = os.path.join(os.path.dirname(__file__), path) 32 | with io.open(path, encoding=encoding) as fp: 33 | return fp.read() 34 | 35 | 36 | # read requirements.txt and convert to install_requires format 37 | def requirements(path): 38 | lines = read(path).splitlines() 39 | # remove comments and empty lines 40 | lines = [line for line in lines if not line.startswith("#") and line] 41 | return lines 42 | 43 | 44 | def version(path): 45 | """Obtain the package version from a python file e.g. pkg/__init__.py. 46 | 47 | See the Python packaging guide on single-sourcing the package version. 48 | """ 49 | version_file = read(path) 50 | version_match = re.search( 51 | r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M 52 | ) 53 | if version_match: 54 | return version_match.group(1) 55 | raise RuntimeError("Unable to find version string.") 56 | 57 | 58 | VERSION = version("sparkmagic/__init__.py") 59 | 60 | 61 | setup( 62 | name=NAME, 63 | version=VERSION, 64 | description=DESCRIPTION, 65 | author=AUTHOR, 66 | author_email=AUTHOR_EMAIL, 67 | url=URL, 68 | download_url=DOWNLOAD_URL, 69 | license=LICENSE, 70 | packages=PACKAGES, 71 | include_package_data=True, 72 | package_data={ 73 | "sparkmagic": [ 74 | "kernels/pysparkkernel/kernel.js", 75 | "kernels/sparkkernel/kernel.js", 76 | "kernels/sparkrkernel/kernel.js", 77 | "kernels/pysparkkernel/kernel.json", 78 | "kernels/sparkkernel/kernel.json", 79 | "kernels/sparkrkernel/kernel.json", 80 | ] 81 | }, 82 | classifiers=[ 83 | "Development Status :: 4 - Beta", 84 | "Environment :: Console", 85 | "Intended Audience :: Science/Research", 86 | "License :: OSI Approved :: BSD License", 87 | "Natural Language :: English", 88 | "Programming Language :: Python :: 3.7", 89 | ], 90 | install_requires=requirements("requirements.txt"), 91 | ) 92 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/plotlygraphs/piegraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License.
3 | 4 | from plotly.graph_objs import Pie, Figure 5 | from plotly.offline import iplot 6 | 7 | try: 8 | from pandas.core.base import DataError 9 | except ImportError: 10 | try: 11 | from pandas.core.groupby import DataError 12 | except ImportError: 13 | from pandas.errors import DataError 14 | 15 | import autovizwidget.utils.configuration as conf 16 | from .graphbase import GraphBase 17 | 18 | 19 | class PieGraph(GraphBase): 20 | @staticmethod 21 | def render(df, encoding, output): 22 | if encoding.x is None: 23 | with output: 24 | print("\n\n\nPlease select an X axis.") 25 | return 26 | 27 | try: 28 | values, labels = PieGraph._get_x_values_labels(df, encoding) 29 | except TypeError: 30 | with output: 31 | print( 32 | "\n\n\nCannot group by X selection because of its type: '{}'. Please select another column.".format( 33 | df[encoding.x].dtype 34 | ) 35 | ) 36 | return 37 | except (ValueError, DataError): 38 | with output: 39 | print( 40 | "\n\n\nCannot group by X selection of type '{}'. Please select another column.".format( 41 | df[encoding.x].dtype 42 | ) 43 | ) 44 | if df.size == 0: 45 | print("\n\n\nCannot display a pie graph for an empty data set.") 46 | return 47 | 48 | max_slices_pie_graph = conf.max_slices_pie_graph() 49 | with output: 50 | # There are performance issues with a large number of slices. 51 | # 1500 rows crash the browser. 52 | # 500 rows take ~15 s. 53 | # 100 rows is almost automatic. 54 | if len(values) > max_slices_pie_graph: 55 | print( 56 | "There are {} values in your pie graph, which would render the graph unresponsive.\n" 57 | "Please select another X with at most {} possible values.".format( 58 | len(values), max_slices_pie_graph 59 | ) 60 | ) 61 | else: 62 | data = [Pie(values=values, labels=labels)] 63 | 64 | fig = Figure(data=data) 65 | iplot(fig, show_link=False) 66 | 67 | @staticmethod 68 | def display_logarithmic_x_axis(): 69 | return False 70 | 71 | @staticmethod 72 | def display_logarithmic_y_axis(): 73 | return False 74 | 75 | @staticmethod 76 | def _get_x_values_labels(df, encoding): 77 | if encoding.y is None: 78 | series = df.groupby([encoding.x]).size() 79 | values = series.values.tolist() 80 | labels = series.index.tolist() 81 | else: 82 | labels, values = GraphBase._get_x_y_values(df, encoding) 83 | return values, labels 84 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # Licensing terms 2 | 3 | This project is licensed under the terms of the Modified BSD License 4 | (also known as New or Revised or 3-Clause BSD), as follows: 5 | 6 | - Copyright (c) 2001-2015, IPython Development Team 7 | - Copyright (c) 2015-, Jupyter Development Team 8 | 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | Neither the name of the Jupyter Development Team nor the names of its 22 | contributors may be used to endorse or promote products derived from this 23 | software without specific prior written permission. 
24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ## About the Jupyter Development Team 37 | 38 | The Jupyter Development Team is the set of all contributors to the Jupyter project. 39 | This includes all of the Jupyter Subprojects, which are the different repositories 40 | under the [jupyter](https://github.com/jupyter/) GitHub organization. 41 | 42 | The core team that coordinates development on GitHub can be found here: 43 | https://github.com/jupyter/. 44 | 45 | ## Our copyright policy 46 | 47 | Jupyter uses a shared copyright model. Each contributor maintains copyright 48 | over their contributions to Jupyter. But, it is important to note that these 49 | contributions are typically only changes to the repositories. Thus, the Jupyter 50 | source code, in its entirety is not the copyright of any single person or 51 | institution. Instead, it is the collective copyright of the entire Jupyter 52 | Development Team. If individual contributors want to maintain a record of what 53 | changes/contributions they have specific copyright on, they should indicate 54 | their copyright in the commit message of the change, when they commit the 55 | change to one of the Jupyter repositories. 56 | 57 | With this in mind, the following banner should be used in any source code file 58 | to indicate the copyright and license terms: 59 | 60 | # Copyright (c) Jupyter Development Team. 61 | # Distributed under the terms of the Modified BSD License. 62 | -------------------------------------------------------------------------------- /autovizwidget/LICENSE.md: -------------------------------------------------------------------------------- 1 | # Licensing terms 2 | 3 | This project is licensed under the terms of the Modified BSD License 4 | (also known as New or Revised or 3-Clause BSD), as follows: 5 | 6 | - Copyright (c) 2001-2015, IPython Development Team 7 | - Copyright (c) 2015-, Jupyter Development Team 8 | 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | Neither the name of the Jupyter Development Team nor the names of its 22 | contributors may be used to endorse or promote products derived from this 23 | software without specific prior written permission. 
24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ## About the Jupyter Development Team 37 | 38 | The Jupyter Development Team is the set of all contributors to the Jupyter project. 39 | This includes all of the Jupyter Subprojects, which are the different repositories 40 | under the [jupyter](https://github.com/jupyter/) GitHub organization. 41 | 42 | The core team that coordinates development on GitHub can be found here: 43 | https://github.com/jupyter/. 44 | 45 | ## Our copyright policy 46 | 47 | Jupyter uses a shared copyright model. Each contributor maintains copyright 48 | over their contributions to Jupyter. But, it is important to note that these 49 | contributions are typically only changes to the repositories. Thus, the Jupyter 50 | source code, in its entirety is not the copyright of any single person or 51 | institution. Instead, it is the collective copyright of the entire Jupyter 52 | Development Team. If individual contributors want to maintain a record of what 53 | changes/contributions they have specific copyright on, they should indicate 54 | their copyright in the commit message of the change, when they commit the 55 | change to one of the Jupyter repositories. 56 | 57 | With this in mind, the following banner should be used in any source code file 58 | to indicate the copyright and license terms: 59 | 60 | # Copyright (c) Jupyter Development Team. 61 | # Distributed under the terms of the Modified BSD License. 62 | -------------------------------------------------------------------------------- /hdijupyterutils/LICENSE.md: -------------------------------------------------------------------------------- 1 | # Licensing terms 2 | 3 | This project is licensed under the terms of the Modified BSD License 4 | (also known as New or Revised or 3-Clause BSD), as follows: 5 | 6 | - Copyright (c) 2001-2015, IPython Development Team 7 | - Copyright (c) 2015-, Jupyter Development Team 8 | 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | Neither the name of the Jupyter Development Team nor the names of its 22 | contributors may be used to endorse or promote products derived from this 23 | software without specific prior written permission. 
24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ## About the Jupyter Development Team 37 | 38 | The Jupyter Development Team is the set of all contributors to the Jupyter project. 39 | This includes all of the Jupyter Subprojects, which are the different repositories 40 | under the [jupyter](https://github.com/jupyter/) GitHub organization. 41 | 42 | The core team that coordinates development on GitHub can be found here: 43 | https://github.com/jupyter/. 44 | 45 | ## Our copyright policy 46 | 47 | Jupyter uses a shared copyright model. Each contributor maintains copyright 48 | over their contributions to Jupyter. But, it is important to note that these 49 | contributions are typically only changes to the repositories. Thus, the Jupyter 50 | source code, in its entirety is not the copyright of any single person or 51 | institution. Instead, it is the collective copyright of the entire Jupyter 52 | Development Team. If individual contributors want to maintain a record of what 53 | changes/contributions they have specific copyright on, they should indicate 54 | their copyright in the commit message of the change, when they commit the 55 | change to one of the Jupyter repositories. 56 | 57 | With this in mind, the following banner should be used in any source code file 58 | to indicate the copyright and license terms: 59 | 60 | # Copyright (c) Jupyter Development Team. 61 | # Distributed under the terms of the Modified BSD License. 62 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/createsessionwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | import json 4 | 5 | import sparkmagic.utils.configuration as conf 6 | from sparkmagic.utils.constants import LANG_SCALA, LANG_PYTHON 7 | from sparkmagic.controllerwidget.abstractmenuwidget import AbstractMenuWidget 8 | 9 | 10 | class CreateSessionWidget(AbstractMenuWidget): 11 | def __init__( 12 | self, 13 | spark_controller, 14 | ipywidget_factory, 15 | ipython_display, 16 | endpoints_dropdown_widget, 17 | refresh_method, 18 | ): 19 | # This is nested 20 | super(CreateSessionWidget, self).__init__( 21 | spark_controller, ipywidget_factory, ipython_display, True 22 | ) 23 | 24 | self.refresh_method = refresh_method 25 | 26 | self.endpoints_dropdown_widget = endpoints_dropdown_widget 27 | 28 | self.session_widget = self.ipywidget_factory.get_text( 29 | description="Name:", value="session-name" 30 | ) 31 | self.lang_widget = self.ipywidget_factory.get_toggle_buttons( 32 | description="Language:", 33 | options=[LANG_SCALA, LANG_PYTHON], 34 | ) 35 | self.properties = self.ipywidget_factory.get_text( 36 | description="Properties:", value=json.dumps(conf.session_configs()) 37 | ) 38 | self.submit_widget = self.ipywidget_factory.get_submit_button( 39 | description="Create Session" 40 | ) 41 | 42 | self.children = [ 43 | self.ipywidget_factory.get_html(value="
", width="600px"), 44 | self.endpoints_dropdown_widget, 45 | self.session_widget, 46 | self.lang_widget, 47 | self.properties, 48 | self.ipywidget_factory.get_html(value="
", width="600px"), 49 | self.submit_widget, 50 | ] 51 | 52 | for child in self.children: 53 | child.parent_widget = self 54 | 55 | def run(self): 56 | try: 57 | properties_json = self.properties.value 58 | if properties_json.strip() != "": 59 | conf.override( 60 | conf.session_configs.__name__, json.loads(self.properties.value) 61 | ) 62 | except ValueError as e: 63 | self.ipython_display.send_error( 64 | "Session properties must be a valid JSON string. Error:\n{}".format(e) 65 | ) 66 | return 67 | 68 | endpoint = self.endpoints_dropdown_widget.value 69 | language = self.lang_widget.value 70 | alias = self.session_widget.value 71 | skip = False 72 | properties = conf.get_session_properties(language) 73 | 74 | try: 75 | self.spark_controller.add_session(alias, endpoint, skip, properties) 76 | except ValueError as e: 77 | self.ipython_display.send_error( 78 | """Could not add session with 79 | name: 80 | {} 81 | properties: 82 | {} 83 | 84 | due to error: '{}'""".format( 85 | alias, properties, e 86 | ) 87 | ) 88 | return 89 | 90 | self.refresh_method() 91 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_usercodeparser.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from sparkmagic.kernels.wrapperkernel.usercodeparser import UserCodeParser 3 | from sparkmagic.kernels.kernelmagics import KernelMagics 4 | 5 | 6 | def test_empty_string(): 7 | parser = UserCodeParser() 8 | 9 | assert "" == parser.get_code_to_run("") 10 | 11 | 12 | def test_spark_code(): 13 | parser = UserCodeParser() 14 | cell = "my code\nand more" 15 | 16 | assert "%%spark\nmy code\nand more" == parser.get_code_to_run(cell) 17 | 18 | 19 | def test_local_single(): 20 | parser = UserCodeParser() 21 | cell = """%local 22 | hi 23 | hi 24 | hi""" 25 | 26 | assert "hi\nhi\nhi" == parser.get_code_to_run(cell) 27 | 28 | 29 | def test_local_double(): 30 | parser = UserCodeParser() 31 | cell = """%%local 32 | hi 33 | hi 34 | hi""" 35 | 36 | assert "hi\nhi\nhi" == parser.get_code_to_run(cell) 37 | 38 | 39 | def test_our_line_magics(): 40 | parser = UserCodeParser() 41 | magic_name = KernelMagics.info.__name__ 42 | cell = "%{}".format(magic_name) 43 | 44 | assert "%%{}\n ".format(magic_name) == parser.get_code_to_run(cell) 45 | 46 | 47 | def test_our_line_magics_with_content(): 48 | parser = UserCodeParser() 49 | magic_name = KernelMagics.info.__name__ 50 | cell = """%{} 51 | my content 52 | more content""".format( 53 | magic_name 54 | ) 55 | 56 | assert "%%{}\nmy content\nmore content\n ".format( 57 | magic_name 58 | ) == parser.get_code_to_run(cell) 59 | 60 | 61 | def test_other_cell_magic(): 62 | parser = UserCodeParser() 63 | cell = """%%magic 64 | hi 65 | hi 66 | hi""" 67 | 68 | assert "{}".format(cell) == parser.get_code_to_run(cell) 69 | 70 | 71 | def test_other_line_magic(): 72 | parser = UserCodeParser() 73 | cell = """%magic 74 | hi 75 | hi 76 | hi""" 77 | 78 | assert cell == parser.get_code_to_run(cell) 79 | 80 | 81 | def test_scala_code(): 82 | parser = UserCodeParser() 83 | cell = """/* Place the cursor in the cell and press SHIFT + ENTER to run */ 84 | 85 | val fruits = sc.textFile("wasb:///example/data/fruits.txt") 86 | val yellowThings = sc.textFile("wasb:///example/data/yellowthings.txt")""" 87 | 88 | assert "%%spark\n{}".format(cell) == parser.get_code_to_run(cell) 89 | 90 | 91 | def test_unicode(): 92 | parser = UserCodeParser() 93 | cell = "print 'è🐙🐙🐙🐙'" 94 | 95 | assert 
"%%spark\n{}".format(cell) == parser.get_code_to_run(cell) 96 | 97 | 98 | def test_unicode_in_magics(): 99 | parser = UserCodeParser() 100 | magic_name = KernelMagics.info.__name__ 101 | cell = """%{} 102 | my content è🐙 103 | more content""".format( 104 | magic_name 105 | ) 106 | 107 | assert "%%{}\nmy content è🐙\nmore content\n ".format( 108 | magic_name 109 | ) == parser.get_code_to_run(cell) 110 | 111 | 112 | def test_unicode_in_double_magics(): 113 | parser = UserCodeParser() 114 | magic_name = KernelMagics.info.__name__ 115 | cell = """%%{} 116 | my content è🐙 117 | more content""".format( 118 | magic_name 119 | ) 120 | 121 | assert "%%{}\nmy content è🐙\nmore content\n ".format( 122 | magic_name 123 | ) == parser.get_code_to_run(cell) 124 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/auth/basic.py: -------------------------------------------------------------------------------- 1 | """Class for implementing a basic access authenticator for SparkMagic""" 2 | 3 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 4 | from hdijupyterutils.ipywidgetfactory import IpyWidgetFactory 5 | from requests.auth import HTTPBasicAuth 6 | from .customauth import Authenticator 7 | 8 | 9 | class Basic(HTTPBasicAuth, Authenticator): 10 | """Basic Access authenticator for SparkMagic""" 11 | 12 | def __init__(self, parsed_attributes=None): 13 | """Initializes the Authenticator with the attributes in the attributes 14 | parsed from a %spark magic command if applicable, or with default values 15 | otherwise. 16 | 17 | Args: 18 | self, 19 | parsed_attributes (IPython.core.magics.namespace): The namespace object that 20 | is created from parsing %spark magic command. 21 | """ 22 | if parsed_attributes is not None: 23 | if parsed_attributes.user == "" or parsed_attributes.password == "": 24 | new_exc = BadUserDataException( 25 | "Need to supply username and password arguments for " 26 | "Basic Access Authentication. (e.g. -a username -p password)." 27 | ) 28 | raise new_exc 29 | self.username = parsed_attributes.user 30 | self.password = parsed_attributes.password 31 | else: 32 | self.username = "username" 33 | self.password = "password" 34 | HTTPBasicAuth.__init__(self, self.username, self.password) 35 | Authenticator.__init__(self, parsed_attributes) 36 | 37 | def get_widgets(self, widget_width): 38 | """Creates and returns a list with an address, username, and password widget 39 | 40 | Args: 41 | widget_width (str): The width of all widgets to be created. 
42 | 43 | Returns: 44 | list: The address, username, and password widgets to render. 45 | """ 46 | ipywidget_factory = IpyWidgetFactory() 47 | 48 | self.user_widget = ipywidget_factory.get_text( 49 | description="Username:", value=self.username, width=widget_width 50 | ) 51 | 52 | self.password_widget = ipywidget_factory.get_password( 53 | description="Password:", value=self.password, width=widget_width 54 | ) 55 | 56 | widgets = [self.user_widget, self.password_widget] 57 | return Authenticator.get_widgets(self, widget_width) + widgets 58 | 59 | def update_with_widget_values(self): 60 | """Updates url, username, and password to be the value of their respective widgets.""" 61 | Authenticator.update_with_widget_values(self) 62 | self.username = self.user_widget.value 63 | self.password = self.password_widget.value 64 | 65 | def __eq__(self, other): 66 | if not isinstance(other, Basic): 67 | return False 68 | return ( 69 | self.url == other.url 70 | and self.username == other.username 71 | and self.password == other.password 72 | ) 73 | 74 | def __call__(self, request): 75 | return HTTPBasicAuth.__call__(self, request) 76 | 77 | def __hash__(self): 78 | return hash((self.username, self.password, self.url, self.__class__.__name__)) 79 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/managesessionwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.controllerwidget.abstractmenuwidget import AbstractMenuWidget 4 | 5 | 6 | class ManageSessionWidget(AbstractMenuWidget): 7 | def __init__( 8 | self, spark_controller, ipywidget_factory, ipython_display, refresh_method 9 | ): 10 | # This is nested 11 | super(ManageSessionWidget, self).__init__( 12 | spark_controller, ipywidget_factory, ipython_display, True 13 | ) 14 | 15 | self.refresh_method = refresh_method 16 | 17 | self.children = self.get_existing_session_widgets() 18 | 19 | for child in self.children: 20 | child.parent_widget = self 21 | 22 | def run(self): 23 | self.refresh_method() 24 | 25 | def get_existing_session_widgets(self): 26 | session_widgets = [] 27 | session_widgets.append( 28 | self.ipywidget_factory.get_html(value="<br/>
", width="600px") 29 | ) 30 | 31 | client_dict = self.spark_controller.get_managed_clients() 32 | if len(client_dict) > 0: 33 | # Header 34 | header = self.get_session_widget("Name", "Id", "Kind", "State", False) 35 | session_widgets.append(header) 36 | session_widgets.append( 37 | self.ipywidget_factory.get_html(value="
", width="600px") 38 | ) 39 | 40 | # Sessions 41 | for name, session in client_dict.items(): 42 | session_widgets.append( 43 | self.get_session_widget( 44 | name, session.id, session.kind, session.status 45 | ) 46 | ) 47 | 48 | session_widgets.append( 49 | self.ipywidget_factory.get_html(value="
", width="600px") 50 | ) 51 | else: 52 | session_widgets.append( 53 | self.ipywidget_factory.get_html(value="No sessions yet.", width="600px") 54 | ) 55 | 56 | return session_widgets 57 | 58 | def get_session_widget(self, name, session_id, kind, state, button=True): 59 | hbox = self.ipywidget_factory.get_hbox() 60 | 61 | name_w = self.ipywidget_factory.get_html( 62 | value=name, width="200px", padding="4px" 63 | ) 64 | id_w = self.ipywidget_factory.get_html( 65 | value=str(session_id), width="100px", padding="4px" 66 | ) 67 | kind_w = self.ipywidget_factory.get_html( 68 | value=kind, width="100px", padding="4px" 69 | ) 70 | state_w = self.ipywidget_factory.get_html( 71 | value=state, width="100px", padding="4px" 72 | ) 73 | 74 | if button: 75 | 76 | def delete_on_click(button): 77 | self.spark_controller.delete_session_by_name(name) 78 | self.refresh_method() 79 | 80 | delete_w = self.ipywidget_factory.get_button(description="Delete") 81 | delete_w.on_click(delete_on_click) 82 | else: 83 | delete_w = self.ipywidget_factory.get_html( 84 | value="", width="100px", padding="4px" 85 | ) 86 | 87 | hbox.children = [name_w, id_w, kind_w, state_w, delete_w] 88 | 89 | return hbox 90 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 
45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at itamar@pythonspeed.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/livyreliablehttpclient.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | from .linearretrypolicy import LinearRetryPolicy 5 | from .configurableretrypolicy import ConfigurableRetryPolicy 6 | from .reliablehttpclient import ReliableHttpClient 7 | from sparkmagic.utils.constants import LINEAR_RETRY, CONFIGURABLE_RETRY 8 | import sparkmagic.utils.configuration as conf 9 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 10 | 11 | 12 | class LivyReliableHttpClient(object): 13 | """A Livy-specific Http client which wraps the normal ReliableHttpClient. 
Propagates 14 | HttpClientExceptions up.""" 15 | 16 | def __init__(self, http_client, endpoint): 17 | self.endpoint = endpoint 18 | self._http_client = http_client 19 | 20 | @staticmethod 21 | def from_endpoint(endpoint): 22 | headers = {"Content-Type": "application/json"} 23 | headers.update(conf.custom_headers()) 24 | retry_policy = LivyReliableHttpClient._get_retry_policy() 25 | return LivyReliableHttpClient( 26 | ReliableHttpClient(endpoint, headers, retry_policy), endpoint 27 | ) 28 | 29 | def post_statement(self, session_id, data): 30 | return self._http_client.post( 31 | self._statements_url(session_id), [201], data 32 | ).json() 33 | 34 | def get_statement(self, session_id, statement_id): 35 | return self._http_client.get( 36 | self._statement_url(session_id, statement_id), [200] 37 | ).json() 38 | 39 | def get_sessions(self): 40 | return self._http_client.get("/sessions", [200]).json() 41 | 42 | def post_session(self, properties): 43 | return self._http_client.post("/sessions", [201], properties).json() 44 | 45 | def get_session(self, session_id): 46 | return self._http_client.get(self._session_url(session_id), [200]).json() 47 | 48 | def delete_session(self, session_id): 49 | self._http_client.delete(self._session_url(session_id), [200, 404]) 50 | 51 | def get_all_session_logs(self, session_id): 52 | return self._http_client.get( 53 | self._session_url(session_id) + "/log?from=0", [200] 54 | ).json() 55 | 56 | def get_headers(self): 57 | return self._http_client.get_headers() 58 | 59 | def cancel_statement(self, session_id, statement_id): 60 | return self._http_client.post( 61 | "{}/cancel".format(self._statement_url(session_id, statement_id)), [200], {} 62 | ).json() 63 | 64 | @staticmethod 65 | def _session_url(session_id): 66 | return "/sessions/{}".format(session_id) 67 | 68 | @staticmethod 69 | def _statements_url(session_id): 70 | return "/sessions/{}/statements".format(session_id) 71 | 72 | @staticmethod 73 | def _statement_url(session_id, statement_id): 74 | return "/sessions/{}/statements/{}".format(session_id, statement_id) 75 | 76 | @staticmethod 77 | def _get_retry_policy(): 78 | policy = conf.retry_policy() 79 | 80 | if policy == LINEAR_RETRY: 81 | return LinearRetryPolicy(seconds_to_sleep=5, max_retries=5) 82 | elif policy == CONFIGURABLE_RETRY: 83 | return ConfigurableRetryPolicy( 84 | retry_seconds_to_sleep_list=conf.retry_seconds_to_sleep_list(), 85 | max_retries=conf.configurable_retry_policy_max_retries(), 86 | ) 87 | else: 88 | raise BadUserConfigurationException( 89 | "Retry policy '{}' not supported".format(policy) 90 | ) 91 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_configurableretrypolicy.py: -------------------------------------------------------------------------------- 1 | from sparkmagic.livyclientlib.configurableretrypolicy import ConfigurableRetryPolicy 2 | import sparkmagic.utils.configuration as conf 3 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 4 | 5 | 6 | def test_with_empty_list(): 7 | times = [] 8 | max_retries = 5 9 | policy = ConfigurableRetryPolicy(times, max_retries) 10 | 11 | assert 5 == policy.seconds_to_sleep(0) 12 | assert 5 == policy.seconds_to_sleep(4) 13 | assert 5 == policy.seconds_to_sleep(5) 14 | assert 5 == policy.seconds_to_sleep(6) 15 | 16 | # Check based on retry count 17 | assert True is policy.should_retry(500, False, 0) 18 | assert True is policy.should_retry(500, False, 4) 19 | assert True is 
policy.should_retry(500, False, 5) 20 | assert False is policy.should_retry(500, False, 6) 21 | 22 | # Check based on status code 23 | assert False is policy.should_retry(201, False, 0) 24 | assert False is policy.should_retry(201, False, 6) 25 | 26 | # Check based on error 27 | assert True is policy.should_retry(201, True, 0) 28 | assert True is policy.should_retry(201, True, 6) 29 | 30 | 31 | def test_with_one_element_list(): 32 | times = [2] 33 | max_retries = 5 34 | policy = ConfigurableRetryPolicy(times, max_retries) 35 | 36 | assert 2 == policy.seconds_to_sleep(0) 37 | assert 2 == policy.seconds_to_sleep(4) 38 | assert 2 == policy.seconds_to_sleep(5) 39 | assert 2 == policy.seconds_to_sleep(6) 40 | 41 | # Check based on retry count 42 | assert True is policy.should_retry(500, False, 0) 43 | assert True is policy.should_retry(500, False, 4) 44 | assert True is policy.should_retry(500, False, 5) 45 | assert False is policy.should_retry(500, False, 6) 46 | 47 | # Check based on status code 48 | assert False is policy.should_retry(201, False, 0) 49 | assert False is policy.should_retry(201, False, 6) 50 | 51 | # Check based on error 52 | assert True is policy.should_retry(201, True, 0) 53 | assert True is policy.should_retry(201, True, 6) 54 | 55 | 56 | def test_with_default_values(): 57 | times = conf.retry_seconds_to_sleep_list() 58 | max_retries = conf.configurable_retry_policy_max_retries() 59 | policy = ConfigurableRetryPolicy(times, max_retries) 60 | 61 | assert times[0] == policy.seconds_to_sleep(0) 62 | assert times[0] == policy.seconds_to_sleep(1) 63 | assert times[1] == policy.seconds_to_sleep(2) 64 | assert times[2] == policy.seconds_to_sleep(3) 65 | assert times[3] == policy.seconds_to_sleep(4) 66 | assert times[4] == policy.seconds_to_sleep(5) 67 | assert times[4] == policy.seconds_to_sleep(6) 68 | assert times[4] == policy.seconds_to_sleep(7) 69 | assert times[4] == policy.seconds_to_sleep(8) 70 | assert times[4] == policy.seconds_to_sleep(9) 71 | 72 | # Check based on retry count 73 | assert True is policy.should_retry(500, False, 0) 74 | assert True is policy.should_retry(500, False, 7) 75 | assert True is policy.should_retry(500, False, 8) 76 | assert False is policy.should_retry(500, False, 9) 77 | 78 | # Check based on status code 79 | assert False is policy.should_retry(201, False, 0) 80 | assert False is policy.should_retry(201, False, 9) 81 | 82 | # Check based on error 83 | assert True is policy.should_retry(201, True, 0) 84 | assert True is policy.should_retry(201, True, 9) 85 | 86 | 87 | def test_with_negative_values(): 88 | times = [0.1, -1] 89 | max_retries = 5 90 | 91 | try: 92 | policy = ConfigurableRetryPolicy(times, max_retries) 93 | assert False 94 | except BadUserConfigurationException: 95 | assert True 96 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/widget/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from .encoding import Encoding 4 | from .autovizwidget import AutoVizWidget 5 | 6 | 7 | def infer_vegalite_type(data): 8 | """ 9 | From an array-like input, infer the correct vega typecode 10 | ('O', 'N', 'Q', or 'T') 11 | Parameters 12 | ---------- 13 | data: Numpy array or Pandas Series 14 | """ 15 | 16 | typ = pd.api.types.infer_dtype(data) 17 | 18 | if typ in [ 19 | "floating", 20 | "mixed-integer-float", 21 | "integer", 22 | "mixed-integer", 23 | "complex", 24 | ]: 25 | typecode = "Q" 26 | elif typ in 
["string", "bytes", "categorical", "boolean", "mixed", "unicode"]: 27 | typecode = "N" 28 | elif typ in [ 29 | "datetime", 30 | "datetime64", 31 | "timedelta", 32 | "timedelta64", 33 | "date", 34 | "time", 35 | "period", 36 | ]: 37 | typecode = "T" 38 | else: 39 | typecode = "N" 40 | 41 | return typecode 42 | 43 | 44 | def _validate_custom_order(order): 45 | assert len(order) == 4 46 | list_to_check = list(order) 47 | list_to_check.sort() 48 | assert list_to_check == ["N", "O", "Q", "T"] 49 | 50 | 51 | def _classify_data_by_type(data, order, skip=None): 52 | """Get O, N, Q, or T vegalite type for all columns in data except if in skip.""" 53 | if skip is None: 54 | skip = [] 55 | 56 | d = dict() 57 | for typ in order: 58 | d[typ] = [] 59 | 60 | for column_name in data: 61 | if column_name not in skip: 62 | typ = infer_vegalite_type(data[column_name]) 63 | d[typ].append(column_name) 64 | 65 | return d 66 | 67 | 68 | def select_x(data, order=None): 69 | """ 70 | Helper function that does a best effort of selecting an automatic x axis. 71 | Returns None if it cannot find x axis. 72 | """ 73 | if data is None: 74 | return None 75 | 76 | if len(data) < 1: 77 | return None 78 | 79 | if order is None: 80 | order = ["T", "O", "N", "Q"] 81 | else: 82 | _validate_custom_order(order) 83 | 84 | d = _classify_data_by_type(data, order) 85 | 86 | chosen_x = None 87 | for typ in order: 88 | if len(d[typ]) >= 1: 89 | chosen_x = d[typ][0] 90 | break 91 | 92 | return chosen_x 93 | 94 | 95 | def select_y(data, x_name, order=None, aggregator=None): 96 | """ 97 | Helper function that does a best effort of selecting an automatic y axis. 98 | It won't set the same axis that x is set to again. 99 | Returns None if it cannot find y axis. 100 | """ 101 | if data is None: 102 | return None 103 | 104 | if len(data) < 2: 105 | return None 106 | 107 | if x_name is None: 108 | return None 109 | 110 | if order is None: 111 | order = ["Q", "O", "N", "T"] 112 | else: 113 | _validate_custom_order(order) 114 | 115 | d = _classify_data_by_type(data, order, [x_name]) 116 | 117 | # Choose the first column found on the following order: Q, O, N, T 118 | chosen_y = None 119 | for typ in order: 120 | if len(d[typ]) >= 1: 121 | chosen_y = d[typ][0] 122 | break 123 | 124 | return chosen_y 125 | 126 | 127 | def display_dataframe(df): 128 | selected_x = select_x(df) 129 | selected_y = select_y(df, selected_x) 130 | encoding = Encoding( 131 | chart_type=Encoding.chart_type_table, 132 | x=selected_x, 133 | y=selected_y, 134 | y_aggregation=Encoding.y_agg_max, 135 | ) 136 | return AutoVizWidget(df, encoding) 137 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | import pytest 3 | 4 | import sparkmagic.utils.configuration as conf 5 | from sparkmagic.livyclientlib.exceptions import * 6 | 7 | 8 | self = None 9 | ipython_display = None 10 | logger = None 11 | 12 | 13 | def setup_function(): 14 | global self, ipython_display, logger 15 | self = MagicMock() 16 | ipython_display = self.ipython_display 17 | logger = self.logger 18 | conf.override_all({}) 19 | 20 | 21 | def test_handle_expected_exceptions(): 22 | mock_method = MagicMock() 23 | mock_method.__name__ = "MockMethod" 24 | decorated = handle_expected_exceptions(mock_method) 25 | assert decorated.__name__ == mock_method.__name__ 26 | 27 | result = decorated(self, 1, 2, 3) 28 | assert result == 
mock_method.return_value 29 | assert ipython_display.send_error.call_count == 0 30 | mock_method.assert_called_once_with(self, 1, 2, 3) 31 | 32 | 33 | def test_handle_expected_exceptions_handle(): 34 | conf.override_all({"all_errors_are_fatal": False}) 35 | mock_method = MagicMock(side_effect=LivyUnexpectedStatusException("ridiculous")) 36 | mock_method.__name__ = "MockMethod2" 37 | decorated = handle_expected_exceptions(mock_method) 38 | assert decorated.__name__ == mock_method.__name__ 39 | 40 | result = decorated(self, 1, kwarg="foo") 41 | assert result is None 42 | assert ipython_display.send_error.call_count == 1 43 | mock_method.assert_called_once_with(self, 1, kwarg="foo") 44 | 45 | 46 | def test_handle_expected_exceptions_throw(): 47 | with pytest.raises(ValueError): 48 | mock_method = MagicMock(side_effect=ValueError("HALP")) 49 | mock_method.__name__ = "mock_meth" 50 | decorated = handle_expected_exceptions(mock_method) 51 | assert decorated.__name__ == mock_method.__name__ 52 | 53 | _ = decorated(self, 1, kwarg="foo") 54 | 55 | 56 | def test_handle_expected_exceptions_throws_if_all_errors_fatal(): 57 | with pytest.raises(LivyUnexpectedStatusException): 58 | conf.override_all({"all_errors_are_fatal": True}) 59 | mock_method = MagicMock(side_effect=LivyUnexpectedStatusException("Oh no!")) 60 | mock_method.__name__ = "mock_meth" 61 | decorated = handle_expected_exceptions(mock_method) 62 | assert decorated.__name__ == mock_method.__name__ 63 | 64 | _ = decorated(self, 1, kwarg="foo") 65 | 66 | 67 | # test wrap with unexpected to true 68 | def test_wrap_unexpected_exceptions(): 69 | mock_method = MagicMock() 70 | mock_method.__name__ = "tos" 71 | decorated = wrap_unexpected_exceptions(mock_method) 72 | assert decorated.__name__ == mock_method.__name__ 73 | 74 | result = decorated(self, 0.0) 75 | assert result == mock_method.return_value 76 | assert ipython_display.send_error.call_count == 0 77 | mock_method.assert_called_once_with(self, 0.0) 78 | 79 | 80 | def test_wrap_unexpected_exceptions_handle(): 81 | mock_method = MagicMock(side_effect=ValueError("~~~~~~")) 82 | mock_method.__name__ = "tos" 83 | decorated = wrap_unexpected_exceptions(mock_method) 84 | assert decorated.__name__ == mock_method.__name__ 85 | 86 | result = decorated(self, "FOOBAR", FOOBAR="FOOBAR") 87 | assert result is None 88 | assert ipython_display.send_error.call_count == 1 89 | mock_method.assert_called_once_with(self, "FOOBAR", FOOBAR="FOOBAR") 90 | 91 | 92 | def test_wrap_unexpected_exceptions_throws_if_all_errors_fatal(): 93 | with pytest.raises(ValueError): 94 | conf.override_all({"all_errors_are_fatal": True}) 95 | mock_method = MagicMock(side_effect=ValueError("~~~~~~")) 96 | mock_method.__name__ = "tos" 97 | decorated = wrap_unexpected_exceptions(mock_method) 98 | assert decorated.__name__ == mock_method.__name__ 99 | 100 | _ = decorated(self, "FOOBAR", FOOBAR="FOOBAR") 101 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/addendpointwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 
3 | import importlib 4 | from sparkmagic.livyclientlib.endpoint import Endpoint 5 | import sparkmagic.utils.configuration as conf 6 | from sparkmagic.utils.constants import WIDGET_WIDTH 7 | from .abstractmenuwidget import AbstractMenuWidget 8 | 9 | 10 | class AddEndpointWidget(AbstractMenuWidget): 11 | def __init__( 12 | self, 13 | spark_controller, 14 | ipywidget_factory, 15 | ipython_display, 16 | endpoints, 17 | endpoints_dropdown_widget, 18 | refresh_method, 19 | ): 20 | # This is nested 21 | super(AddEndpointWidget, self).__init__( 22 | spark_controller, ipywidget_factory, ipython_display, True 23 | ) 24 | self.endpoints = endpoints 25 | self.endpoints_dropdown_widget = endpoints_dropdown_widget 26 | self.refresh_method = refresh_method 27 | 28 | # map auth class path string to the instance of the class. 29 | self.auth_instances = {} 30 | for auth in conf.authenticators().values(): 31 | module, class_name = (auth).rsplit(".", 1) 32 | events_handler_module = importlib.import_module(module) 33 | auth_class = getattr(events_handler_module, class_name) 34 | self.auth_instances[auth] = auth_class() 35 | 36 | dropdown_options = [(k, v) for k, v in conf.authenticators().items()] 37 | self.auth_type = self.ipywidget_factory.get_dropdown( 38 | options=dropdown_options, description="Auth type:" 39 | ) 40 | 41 | # combine all authentication instance's widgets into one list to pass to self.children. 42 | self.all_widgets = list() 43 | for _class, instance in self.auth_instances.items(): 44 | for widget in instance.widgets: 45 | if _class == self.auth_type.value: 46 | widget.layout.display = "flex" 47 | self.auth = instance 48 | else: 49 | widget.layout.display = "none" 50 | self.all_widgets.append(widget) 51 | 52 | # Submit widget 53 | self.submit_widget = self.ipywidget_factory.get_submit_button( 54 | description="Add endpoint" 55 | ) 56 | 57 | self.auth_type.on_trait_change(self._update_auth) 58 | 59 | self.children = ( 60 | [ 61 | self.ipywidget_factory.get_html(value="
", width=WIDGET_WIDTH), 62 | self.auth_type, 63 | ] 64 | + self.all_widgets 65 | + [ 66 | self.ipywidget_factory.get_html(value="
", width=WIDGET_WIDTH), 67 | self.submit_widget, 68 | ] 69 | ) 70 | 71 | for child in self.children: 72 | child.parent_widget = self 73 | self._update_auth() 74 | 75 | def run(self): 76 | self.auth.update_with_widget_values() 77 | if self.auth_type.label == "None": 78 | endpoint = Endpoint(self.auth.url, None) 79 | else: 80 | endpoint = Endpoint(self.auth.url, self.auth) 81 | self.endpoints[self.auth.url] = endpoint 82 | self.ipython_display.writeln("Added endpoint {}".format(self.auth.url)) 83 | try: 84 | # We need to call the refresh method because drop down in Tab 2 for endpoints wouldn't 85 | # refresh with the new value otherwise. 86 | self.refresh_method() 87 | except: 88 | self.endpoints.pop(self.auth.url, None) 89 | self.refresh_method() 90 | raise 91 | 92 | def _update_auth(self): 93 | """Create an instance of the chosen auth type maps to in the config 94 | file.""" 95 | for widget in self.auth.widgets: 96 | widget.layout.display = "none" 97 | self.auth = self.auth_instances.get(self.auth_type.value) 98 | for widget in self.auth.widgets: 99 | widget.layout.display = "flex" 100 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/sessionmanager.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | import atexit 4 | from sparkmagic.utils.sparklogger import SparkLog 5 | from sparkmagic.livyclientlib.exceptions import SessionManagementException 6 | from sparkmagic.utils.constants import MAGICS_LOGGER_NAME 7 | import sparkmagic.utils.configuration as conf 8 | 9 | 10 | class SessionManager(object): 11 | def __init__(self, ipython_display): 12 | self.logger = SparkLog("SessionManager") 13 | self.ipython_display = ipython_display 14 | 15 | self._sessions = dict() 16 | 17 | self._register_cleanup_on_exit() 18 | 19 | @property 20 | def sessions(self): 21 | return self._sessions 22 | 23 | def get_sessions_list(self): 24 | return list(self._sessions.keys()) 25 | 26 | def get_sessions_info(self): 27 | return [ 28 | "Name: {}\t{}".format(k, str(self._sessions[k])) 29 | for k in list(self._sessions.keys()) 30 | ] 31 | 32 | def add_session(self, name, session): 33 | if name in self._sessions: 34 | raise SessionManagementException( 35 | "Session with name '{}' already exists. Please delete the session" 36 | " first if you intend to replace it.".format(name) 37 | ) 38 | 39 | self._sessions[name] = session 40 | 41 | def get_any_session(self): 42 | number_of_sessions = len(self._sessions) 43 | if number_of_sessions == 1: 44 | key = self.get_sessions_list()[0] 45 | return self._sessions[key] 46 | elif number_of_sessions == 0: 47 | raise SessionManagementException( 48 | "You need to have at least 1 client created to execute commands." 49 | ) 50 | else: 51 | raise SessionManagementException( 52 | "Please specify the client to use. Possible sessions are {}".format( 53 | self.get_sessions_list() 54 | ) 55 | ) 56 | 57 | def get_session(self, name): 58 | if name in self._sessions: 59 | return self._sessions[name] 60 | raise SessionManagementException( 61 | "Could not find '{}' session in list of saved sessions. 
Possible sessions are {}".format( 62 | name, self.get_sessions_list() 63 | ) 64 | ) 65 | 66 | def get_session_id_for_client(self, name): 67 | if name in self.get_sessions_list(): 68 | return self._sessions[name].id 69 | return None 70 | 71 | def get_session_name_by_id_endpoint(self, id, endpoint): 72 | for name, session in self._sessions.items(): 73 | if session.id == int(id) and session.endpoint == endpoint: 74 | return name 75 | return None 76 | 77 | def delete_client(self, name): 78 | self._remove_session(name) 79 | 80 | def clean_up_all(self): 81 | for name in self.get_sessions_list(): 82 | self._remove_session(name) 83 | 84 | def _remove_session(self, name): 85 | if name in self.get_sessions_list(): 86 | self._sessions[name].delete() 87 | del self._sessions[name] 88 | else: 89 | raise SessionManagementException( 90 | "Could not find '{}' session in list of saved sessions. Possible sessions are {}".format( 91 | name, self.get_sessions_list() 92 | ) 93 | ) 94 | 95 | def _register_cleanup_on_exit(self): 96 | """ 97 | Stop the livy sessions before python process exits for any reason (if enabled in conf) 98 | """ 99 | if conf.cleanup_all_sessions_on_exit(): 100 | 101 | def cleanup_spark_sessions(): 102 | try: 103 | self.clean_up_all() 104 | except Exception as e: 105 | self.logger.error( 106 | "Error cleaning up sessions on exit: {}".format(e) 107 | ) 108 | pass 109 | 110 | atexit.register(cleanup_spark_sessions) 111 | self.ipython_display.writeln("Cleaning up livy sessions on exit is enabled") 112 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/sendpandasdftosparkcommand.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Jupyter Development Team. 2 | # Distributed under the terms of the Modified BSD License. 
3 | 4 | from sparkmagic.livyclientlib.sendtosparkcommand import SendToSparkCommand 5 | from sparkmagic.livyclientlib.command import Command 6 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 7 | 8 | import sparkmagic.utils.configuration as conf 9 | 10 | import pandas as pd 11 | 12 | 13 | class SendPandasDfToSparkCommand(SendToSparkCommand): 14 | # convert unicode to utf8 or pyspark will mark data as corrupted(and deserialize incorrectly) 15 | _python_decode = """ 16 | import sys 17 | import json 18 | 19 | if sys.version_info.major == 2: 20 | def json_loads_byteified(json_text): 21 | return _byteify( 22 | json.loads(json_text, object_hook=_byteify), 23 | ignore_dicts=True 24 | ) 25 | else: 26 | def json_loads_byteified(json_text): 27 | return json.loads(json_text) 28 | 29 | def _byteify(data, ignore_dicts = False): 30 | if isinstance(data, unicode): 31 | return data.encode('utf-8') 32 | if isinstance(data, list): 33 | return [ _byteify(item, ignore_dicts=True) for item in data ] 34 | if isinstance(data, dict) and not ignore_dicts: 35 | return { 36 | _byteify(key, ignore_dicts=True): _byteify(value, ignore_dicts=True) 37 | for key, value in data.iteritems() 38 | } 39 | return data 40 | """ 41 | 42 | def __init__( 43 | self, input_variable_name, input_variable_value, output_variable_name, max_rows 44 | ): 45 | super(SendPandasDfToSparkCommand, self).__init__( 46 | input_variable_name, input_variable_value, output_variable_name 47 | ) 48 | self.max_rows = max_rows 49 | 50 | def _scala_command(self, input_variable_name, pandas_df, output_variable_name): 51 | self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df) 52 | pandas_json = self._get_dataframe_as_json(pandas_df) 53 | 54 | scala_code = ''' 55 | val rdd_json_array = spark.sparkContext.makeRDD("""{}""" :: Nil) 56 | val {} = spark.read.json(rdd_json_array)'''.format( 57 | pandas_json, output_variable_name 58 | ) 59 | 60 | return Command(scala_code) 61 | 62 | def _pyspark_command(self, input_variable_name, pandas_df, output_variable_name): 63 | self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df) 64 | 65 | pyspark_code = self._python_decode 66 | 67 | pandas_json = self._get_dataframe_as_json(pandas_df) 68 | 69 | pyspark_code += """ 70 | json_array = json_loads_byteified('{}') 71 | rdd_json_array = spark.sparkContext.parallelize(json_array) 72 | {} = spark.read.json(rdd_json_array)""".format( 73 | pandas_json, output_variable_name 74 | ) 75 | 76 | return Command(pyspark_code) 77 | 78 | def _r_command(self, input_variable_name, pandas_df, output_variable_name): 79 | self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df) 80 | pandas_json = self._get_dataframe_as_json(pandas_df) 81 | 82 | r_code = """ 83 | fileConn<-file("temporary_pandas_df_sparkmagics.txt") 84 | writeLines('{}', fileConn) 85 | close(fileConn) 86 | {} <- read.json("temporary_pandas_df_sparkmagics.txt") 87 | {}.persist() 88 | file.remove("temporary_pandas_df_sparkmagics.txt")""".format( 89 | pandas_json, output_variable_name, output_variable_name 90 | ) 91 | 92 | return Command(r_code) 93 | 94 | def _get_dataframe_as_json(self, pandas_df): 95 | return pandas_df.head(self.max_rows).to_json(orient="records") 96 | 97 | def _assert_input_is_pandas_dataframe( 98 | self, input_variable_name, input_variable_value 99 | ): 100 | if not isinstance(input_variable_value, pd.DataFrame): 101 | wrong_type = input_variable_value.__class__.__name__ 102 | raise BadUserDataException( 103 | "{} is not a Pandas DataFrame! 
Got {} instead.".format( 104 | input_variable_name, wrong_type 105 | ) 106 | ) 107 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/controllerwidget/magicscontrollerwidget.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | from sparkmagic.controllerwidget.abstractmenuwidget import AbstractMenuWidget 4 | from sparkmagic.controllerwidget.addendpointwidget import AddEndpointWidget 5 | from sparkmagic.controllerwidget.manageendpointwidget import ManageEndpointWidget 6 | from sparkmagic.controllerwidget.managesessionwidget import ManageSessionWidget 7 | from sparkmagic.controllerwidget.createsessionwidget import CreateSessionWidget 8 | from sparkmagic.livyclientlib.endpoint import Endpoint 9 | from sparkmagic.utils.constants import LANGS_SUPPORTED 10 | import sparkmagic.utils.configuration as conf 11 | from sparkmagic.utils.utils import Namespace, initialize_auth 12 | 13 | 14 | class MagicsControllerWidget(AbstractMenuWidget): 15 | def __init__( 16 | self, spark_controller, ipywidget_factory, ipython_display, endpoints=None 17 | ): 18 | super(MagicsControllerWidget, self).__init__( 19 | spark_controller, ipywidget_factory, ipython_display 20 | ) 21 | 22 | if endpoints is None: 23 | endpoints = { 24 | endpoint.url: endpoint for endpoint in self._get_default_endpoints() 25 | } 26 | self.endpoints = endpoints 27 | 28 | self._refresh() 29 | 30 | def run(self): 31 | pass 32 | 33 | @staticmethod 34 | def _get_default_endpoints(): 35 | default_endpoints = set() 36 | 37 | for kernel_type in LANGS_SUPPORTED: 38 | endpoint_config = getattr(conf, "kernel_%s_credentials" % kernel_type)() 39 | if ( 40 | all([p in endpoint_config for p in ["url", "password", "username"]]) 41 | and endpoint_config["url"] != "" 42 | ): 43 | user = endpoint_config["username"] 44 | passwd = endpoint_config["password"] 45 | args = Namespace( 46 | user=user, 47 | password=passwd, 48 | auth=endpoint_config.get("auth", None), 49 | url=endpoint_config.get("url", None), 50 | ) 51 | auth_instance = initialize_auth(args) 52 | 53 | default_endpoints.add( 54 | Endpoint( 55 | auth=auth_instance, 56 | url=endpoint_config["url"], 57 | implicitly_added=True, 58 | ) 59 | ) 60 | 61 | return default_endpoints 62 | 63 | def _refresh(self): 64 | dropdown_options = [(k, v) for k, v in self.endpoints.items()] 65 | self.endpoints_dropdown_widget = self.ipywidget_factory.get_dropdown( 66 | description="Endpoint:", options=dropdown_options 67 | ) 68 | 69 | self.manage_session = ManageSessionWidget( 70 | self.spark_controller, 71 | self.ipywidget_factory, 72 | self.ipython_display, 73 | self._refresh, 74 | ) 75 | self.create_session = CreateSessionWidget( 76 | self.spark_controller, 77 | self.ipywidget_factory, 78 | self.ipython_display, 79 | self.endpoints_dropdown_widget, 80 | self._refresh, 81 | ) 82 | self.add_endpoint = AddEndpointWidget( 83 | self.spark_controller, 84 | self.ipywidget_factory, 85 | self.ipython_display, 86 | self.endpoints, 87 | self.endpoints_dropdown_widget, 88 | self._refresh, 89 | ) 90 | self.manage_endpoint = ManageEndpointWidget( 91 | self.spark_controller, 92 | self.ipywidget_factory, 93 | self.ipython_display, 94 | self.endpoints, 95 | self._refresh, 96 | ) 97 | 98 | self.tabs = self.ipywidget_factory.get_tab( 99 | children=[ 100 | self.manage_session, 101 | self.create_session, 102 | self.add_endpoint, 103 | 
self.manage_endpoint, 104 | ] 105 | ) 106 | self.tabs.set_title(0, "Manage Sessions") 107 | self.tabs.set_title(1, "Create Session") 108 | self.tabs.set_title(2, "Add Endpoint") 109 | self.tabs.set_title(3, "Manage Endpoints") 110 | 111 | self.children = [self.tabs] 112 | 113 | for child in self.children: 114 | child.parent_widget = self 115 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from ..widget import utils as utils 4 | from ..widget.encoding import Encoding 5 | 6 | 7 | df = None 8 | encoding = None 9 | 10 | 11 | def setup_function(): 12 | global df, encoding 13 | 14 | records = [ 15 | { 16 | "buildingID": 0, 17 | "date": "6/1/13", 18 | "temp_diff": 12, 19 | "mystr": "alejandro", 20 | "mystr2": "1", 21 | }, 22 | { 23 | "buildingID": 1, 24 | "date": "6/1/13", 25 | "temp_diff": 0, 26 | "mystr": "alejandro", 27 | "mystr2": "1", 28 | }, 29 | { 30 | "buildingID": 2, 31 | "date": "6/1/14", 32 | "temp_diff": 11, 33 | "mystr": "alejandro", 34 | "mystr2": "1", 35 | }, 36 | { 37 | "buildingID": 0, 38 | "date": "6/1/15", 39 | "temp_diff": 5, 40 | "mystr": "alejandro", 41 | "mystr2": "1.0", 42 | }, 43 | { 44 | "buildingID": 1, 45 | "date": "6/1/16", 46 | "temp_diff": 19, 47 | "mystr": "alejandro", 48 | "mystr2": "1", 49 | }, 50 | { 51 | "buildingID": 2, 52 | "date": "6/1/17", 53 | "temp_diff": 32, 54 | "mystr": "alejandro", 55 | "mystr2": "1", 56 | }, 57 | ] 58 | df = pd.DataFrame(records) 59 | 60 | encoding = Encoding(chart_type="table", x="date", y="temp_diff") 61 | 62 | 63 | def teardown_function(): 64 | pass 65 | 66 | 67 | def test_on_render_viz(): 68 | df["date"] = pd.to_datetime(df["date"]) 69 | df["mystr2"] = pd.to_numeric(df["mystr2"]) 70 | 71 | assert utils.infer_vegalite_type(df["buildingID"]) == "Q" 72 | assert utils.infer_vegalite_type(df["date"]) == "T" 73 | assert utils.infer_vegalite_type(df["temp_diff"]) == "Q" 74 | assert utils.infer_vegalite_type(df["mystr"]) == "N" 75 | assert utils.infer_vegalite_type(df["mystr2"]) == "Q" 76 | 77 | 78 | def test_select_x(): 79 | assert utils.select_x(None) is None 80 | 81 | def _check(d, expected): 82 | x = utils.select_x(d) 83 | assert x == expected 84 | 85 | data = dict( 86 | col1=[1.0, 2.0, 3.0], # Q 87 | col2=["A", "B", "C"], # N 88 | col3=pd.date_range("2012", periods=3, freq="A"), 89 | ) # T 90 | _check(data, "col3") 91 | 92 | data = dict(col1=[1.0, 2.0, 3.0], col2=["A", "B", "C"]) # Q # N 93 | _check(data, "col2") 94 | 95 | data = dict(col1=[1.0, 2.0, 3.0]) # Q 96 | _check(data, "col1") 97 | 98 | # Custom order 99 | data = dict( 100 | col1=[1.0, 2.0, 3.0], # Q 101 | col2=["A", "B", "C"], # N 102 | col3=pd.date_range("2012", periods=3, freq="A"), # T 103 | col4=pd.date_range("2012", periods=3, freq="A"), 104 | ) # T 105 | selected_x = utils.select_x(data, ["N", "T", "Q", "O"]) 106 | assert selected_x == "col2" 107 | 108 | # Len < 1 109 | assert utils.select_x(dict()) is None 110 | 111 | 112 | def test_select_y(): 113 | def _check(d, expected): 114 | x = "col1" 115 | y = utils.select_y(d, x) 116 | assert y == expected 117 | 118 | data = dict( 119 | col1=[1.0, 2.0, 3.0], # Chosen X 120 | col2=["A", "B", "C"], # N 121 | col3=pd.date_range("2012", periods=3, freq="A"), # T 122 | col4=pd.date_range("2012", periods=3, freq="A"), # T 123 | col5=[1.0, 2.0, 3.0], 124 | ) # Q 125 | _check(data, "col5") 126 | 127 | data = dict( 128 | col1=[1.0, 2.0, 
3.0], # Chosen X 129 | col2=["A", "B", "C"], # N 130 | col3=pd.date_range("2012", periods=3, freq="A"), 131 | ) # T 132 | _check(data, "col2") 133 | 134 | data = dict( 135 | col1=[1.0, 2.0, 3.0], # Chosen X 136 | col2=pd.date_range("2012", periods=3, freq="A"), 137 | ) # T 138 | _check(data, "col2") 139 | 140 | # No data 141 | assert utils.select_y(None, "something") is None 142 | 143 | # Len < 2 144 | assert utils.select_y(dict(col1=[1.0, 2.0, 3.0]), "something") is None 145 | 146 | # No x 147 | assert utils.select_y(df, None) is None 148 | 149 | # Custom order 150 | data = dict( 151 | col1=[1.0, 2.0, 3.0], # Chosen X 152 | col2=["A", "B", "C"], # N 153 | col3=pd.date_range("2012", periods=3, freq="A"), # T 154 | col4=pd.date_range("2012", periods=3, freq="A"), # T 155 | col5=[1.0, 2.0, 3.0], # Q 156 | col6=[1.0, 2.0, 3.0], 157 | ) # Q 158 | selected_x = "col1" 159 | selected_y = utils.select_y(data, selected_x, ["N", "T", "Q", "O"]) 160 | assert selected_y == "col2" 161 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_livyreliablehttpclient.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock 2 | 3 | from sparkmagic.livyclientlib.livyreliablehttpclient import LivyReliableHttpClient 4 | from sparkmagic.livyclientlib.endpoint import Endpoint 5 | import sparkmagic.utils.configuration as conf 6 | import sparkmagic.utils.constants as constants 7 | from sparkmagic.livyclientlib.exceptions import BadUserConfigurationException 8 | from sparkmagic.livyclientlib.configurableretrypolicy import ConfigurableRetryPolicy 9 | from sparkmagic.livyclientlib.linearretrypolicy import LinearRetryPolicy 10 | 11 | 12 | def test_post_statement(): 13 | http_client = MagicMock() 14 | livy_client = LivyReliableHttpClient(http_client, None) 15 | data = {"adlfj": "sadflkjsdf"} 16 | out = livy_client.post_statement(100, data) 17 | assert out == http_client.post.return_value.json.return_value 18 | http_client.post.assert_called_once_with("/sessions/100/statements", [201], data) 19 | 20 | 21 | def test_get_statement(): 22 | http_client = MagicMock() 23 | livy_client = LivyReliableHttpClient(http_client, None) 24 | out = livy_client.get_statement(100, 4) 25 | assert out == http_client.get.return_value.json.return_value 26 | http_client.get.assert_called_once_with("/sessions/100/statements/4", [200]) 27 | 28 | 29 | def test_cancel_statement(): 30 | http_client = MagicMock() 31 | livy_client = LivyReliableHttpClient(http_client, None) 32 | out = livy_client.cancel_statement(100, 104) 33 | assert out == http_client.post.return_value.json.return_value 34 | http_client.post.assert_called_once_with( 35 | "/sessions/100/statements/104/cancel", [200], {} 36 | ) 37 | 38 | 39 | def test_get_sessions(): 40 | http_client = MagicMock() 41 | livy_client = LivyReliableHttpClient(http_client, None) 42 | out = livy_client.get_sessions() 43 | assert out == http_client.get.return_value.json.return_value 44 | http_client.get.assert_called_once_with("/sessions", [200]) 45 | 46 | 47 | def test_post_session(): 48 | http_client = MagicMock() 49 | livy_client = LivyReliableHttpClient(http_client, None) 50 | properties = {"adlfj": "sadflkjsdf", 1: [2, 3, 4, 5]} 51 | out = livy_client.post_session(properties) 52 | assert out == http_client.post.return_value.json.return_value 53 | http_client.post.assert_called_once_with("/sessions", [201], properties) 54 | 55 | 56 | def test_get_session(): 57 | http_client = 
MagicMock() 58 | livy_client = LivyReliableHttpClient(http_client, None) 59 | out = livy_client.get_session(4) 60 | assert out == http_client.get.return_value.json.return_value 61 | http_client.get.assert_called_once_with("/sessions/4", [200]) 62 | 63 | 64 | def test_delete_session(): 65 | http_client = MagicMock() 66 | livy_client = LivyReliableHttpClient(http_client, None) 67 | livy_client.delete_session(99) 68 | http_client.delete.assert_called_once_with("/sessions/99", [200, 404]) 69 | 70 | 71 | def test_get_all_session_logs(): 72 | http_client = MagicMock() 73 | livy_client = LivyReliableHttpClient(http_client, None) 74 | out = livy_client.get_all_session_logs(42) 75 | assert out == http_client.get.return_value.json.return_value 76 | http_client.get.assert_called_once_with("/sessions/42/log?from=0", [200]) 77 | 78 | 79 | def test_custom_headers(): 80 | custom_headers = {"header1": "value1"} 81 | overrides = {conf.custom_headers.__name__: custom_headers} 82 | conf.override_all(overrides) 83 | endpoint = Endpoint("http://url.com", None) 84 | client = LivyReliableHttpClient.from_endpoint(endpoint) 85 | headers = client.get_headers() 86 | assert len(headers) == 2 87 | assert "Content-Type" in headers 88 | assert "header1" in headers 89 | 90 | 91 | def test_retry_policy(): 92 | # Default is configurable retry 93 | times = conf.retry_seconds_to_sleep_list() 94 | max_retries = conf.configurable_retry_policy_max_retries() 95 | policy = LivyReliableHttpClient._get_retry_policy() 96 | assert type(policy) is ConfigurableRetryPolicy 97 | assert times == policy.retry_seconds_to_sleep_list 98 | assert max_retries == policy.max_retries 99 | 100 | # Configure to linear retry 101 | _override_policy(constants.LINEAR_RETRY) 102 | policy = LivyReliableHttpClient._get_retry_policy() 103 | assert type(policy) is LinearRetryPolicy 104 | assert 5 == policy.seconds_to_sleep(1) 105 | assert 5 == policy.max_retries 106 | 107 | # Configure to something invalid 108 | _override_policy("garbage") 109 | try: 110 | policy = LivyReliableHttpClient._get_retry_policy() 111 | assert False 112 | except BadUserConfigurationException: 113 | assert True 114 | 115 | 116 | def _override_policy(policy): 117 | overrides = {conf.retry_policy.__name__: policy} 118 | conf.override_all(overrides) 119 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License.
3 | 4 | import os 5 | 6 | HOME_PATH = os.environ.get("SPARKMAGIC_CONF_DIR", "~/.sparkmagic") 7 | CONFIG_FILE = os.environ.get("SPARKMAGIC_CONF_FILE", "config.json") 8 | 9 | SESSION_KIND_SPARK = "spark" 10 | SESSION_KIND_PYSPARK = "pyspark" 11 | SESSION_KIND_SPARKR = "sparkr" 12 | SESSION_KINDS_SUPPORTED = [ 13 | SESSION_KIND_SPARK, 14 | SESSION_KIND_PYSPARK, 15 | SESSION_KIND_SPARKR, 16 | ] 17 | 18 | LIBRARY_LOADED_EVENT = "notebookLoaded" 19 | CLUSTER_CHANGE_EVENT = "notebookClusterChange" 20 | SESSION_CREATION_START_EVENT = "notebookSessionCreationStart" 21 | SESSION_CREATION_END_EVENT = "notebookSessionCreationEnd" 22 | SESSION_DELETION_START_EVENT = "notebookSessionDeletionStart" 23 | SESSION_DELETION_END_EVENT = "notebookSessionDeletionEnd" 24 | STATEMENT_EXECUTION_START_EVENT = "notebookStatementExecutionStart" 25 | STATEMENT_EXECUTION_END_EVENT = "notebookStatementExecutionEnd" 26 | SQL_EXECUTION_START_EVENT = "notebookSqlExecutionStart" 27 | SQL_EXECUTION_END_EVENT = "notebookSqlExecutionEnd" 28 | MAGIC_EXECUTION_START_EVENT = "notebookMagicExecutionStart" 29 | MAGIC_EXECUTION_END_EVENT = "notebookMagicExecutionEnd" 30 | 31 | CLUSTER_DNS_NAME = "ClusterDnsName" 32 | SESSION_ID = "SessionId" 33 | SESSION_GUID = "SessionGuid" 34 | STATEMENT_ID = "StatementId" 35 | STATEMENT_GUID = "StatementGuid" 36 | SQL_GUID = "SqlGuid" 37 | MAGIC_NAME = "MagicName" 38 | MAGIC_GUID = "MagicGuid" 39 | LIVY_KIND = "LivyKind" 40 | STATUS = "Status" 41 | SUCCESS = "Success" 42 | EXCEPTION_TYPE = "ExceptionType" 43 | EXCEPTION_MESSAGE = "ExceptionMessage" 44 | SAMPLE_METHOD = "SampleMethod" 45 | MAX_ROWS = "MaxRows" 46 | SAMPLE_FRACTION = "SampleFraction" 47 | ERROR_MESSAGE = "ErrorMessage" 48 | STATUS_CODE = "StatusCode" 49 | 50 | CONTEXT_NAME_SPARK = "spark" 51 | CONTEXT_NAME_SQL = "sql" 52 | 53 | LANG_SCALA = "scala" 54 | LANG_PYTHON = "python" 55 | LANG_R = "r" 56 | LANGS_SUPPORTED = [LANG_SCALA, LANG_PYTHON, LANG_R] 57 | 58 | LONG_RANDOM_VARIABLE_NAME = "yQeKOYBsFgLWWGWZJu3y" 59 | 60 | WIDGET_WIDTH = "800px" 61 | 62 | MAGICS_LOGGER_NAME = "magicsLogger" 63 | 64 | # The list here https://livy.incubator.apache.org/docs/latest/rest-api.html 65 | # appears incomplete; full list is in 66 | # https://github.com/apache/incubator-livy/blob/master/core/src/main/scala/org/apache/livy/sessions/SessionState.scala: 67 | IDLE_SESSION_STATUS = "idle" 68 | ERROR_SESSION_STATUS = "error" 69 | DEAD_SESSION_STATUS = "dead" 70 | NOT_STARTED_SESSION_STATUS = "not_started" 71 | STARTING_SESSION_STATUS = "starting" 72 | BUSY_SESSION_STATUS = "busy" 73 | SUCCESS_SESSION_STATUS = "success" 74 | SHUT_DOWN_SESSION_STATUS = "shutting_down" 75 | RUNNING_SESSION_STATUS = "running" 76 | KILLED_SESSION_STATUS = "killed" 77 | RECOVERING_SESSION_STATUS = "recovering" 78 | 79 | POSSIBLE_SESSION_STATUS = [ 80 | NOT_STARTED_SESSION_STATUS, 81 | IDLE_SESSION_STATUS, 82 | STARTING_SESSION_STATUS, 83 | BUSY_SESSION_STATUS, 84 | ERROR_SESSION_STATUS, 85 | DEAD_SESSION_STATUS, 86 | SUCCESS_SESSION_STATUS, 87 | SHUT_DOWN_SESSION_STATUS, 88 | RUNNING_SESSION_STATUS, 89 | KILLED_SESSION_STATUS, 90 | RECOVERING_SESSION_STATUS, 91 | ] 92 | FINAL_STATUS = [ 93 | DEAD_SESSION_STATUS, 94 | ERROR_SESSION_STATUS, 95 | SUCCESS_SESSION_STATUS, 96 | KILLED_SESSION_STATUS, 97 | ] 98 | 99 | ERROR_STATEMENT_STATUS = "error" 100 | CANCELLED_STATEMENT_STATUS = "cancelled" 101 | AVAILABLE_STATEMENT_STATUS = "available" 102 | FINAL_STATEMENT_STATUS = [ 103 | ERROR_STATEMENT_STATUS, 104 | CANCELLED_STATEMENT_STATUS, 105 | AVAILABLE_STATEMENT_STATUS, 106 | ] 
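The statement-status constants above exist so callers can poll Livy until a statement stops changing state. A minimal sketch of that polling loop, assuming a hypothetical fetch_statement callable that returns Livy's statement JSON (the callable, the polling budget, and the "state" key access are illustrative assumptions, not sparkmagic code):

import time

from sparkmagic.utils.constants import FINAL_STATEMENT_STATUS


def wait_for_statement(fetch_statement, interval_seconds=1.0, max_polls=120):
    # Poll the hypothetical fetch_statement() until the statement reaches
    # one of the terminal states in FINAL_STATEMENT_STATUS above.
    for _ in range(max_polls):
        statement = fetch_statement()
        if statement["state"] in FINAL_STATEMENT_STATUS:
            return statement
        time.sleep(interval_seconds)
    raise TimeoutError("Statement did not reach a final state in time")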
107 | 108 | DELETE_SESSION_ACTION = "delete" 109 | START_SESSION_ACTION = "start" 110 | DO_NOTHING_ACTION = "nothing" 111 | 112 | INTERNAL_ERROR_MSG = ( 113 | "An internal error was encountered.\n" 114 | "Please file an issue at https://github.com/jupyter-incubator/sparkmagic\nError:\n{}" 115 | ) 116 | EXPECTED_ERROR_MSG = "An error was encountered:\n{}" 117 | 118 | YARN_RESOURCE_LIMIT_MSG = "Queue's AM resource limit exceeded." 119 | RESOURCE_LIMIT_WARNING = ( 120 | "Warning: The Spark session does not have enough YARN resources to start. {}" 121 | ) 122 | COMMAND_INTERRUPTED_MSG = "Interrupted by user" 123 | COMMAND_CANCELLATION_FAILED_MSG = ( 124 | "Interrupted by user but Livy failed to cancel the Spark statement. " 125 | "The Livy session might have become unusable." 126 | ) 127 | 128 | LIVY_HEARTBEAT_TIMEOUT_PARAM = "heartbeatTimeoutInSecond" 129 | LIVY_KIND_PARAM = "kind" 130 | 131 | NO_AUTH = "None" 132 | AUTH_KERBEROS = "Kerberos" 133 | AUTH_BASIC = "Basic_Access" 134 | 135 | CONFIGURABLE_RETRY = "configurable" 136 | LINEAR_RETRY = "linear" 137 | 138 | MIMETYPE_IMAGE_PNG = "image/png" 139 | MIMETYPE_TEXT_HTML = "text/html" 140 | MIMETYPE_TEXT_PLAIN = "text/plain" 141 | MIMETYPE_APPLICATION_JSON = "application/json" 142 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_pd_data_coerce.py: -------------------------------------------------------------------------------- 1 | from pandas.testing import assert_frame_equal 2 | import pandas as pd 3 | 4 | from sparkmagic.utils.utils import coerce_pandas_df_to_numeric_datetime 5 | 6 | 7 | def test_no_coercing(): 8 | records = [ 9 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 10 | {"buildingID": 1, "date": "random", "temp_diff": "0adsf"}, 11 | ] 12 | desired_df = pd.DataFrame(records) 13 | 14 | df = pd.DataFrame(records) 15 | coerce_pandas_df_to_numeric_datetime(df) 16 | 17 | assert_frame_equal(desired_df, df) 18 | 19 | 20 | def test_date_coercing(): 21 | records = [ 22 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 23 | {"buildingID": 1, "date": "6/1/13", "temp_diff": "0adsf"}, 24 | ] 25 | desired_df = pd.DataFrame(records) 26 | desired_df["date"] = pd.to_datetime(desired_df["date"]) 27 | 28 | df = pd.DataFrame(records) 29 | coerce_pandas_df_to_numeric_datetime(df) 30 | 31 | assert_frame_equal(desired_df, df) 32 | 33 | 34 | def test_date_coercing_none_values(): 35 | records = [ 36 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 37 | {"buildingID": 1, "date": None, "temp_diff": "0adsf"}, 38 | ] 39 | desired_df = pd.DataFrame(records) 40 | desired_df["date"] = pd.to_datetime(desired_df["date"]) 41 | 42 | df = pd.DataFrame(records) 43 | coerce_pandas_df_to_numeric_datetime(df) 44 | 45 | assert_frame_equal(desired_df, df) 46 | 47 | 48 | def test_date_none_values_and_no_coercing(): 49 | records = [ 50 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 51 | {"buildingID": 1, "date": None, "temp_diff": "0adsf"}, 52 | {"buildingID": 1, "date": "adsf", "temp_diff": "0adsf"}, 53 | ] 54 | desired_df = pd.DataFrame(records) 55 | 56 | df = pd.DataFrame(records) 57 | coerce_pandas_df_to_numeric_datetime(df) 58 | 59 | assert_frame_equal(desired_df, df) 60 | 61 | 62 | def test_numeric_coercing(): 63 | records = [ 64 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 65 | {"buildingID": 1, "date": "adsf", "temp_diff": "0"}, 66 | ] 67 | desired_df = pd.DataFrame(records) 68 | desired_df["temp_diff"] = pd.to_numeric(desired_df["temp_diff"]) 69 | 
70 | df = pd.DataFrame(records) 71 | coerce_pandas_df_to_numeric_datetime(df) 72 | 73 | assert_frame_equal(desired_df, df) 74 | 75 | 76 | def test_numeric_coercing_none_values(): 77 | records = [ 78 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 79 | {"buildingID": 1, "date": "asdf", "temp_diff": None}, 80 | ] 81 | desired_df = pd.DataFrame(records) 82 | desired_df["temp_diff"] = pd.to_numeric(desired_df["temp_diff"]) 83 | 84 | df = pd.DataFrame(records) 85 | coerce_pandas_df_to_numeric_datetime(df) 86 | 87 | assert_frame_equal(desired_df, df) 88 | 89 | 90 | def test_numeric_none_values_and_no_coercing(): 91 | records = [ 92 | {"buildingID": 0, "date": "6/1/13", "temp_diff": "12"}, 93 | {"buildingID": 1, "date": "asdf", "temp_diff": None}, 94 | {"buildingID": 1, "date": "adsf", "temp_diff": "0asdf"}, 95 | ] 96 | desired_df = pd.DataFrame(records) 97 | 98 | df = pd.DataFrame(records) 99 | coerce_pandas_df_to_numeric_datetime(df) 100 | 101 | assert_frame_equal(desired_df, df) 102 | 103 | 104 | def test_df_dict_does_not_throw(): 105 | json_str = """ 106 | [{ 107 | "id": 580320, 108 | "name": "COUSIN'S GRILL", 109 | "results": "Fail", 110 | "violations": "37. TOILET area.", 111 | "words": ["37.", 112 | "toilet", 113 | "area."], 114 | "features": { 115 | "type": 0, 116 | "size": 262144, 117 | "indices": [0, 118 | 45, 119 | 97], 120 | "values": [7.0, 121 | 5.0, 122 | 1.0] 123 | }, 124 | "rawPrediction": { 125 | "type": 1, 126 | "values": [3.640841752791392, 127 | -3.640841752791392] 128 | }, 129 | "probability": { 130 | "type": 1, 131 | "values": [0.974440185187647, 132 | 0.025559814812352966] 133 | }, 134 | "prediction": 0.0 135 | }] 136 | """ 137 | df = pd.read_json(json_str) 138 | coerce_pandas_df_to_numeric_datetime(df) 139 | 140 | 141 | def test_overflow_coercing(): 142 | records = [{"_c0": "12345678901"}] 143 | desired_df = pd.DataFrame(records) 144 | desired_df["_c0"] = pd.to_numeric(desired_df["_c0"]) 145 | df = pd.DataFrame(records) 146 | coerce_pandas_df_to_numeric_datetime(df) 147 | assert_frame_equal(desired_df, df) 148 | 149 | 150 | def test_all_null_columns(): 151 | records = [{"_c0": "12345", "nulla": None}, {"_c0": "12345", "nulla": None}] 152 | desired_df = pd.DataFrame(records) 153 | desired_df["_c0"] = pd.to_numeric(desired_df["_c0"]) 154 | df = pd.DataFrame(records) 155 | coerce_pandas_df_to_numeric_datetime(df) 156 | assert_frame_equal(desired_df, df) 157 | -------------------------------------------------------------------------------- /autovizwidget/autovizwidget/tests/test_encodingwidget.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock, call 2 | from ipywidgets import Widget 3 | import pandas as pd 4 | 5 | from ..widget.encodingwidget import EncodingWidget 6 | from ..widget.encoding import Encoding 7 | 8 | 9 | df = None 10 | encoding = None 11 | ipywidget_factory = None 12 | change_hook = None 13 | 14 | 15 | def setup_function(): 16 | global df, encoding, ipywidget_factory, change_hook 17 | 18 | records = [ 19 | {"buildingID": 0, "date": "6/1/13", "temp_diff": 12, "\u263A": True}, 20 | {"buildingID": 1, "date": "6/1/13", "temp_diff": 0, "\u263A": True}, 21 | {"buildingID": 2, "date": "6/1/14", "temp_diff": 11, "\u263A": True}, 22 | {"buildingID": 0, "date": "6/1/15", "temp_diff": 5, "\u263A": True}, 23 | {"buildingID": 1, "date": "6/1/16", "temp_diff": 19, "\u263A": True}, 24 | {"buildingID": 2, "date": "6/1/17", "temp_diff": 32, "\u263A": True}, 25 | ] 26 | df = pd.DataFrame(records) 27 | 28 | 
encoding = Encoding(chart_type="table", x="date", y="temp_diff") 29 | 30 | ipywidget_factory = MagicMock() 31 | ipywidget_factory.get_vbox.return_value = MagicMock(spec=Widget) 32 | 33 | change_hook = MagicMock() 34 | 35 | 36 | def teardown_function(): 37 | pass 38 | 39 | 40 | def test_encoding_with_all_none_doesnt_throw(): 41 | records = [ 42 | {"buildingID": 0, "date": "6/1/13", "temp_diff": 12}, 43 | {"buildingID": 1, "date": "6/1/13", "temp_diff": 0}, 44 | {"buildingID": 2, "date": "6/1/14", "temp_diff": 11}, 45 | {"buildingID": 0, "date": "6/1/15", "temp_diff": 5}, 46 | {"buildingID": 1, "date": "6/1/16", "temp_diff": 19}, 47 | {"buildingID": 2, "date": "6/1/17", "temp_diff": 32}, 48 | ] 49 | df = pd.DataFrame(records) 50 | 51 | encoding = Encoding() 52 | 53 | ipywidget_factory = MagicMock() 54 | ipywidget_factory.get_vbox.return_value = MagicMock(spec=Widget) 55 | 56 | EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 57 | 58 | assert ( 59 | call( 60 | description="X", 61 | value=None, 62 | options=[ 63 | ("-", None), 64 | ("buildingID", "buildingID"), 65 | ("date", "date"), 66 | ("temp_diff", "temp_diff"), 67 | ], 68 | ) 69 | in ipywidget_factory.get_dropdown.mock_calls 70 | ) 71 | assert ( 72 | call( 73 | description="Y", 74 | value=None, 75 | options=[ 76 | ("-", None), 77 | ("buildingID", "buildingID"), 78 | ("date", "date"), 79 | ("temp_diff", "temp_diff"), 80 | ], 81 | ) 82 | in ipywidget_factory.get_dropdown.mock_calls 83 | ) 84 | assert ( 85 | call( 86 | description="Func.", 87 | value="none", 88 | options=[ 89 | ("-", "None"), 90 | ("Avg", "Avg"), 91 | ("Min", "Min"), 92 | ("Max", "Max"), 93 | ("Sum", "Sum"), 94 | ("Count", "Count"), 95 | ], 96 | ) 97 | in ipywidget_factory.get_dropdown.mock_calls 98 | ) 99 | 100 | 101 | def test_value_for_aggregation(): 102 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 103 | 104 | assert widget._get_value_for_aggregation(None) == "none" 105 | assert widget._get_value_for_aggregation("avg") == "avg" 106 | 107 | 108 | def test_x_changed_callback(): 109 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 110 | 111 | widget._x_changed_callback("name", "old", "new") 112 | 113 | assert encoding.x == "new" 114 | assert change_hook.call_count == 1 115 | 116 | 117 | def test_y_changed_callback(): 118 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 119 | 120 | widget._y_changed_callback("name", "old", "new") 121 | 122 | assert encoding.y == "new" 123 | assert change_hook.call_count == 1 124 | 125 | 126 | def test_y_agg__changed_callback(): 127 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 128 | 129 | widget._y_agg_changed_callback("name", "old", "new") 130 | 131 | assert encoding.y_aggregation == "new" 132 | assert change_hook.call_count == 1 133 | 134 | 135 | def test_log_x_changed_callback(): 136 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 137 | 138 | widget._logarithmic_x_callback("name", "old", "new") 139 | 140 | assert encoding.logarithmic_x_axis == "new" 141 | assert change_hook.call_count == 1 142 | 143 | 144 | def test_log_y_changed_callback(): 145 | widget = EncodingWidget(df, encoding, change_hook, ipywidget_factory, testing=True) 146 | 147 | widget._logarithmic_y_callback("name", "old", "new") 148 | 149 | assert encoding.logarithmic_y_axis == "new" 150 | assert change_hook.call_count == 1 151 | 
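The callback tests above all pin down one contract: a change handler writes the new dropdown value onto the shared Encoding object, then fires the change hook exactly once so the chart re-renders. A stripped-down sketch of that contract using stand-in classes (illustrative only; the real wiring lives in autovizwidget's EncodingWidget):

class FakeEncoding(object):
    # Stand-in for autovizwidget.widget.encoding.Encoding: mutable chart state.
    x = None


class EncodingBinding(object):
    # Mimics the update-then-notify pattern exercised by the tests above.
    def __init__(self, encoding, change_hook):
        self.encoding = encoding
        self.change_hook = change_hook

    def x_changed_callback(self, name, old, new):
        self.encoding.x = new  # mutate the shared encoding first...
        self.change_hook()  # ...then notify listeners exactly once


calls = []
binding = EncodingBinding(FakeEncoding(), lambda: calls.append(1))
binding.x_changed_callback("name", "old", "new")
assert binding.encoding.x == "new" and len(calls) == 1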
-------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_sendstringtosparkcommand.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import pytest 3 | from mock import MagicMock 4 | 5 | from sparkmagic.livyclientlib.sendstringtosparkcommand import SendStringToSparkCommand 6 | from sparkmagic.livyclientlib.exceptions import BadUserDataException 7 | from sparkmagic.livyclientlib.command import Command 8 | import sparkmagic.utils.constants as constants 9 | 10 | 11 | def test_send_to_scala(): 12 | input_variable_name = "input" 13 | input_variable_value = "value" 14 | output_variable_name = "output" 15 | sparkcommand = SendStringToSparkCommand( 16 | input_variable_name, input_variable_value, output_variable_name 17 | ) 18 | sparkcommand._scala_command = MagicMock(return_value=MagicMock()) 19 | sparkcommand.to_command( 20 | constants.SESSION_KIND_SPARK, 21 | input_variable_name, 22 | input_variable_value, 23 | output_variable_name, 24 | ) 25 | sparkcommand._scala_command.assert_called_with( 26 | input_variable_name, input_variable_value, output_variable_name 27 | ) 28 | 29 | 30 | def test_send_to_r(): 31 | input_variable_name = "input" 32 | input_variable_value = "value" 33 | output_variable_name = "output" 34 | sparkcommand = SendStringToSparkCommand( 35 | input_variable_name, input_variable_value, output_variable_name 36 | ) 37 | sparkcommand._r_command = MagicMock(return_value=MagicMock()) 38 | sparkcommand.to_command( 39 | constants.SESSION_KIND_SPARKR, 40 | input_variable_name, 41 | input_variable_value, 42 | output_variable_name, 43 | ) 44 | sparkcommand._r_command.assert_called_with( 45 | input_variable_name, input_variable_value, output_variable_name 46 | ) 47 | 48 | 49 | def test_send_to_pyspark(): 50 | input_variable_name = "input" 51 | input_variable_value = "value" 52 | output_variable_name = "output" 53 | sparkcommand = SendStringToSparkCommand( 54 | input_variable_name, input_variable_value, output_variable_name 55 | ) 56 | sparkcommand._pyspark_command = MagicMock(return_value=MagicMock()) 57 | sparkcommand.to_command( 58 | constants.SESSION_KIND_PYSPARK, 59 | input_variable_name, 60 | input_variable_value, 61 | output_variable_name, 62 | ) 63 | sparkcommand._pyspark_command.assert_called_with( 64 | input_variable_name, input_variable_value, output_variable_name 65 | ) 66 | 67 | 68 | def test_to_command_invalid(): 69 | input_variable_name = "input" 70 | input_variable_value = 42 71 | output_variable_name = "output" 72 | sparkcommand = SendStringToSparkCommand( 73 | input_variable_name, input_variable_value, output_variable_name 74 | ) 75 | with pytest.raises( 76 | BadUserDataException, 77 | ): 78 | sparkcommand.to_command( 79 | "invalid", 80 | input_variable_name, 81 | input_variable_value, 82 | output_variable_name, 83 | ) 84 | 85 | 86 | def test_should_raise_when_input_aint_a_string(): 87 | input_variable_name = "input" 88 | input_variable_value = 42 89 | output_variable_name = "output" 90 | sparkcommand = SendStringToSparkCommand( 91 | input_variable_name, input_variable_value, output_variable_name 92 | ) 93 | with pytest.raises( 94 | BadUserDataException, 95 | ): 96 | sparkcommand.to_command( 97 | "spark", 98 | input_variable_name, 99 | input_variable_value, 100 | output_variable_name, 101 | ) 102 | 103 | 104 | def test_should_create_a_valid_scala_expression(): 105 | input_variable_name = "input" 106 | input_variable_value = "value" 107 | output_variable_name = 
"output" 108 | sparkcommand = SendStringToSparkCommand( 109 | input_variable_name, input_variable_value, output_variable_name 110 | ) 111 | assert sparkcommand._scala_command( 112 | input_variable_name, input_variable_value, output_variable_name 113 | ) == Command('var {} = """{}"""'.format(output_variable_name, input_variable_value)) 114 | 115 | 116 | def test_should_create_a_valid_python_expression(): 117 | input_variable_name = "input" 118 | input_variable_value = "value" 119 | output_variable_name = "output" 120 | sparkcommand = SendStringToSparkCommand( 121 | input_variable_name, input_variable_value, output_variable_name 122 | ) 123 | assert sparkcommand._pyspark_command( 124 | input_variable_name, input_variable_value, output_variable_name 125 | ) == Command("{} = {}".format(output_variable_name, repr(input_variable_value))) 126 | 127 | 128 | def test_should_create_a_valid_r_expression(): 129 | input_variable_name = "input" 130 | input_variable_value = "value" 131 | output_variable_name = "output" 132 | sparkcommand = SendStringToSparkCommand( 133 | input_variable_name, input_variable_value, output_variable_name 134 | ) 135 | assert sparkcommand._r_command( 136 | input_variable_name, input_variable_value, output_variable_name 137 | ) == Command( 138 | """assign("{}","{}")""".format(output_variable_name, input_variable_value) 139 | ) 140 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Distributed under the terms of the Modified BSD License. 2 | from IPython.core.error import UsageError 3 | from IPython.core.magic_arguments import parse_argstring 4 | import numpy as np 5 | import pandas as pd 6 | import json 7 | import importlib 8 | from collections import OrderedDict 9 | 10 | import sparkmagic.utils.configuration as conf 11 | import sparkmagic.utils.constants as constants 12 | from sparkmagic.livyclientlib.exceptions import ( 13 | BadUserDataException, 14 | DataFrameParseException, 15 | BadUserConfigurationException, 16 | ) 17 | 18 | 19 | def get_coerce_value(coerce): 20 | if coerce is not None: 21 | coerce = coerce.lower() in ("yes", "true", "t", "y", "1") 22 | return coerce 23 | 24 | 25 | def parse_argstring_or_throw(magic_func, argstring, parse_argstring=parse_argstring): 26 | """An alternative to the parse_argstring method from IPython.core.magic_arguments. 
27 | Catches IPython.core.error.UsageError and propagates it as a 28 | livyclientlib.exceptions.BadUserDataException.""" 29 | try: 30 | return parse_argstring(magic_func, argstring) 31 | except UsageError as e: 32 | raise BadUserDataException(str(e)) 33 | 34 | 35 | def coerce_pandas_df_to_numeric_datetime(df): 36 | for column_name in df.columns: 37 | coerced = False 38 | 39 | if df[column_name].isnull().all(): 40 | continue 41 | 42 | if not coerced and df[column_name].dtype == np.dtype("object"): 43 | try: 44 | df[column_name] = pd.to_datetime(df[column_name], errors="raise") 45 | coerced = True 46 | except (ValueError, TypeError, OverflowError): 47 | pass 48 | 49 | if not coerced and df[column_name].dtype == np.dtype("object"): 50 | try: 51 | df[column_name] = pd.to_numeric(df[column_name], errors="raise") 52 | coerced = True 53 | except (ValueError, TypeError): 54 | pass 55 | 56 | 57 | def records_to_dataframe(records_text, kind, coerce=None): 58 | if records_text in ["", "[]"]: 59 | strings = [] 60 | else: 61 | strings = records_text.strip().split("\n") 62 | try: 63 | data_array = [ 64 | json.JSONDecoder(object_pairs_hook=OrderedDict).decode(s) for s in strings 65 | ] 66 | 67 | if kind == constants.SESSION_KIND_SPARKR and len(data_array) > 0: 68 | data_array = data_array[0] 69 | 70 | df = pd.DataFrame(data_array) 71 | 72 | if len(data_array) > 0: 73 | # This will assign the columns in the right order. If we simply did 74 | # df = pd.DataFrame(data_array, columns=data_array[0].keys()) 75 | # in the code defining df, above, we could get an issue where the first element 76 | # has some columns as null, and thus would drop the columns from the df altogether. 77 | # Refer to https://github.com/jupyter-incubator/sparkmagic/issues/346 for 78 | # more details. 79 | for data in data_array: 80 | if len(data.keys()) == len(df.columns): 81 | df = df[list(data.keys())] 82 | break 83 | 84 | if coerce is None: 85 | coerce = conf.coerce_dataframe() 86 | if coerce: 87 | coerce_pandas_df_to_numeric_datetime(df) 88 | 89 | return df 90 | except ValueError: 91 | raise DataFrameParseException( 92 | "Cannot parse object as JSON: '{}'".format(strings) 93 | ) 94 | 95 | 96 | def get_sessions_info_html(info_sessions, current_session_id): 97 | html = ( 98 | """<table> 99 | <tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>User</th><th>Current session?</th></tr>""" 100 | + "".join( 101 | [session.get_row_html(current_session_id) for session in info_sessions] 102 | ) 103 | + "</table>"
104 | ) 105 | 106 | return html 107 | 108 | 109 | def initialize_auth(args): 110 | """Creates an authenticator class instance for the given auth type 111 | 112 | Args: 113 | args (IPython.core.magics.namespace): The namespace object that is created from 114 | parsing %spark magic command 115 | 116 | Returns: 117 | An instance of a valid Authenticator or None if args.auth is 'None' 118 | 119 | Raises: 120 | sparkmagic.livyclientlib.BadUserConfigurationException: if args.auth is not a valid 121 | authenticator class. 122 | """ 123 | if args.auth is None: 124 | auth = conf.get_auth_value(args.user, args.password) 125 | else: 126 | auth = args.auth 127 | if auth == constants.NO_AUTH: 128 | return None 129 | else: 130 | full_class = conf.authenticators().get(auth) 131 | if full_class is None: 132 | raise BadUserConfigurationException("Auth '{}' not supported".format(auth)) 133 | module, class_name = (full_class).rsplit(".", 1) 134 | events_handler_module = importlib.import_module(module) 135 | auth_class = getattr(events_handler_module, class_name) 136 | return auth_class(args) 137 | 138 | 139 | class Namespace: 140 | """Namespace to initialize authenticator class with""" 141 | 142 | def __init__(self, **kwargs): 143 | self.__dict__.update(kwargs) 144 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/livyclientlib/reliablehttpclient.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 aggftw@gmail.com 2 | # Distributed under the terms of the Modified BSD License. 3 | import json 4 | from time import sleep 5 | import requests 6 | import sparkmagic.utils.configuration as conf 7 | from sparkmagic.utils.sparklogger import SparkLog 8 | from .exceptions import HttpClientException, HttpSessionAdapterConfigException 9 | import importlib 10 | 11 | 12 | class ReliableHttpClient(object): 13 | """Http client that is reliable in its requests. Uses requests library.""" 14 | 15 | def __init__(self, endpoint, headers, retry_policy): 16 | self._endpoint = endpoint 17 | self._headers = headers 18 | self._retry_policy = retry_policy 19 | self._auth = self._endpoint.auth 20 | self._session = requests.Session() 21 | self.logger = SparkLog("ReliableHttpClient") 22 | self.verify_ssl = not conf.ignore_ssl_errors() 23 | if not self.verify_ssl: 24 | self.logger.debug( 25 | "ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
26 | ) 27 | requests.packages.urllib3.disable_warnings() 28 | self._set_http_session_config() 29 | 30 | def _set_http_session_config(self): 31 | http_session_config = conf.http_session_config() 32 | if http_session_config and http_session_config.get("adapters"): 33 | self._set_http_session_adapters(http_session_config["adapters"]) 34 | 35 | def _set_http_session_adapters(self, adapters): 36 | for adapter in adapters: 37 | full_class = adapter.get("adapter") 38 | adapter_prefix = adapter.get("prefix") 39 | if full_class is None or adapter_prefix is None: 40 | raise HttpSessionAdapterConfigException( 41 | "Invalid http session adapter config, prefix: {} or class: {} " 42 | "not defined correctly".format(adapter_prefix, full_class) 43 | ) 44 | module, class_name = full_class.rsplit(".", 1) 45 | adapter_module = importlib.import_module(module) 46 | adapter_class = getattr(adapter_module, class_name) 47 | self._session.mount(adapter_prefix, adapter_class()) 48 | 49 | def get_headers(self): 50 | return self._headers 51 | 52 | def compose_url(self, relative_url): 53 | r_u = "/{}".format(relative_url.rstrip("/").lstrip("/")) 54 | return self._endpoint.url + r_u 55 | 56 | def get(self, relative_url, accepted_status_codes): 57 | """Sends a get request. Returns a response.""" 58 | return self._send_request( 59 | relative_url, accepted_status_codes, self._session.get 60 | ) 61 | 62 | def post(self, relative_url, accepted_status_codes, data): 63 | """Sends a post request. Returns a response.""" 64 | return self._send_request( 65 | relative_url, accepted_status_codes, self._session.post, data 66 | ) 67 | 68 | def delete(self, relative_url, accepted_status_codes): 69 | """Sends a delete request. Returns a response.""" 70 | return self._send_request( 71 | relative_url, accepted_status_codes, self._session.delete 72 | ) 73 | 74 | def _send_request(self, relative_url, accepted_status_codes, function, data=None): 75 | return self._send_request_helper( 76 | self.compose_url(relative_url), accepted_status_codes, function, data, 0 77 | ) 78 | 79 | def _send_request_helper( 80 | self, url, accepted_status_codes, function, data, retry_count 81 | ): 82 | while True: 83 | try: 84 | if data is None: 85 | r = function( 86 | url, 87 | headers=self._headers, 88 | auth=self._auth, 89 | verify=self.verify_ssl, 90 | ) 91 | else: 92 | r = function( 93 | url, 94 | headers=self._headers, 95 | auth=self._auth, 96 | data=json.dumps(data), 97 | verify=self.verify_ssl, 98 | ) 99 | except requests.exceptions.RequestException as e: 100 | error = True 101 | r = None 102 | status = None 103 | text = None 104 | 105 | self.logger.error("Request to '{}' failed with '{}'".format(url, e)) 106 | else: 107 | error = False 108 | status = r.status_code 109 | text = r.text 110 | 111 | if error or status not in accepted_status_codes: 112 | if self._retry_policy.should_retry(status, error, retry_count): 113 | sleep(self._retry_policy.seconds_to_sleep(retry_count)) 114 | retry_count += 1 115 | continue 116 | 117 | if error: 118 | raise HttpClientException( 119 | "Error sending http request and maximum retry encountered." 
120 | ) 121 | else: 122 | raise HttpClientException( 123 | "Invalid status code '{}' from {} with error payload: {}".format( 124 | status, url, text 125 | ) 126 | ) 127 | return r 128 | -------------------------------------------------------------------------------- /sparkmagic/sparkmagic/tests/test_sessionmanager.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import pytest 3 | from mock import MagicMock, PropertyMock 4 | 5 | import sparkmagic.utils.configuration as conf 6 | from sparkmagic.livyclientlib.exceptions import SessionManagementException 7 | from sparkmagic.livyclientlib.sessionmanager import SessionManager 8 | 9 | 10 | def test_get_client_throws_when_client_not_exists(): 11 | with pytest.raises(SessionManagementException): 12 | manager = get_session_manager() 13 | manager.get_session("name") 14 | 15 | 16 | def test_get_client(): 17 | client = MagicMock() 18 | manager = get_session_manager() 19 | 20 | manager.add_session("name", client) 21 | 22 | assert client == manager.get_session("name") 23 | 24 | 25 | def test_delete_client(): 26 | with pytest.raises(SessionManagementException): 27 | client = MagicMock() 28 | manager = get_session_manager() 29 | 30 | manager.add_session("name", client) 31 | manager.delete_client("name") 32 | 33 | manager.get_session("name") 34 | 35 | 36 | def test_delete_client_throws_when_client_not_exists(): 37 | with pytest.raises(SessionManagementException): 38 | manager = get_session_manager() 39 | 40 | manager.delete_client("name") 41 | 42 | 43 | def test_add_client_throws_when_client_exists(): 44 | with pytest.raises(SessionManagementException): 45 | client = MagicMock() 46 | manager = get_session_manager() 47 | 48 | manager.add_session("name", client) 49 | manager.add_session("name", client) 50 | 51 | 52 | def test_client_names_returned(): 53 | client = MagicMock() 54 | manager = get_session_manager() 55 | 56 | manager.add_session("name0", client) 57 | manager.add_session("name1", client) 58 | 59 | assert {"name0", "name1"} == set(manager.get_sessions_list()) 60 | 61 | 62 | def test_get_any_client(): 63 | client = MagicMock() 64 | manager = get_session_manager() 65 | 66 | manager.add_session("name", client) 67 | 68 | assert client == manager.get_any_session() 69 | 70 | 71 | def test_get_any_client_raises_exception_with_no_client(): 72 | with pytest.raises(SessionManagementException): 73 | manager = get_session_manager() 74 | 75 | manager.get_any_session() 76 | 77 | 78 | def test_get_any_client_raises_exception_with_two_clients(): 79 | with pytest.raises(SessionManagementException): 80 | client = MagicMock() 81 | manager = get_session_manager() 82 | manager.add_session("name0", client) 83 | manager.add_session("name1", client) 84 | 85 | manager.get_any_session() 86 | 87 | 88 | def test_clean_up(): 89 | client0 = MagicMock() 90 | client1 = MagicMock() 91 | manager = get_session_manager() 92 | manager.add_session("name0", client0) 93 | manager.add_session("name1", client1) 94 | 95 | manager.clean_up_all() 96 | 97 | client0.delete.assert_called_once_with() 98 | client1.delete.assert_called_once_with() 99 | 100 | 101 | def test_cleanup_all_sessions_on_exit(): 102 | conf.override(conf.cleanup_all_sessions_on_exit.__name__, True) 103 | client0 = MagicMock() 104 | client1 = MagicMock() 105 | manager = get_session_manager() 106 | manager.add_session("name0", client0) 107 | manager.add_session("name1", client1) 108 | 109 | atexit._run_exitfuncs() 110 | 111 | client0.delete.assert_called_once_with() 
112 | client1.delete.assert_called_once_with() 113 | manager.ipython_display.writeln.assert_called_once_with( 114 | "Cleaning up livy sessions on exit is enabled" 115 | ) 116 | 117 | 118 | def test_cleanup_all_sessions_on_exit_fails(): 119 | """Cleanup on exit is best effort only. 120 | 121 | When cleanup fails, exception is caught and error is logged. 122 | """ 123 | conf.override(conf.cleanup_all_sessions_on_exit.__name__, True) 124 | client0 = MagicMock() 125 | client1 = MagicMock() 126 | client0.delete.side_effect = Exception("Mocked exception for client1.delete") 127 | manager = get_session_manager() 128 | manager.add_session("name0", client0) 129 | manager.add_session("name1", client1) 130 | 131 | atexit._run_exitfuncs() 132 | 133 | client0.delete.assert_called_once_with() 134 | client1.delete.assert_not_called() 135 | 136 | 137 | def test_get_session_id_for_client(): 138 | manager = get_session_manager() 139 | manager.get_sessions_list = MagicMock(return_value=["name"]) 140 | manager._sessions["name"] = MagicMock() 141 | 142 | id = manager.get_session_id_for_client("name") 143 | 144 | assert id is not None 145 | 146 | 147 | def test_get_session_name_by_id_endpoint(): 148 | manager = get_session_manager() 149 | id_to_search = "0" 150 | endpoint_to_search = "endpoint" 151 | name_to_search = "name" 152 | 153 | name = manager.get_session_name_by_id_endpoint(id_to_search, endpoint_to_search) 154 | assert None == name 155 | 156 | session = MagicMock() 157 | type(session).id = PropertyMock(return_value=int(id_to_search)) 158 | session.endpoint = endpoint_to_search 159 | 160 | manager.add_session(name_to_search, session) 161 | name = manager.get_session_name_by_id_endpoint(id_to_search, endpoint_to_search) 162 | assert name_to_search == name 163 | 164 | 165 | def test_get_session_id_for_client_not_there(): 166 | manager = get_session_manager() 167 | manager.get_sessions_list = MagicMock(return_value=[]) 168 | 169 | id = manager.get_session_id_for_client("name") 170 | 171 | assert id is None 172 | 173 | 174 | def get_session_manager(): 175 | ipython_display = MagicMock() 176 | return SessionManager(ipython_display) 177 | --------------------------------------------------------------------------------
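The two atexit tests above pin down the shape of cleanup at interpreter exit: it is registered through the standard atexit module and is best effort as a whole rather than per session, since a failure on one delete aborts the loop (client1.delete is never reached) while the exception is still caught and logged so shutdown proceeds. A sketch of that shape; the class, method, and logger names are illustrative, not copied from sessionmanager.py:

import atexit


class CleanupOnExit(object):
    # Registers a best-effort exit hook mirroring the behavior the tests
    # above assert against SessionManager.
    def __init__(self, sessions, logger):
        self._sessions = sessions
        self._logger = logger
        atexit.register(self._cleanup_all_sessions_on_exit)

    def _cleanup_all_sessions_on_exit(self):
        try:
            for session in self._sessions.values():
                session.delete()  # the first failure aborts the whole loop...
        except Exception as e:
            # ...but is swallowed here so interpreter shutdown is undisturbed
            self._logger.error("Error cleaning up sessions on exit: {}".format(e))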