├── .github ├── ISSUE_TEMPLATE │ ├── Bug_report.md │ └── feature_request.md └── PULL_REQUEST_TEMPLATE.md ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── ROADMAP.md ├── build ├── ci.sh ├── env.sh ├── extension_init.sh └── functions.sh ├── conf └── sample_config.json ├── docs ├── CONTRIBUTING.md ├── getting-started │ └── build-ppextensions.md ├── index.md ├── ppextensions-github-integration │ └── github-integration.md ├── ppextensions-scheduler │ └── scheduler.md └── ppmagics │ ├── csv.md │ ├── hive.md │ ├── presto.md │ ├── publish.md │ ├── run.md │ ├── run_pipeline.md │ ├── sts.md │ └── teradata.md ├── github ├── __init__.py ├── github.py └── static │ ├── github.js │ ├── githubcommit.js │ └── githubmain.js ├── mkdocs.yml ├── ppextensions ├── __init__.py ├── ppmagics │ ├── __init__.py │ ├── parameters.py │ └── ppmagics.py ├── ppsql │ ├── __init__.py │ └── connection │ │ ├── __init__.py │ │ ├── basesql.py │ │ ├── csvconnection.py │ │ ├── hiveconnection.py │ │ ├── prestoconnection.py │ │ └── teradataconnection.py └── pputils │ ├── __init__.py │ ├── utils │ ├── __init__.py │ ├── configuration.py │ ├── constants.py │ ├── exceptions.py │ ├── filesystemreader.py │ ├── log.py │ ├── parameterargs.py │ ├── resultset.py │ ├── tableau.py │ ├── utils.py │ └── yarnapi.py │ └── widgets │ ├── __init__.py │ ├── messages.py │ ├── ppwidgets.py │ ├── widgets.py │ └── widgetsfactory.py ├── scheduler ├── __init__.py ├── scheduler.py ├── static │ ├── daginfo.html │ ├── editdag.html │ ├── scheduler.js │ └── schedulermain.js └── template │ ├── dag_template.py │ └── var_template.conf └── setup.py /.github/ISSUE_TEMPLATE/Bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | 9 | 10 | **To Reproduce** 11 | 12 | 13 | **Expected behavior** 14 | 15 | 16 | **Screenshots** 17 | 18 | 19 | **Potential 
fix** 20 | 9 | 10 | **What benefits do you foresee from the feature you are requesting?** 11 | 12 | 13 | **Potential solution/ideas?** 14 | 15 | 16 | **Additional context** 17 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Make sure you have checked all steps below. 2 | 3 | ### GitHub Issue 4 | Fixes # 5 | 6 | ### Checklist: 7 | 8 | - [ ] This pull request updates the documentation 9 | - [ ] This pull request changes library dependencies 10 | - [ ] Title of the PR is of format (example) : [#17] Add Issue Template 11 | 12 | 13 | 14 | 15 | 16 | ### What is the purpose of this pull request? 17 | 18 | 19 | 20 | ### How was this change validated? 21 | 22 | 23 | 24 | ### Commit Guidelines 25 | - [ ] My commits all reference GH issues in their subject lines, and I have squashed multiple commits if they address the same issue. In addition, my commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git-commit/)": 26 | 1. Subject is separated from body by a blank line 27 | 2. Subject is limited to 50 characters 28 | 3. Subject does not end with a period 29 | 4. Subject uses the imperative mood ("add", not "adding") 30 | 5. Body wraps at 72 characters 31 | 6. 
Body explains "what" and "why", not "how" 32 | 33 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '3.5' 5 | 6 | branches: 7 | only: 8 | - master 9 | - develop 10 | 11 | script: 12 | - bash build/ci.sh 13 | 14 | 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | # Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment - free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | # Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting / derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | # Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | # Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e - mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | # Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at help-gimel-coc@paypal.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | # Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at[http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing to PPExtensions 3 | 4 | # Discussions 5 | 6 | Our recommendation is to start a slack discussion as soon as you have an idea for contributing to PPExtensions. 7 | This will help you, other interested contributors & the committers get to common grounds early in time. 8 | Contact the ppextensions community on[slack](https://ppextensions.slack.com​). 9 | 10 | -------------------------------------------------------------------------------------------------------------------- 11 | 12 | # How can you help 13 | 14 | # Code 15 | Look for Issues that are open on below categories, pick an issue, start a slack discussion on slack channel, once we get to common grounds on the solution approach, open a PR with your implementation. 
16 | * Bug Fixes 17 | * Enhancements & Improvements(Jira Details) 18 | * Add a new connector or major feature that you see missing in PPExtensions 19 | 20 | # Documentation 21 | You can also improve our documentation 22 | * readme.md 23 | * docs / *.md 24 | * Any other valuable comment you'd like to add to the code that will simplify other developers' lives. 25 | 26 | -------------------------------------------------------------------------------------------------------------------- 27 | 28 | 29 | # Contribution Process 30 | 31 | * Get your github account. 32 | * Fork the ppextensions repo into your account. 33 | * Create an issue branch using the master branch. 34 | * Make modifications to the code. 35 | * Ensure code coverage by added test cases. 36 | * All commits must have the issue ID & summary. Say "[ISSUE-10] Update readme.md for Scheduler". 37 | * Ensure all your commits are squashed. 38 | * Make a Pull Request to dev branch. 39 | * If there are code review related changes - ensure those commits are also squashed. 40 | * DO NOT include changes that are not directly related to the issue you are trying to resolve. 41 | * Once PR is approved, code will be merged to Development branch. 42 | * Once all regression test cases have passed - changes will be merged to master branch. 43 | 44 | -------------------------------------------------------------------------------------------------------------------- 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3 - Clause License 2 | 3 | Copyright(c) 2018, PPExtensions Contributors 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and / or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES 27 | LOSS OF USE, DATA, OR PROFITS 28 | OR BUSINESS INTERRUPTION) HOWEVER 29 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 | OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/paypal/PPExtensions.svg?branch=master)](https://travis-ci.org/paypal/PPExtensions) 2 | [![Documentation Status](https://readthedocs.org/projects/ppextensions/badge/?version=latest)](http://ppextensions.readthedocs.io/en/latest/?badge=latest) 3 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/a159cd4835e34c8abb1d738ff996d065)](https://www.codacy.com/app/ppextensions-team/PPExtensions?utm_source=github.com&utm_medium=referral&utm_content=paypal/PPExtensions&utm_campaign=Badge_Grade) 4 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 5 | 6 | # PPExtensions 7 | 8 | PPExtensions is a suite of ipython and jupyter extensions built to improve user experience and reduce time to market in [Jupyter](http://jupyter.org) notebooks. 9 | 10 | 11 | # Features 12 | 13 | - **PPMagics** - Set of magics to simplify access to different storage systems and tableau. 14 | - **Github Integration** - A jupyter extension to integrate notebooks with github. This extension simplifies version controlling, sharing and resolving merge conflicts of notebooks. 15 | - **Notebooks Scheduling** - A jupyter extension to productionalize the notebooks development environment. This extension enables scheduling notebooks with help of [Apache Airflow](https://airflow.apache.org/). 
16 | 17 | 18 | # Installation 19 | 20 | pip install ppextensions 21 | 22 | 23 | # Current State 24 | 25 | | Feature | Available | State | 26 | |---------------------- | ------------- | -------------| 27 | | PPMagics | Available | Beta | 28 | | Scheduling Notebooks | Available | Beta | 29 | | Github Integration | Available | Beta | 30 | 31 | -------------------------------------------------------------------------------------------------------------------- 32 | 33 | # Documentation & Getting Started 34 | 35 | * [Click here to read the docs](http://ppextensions.readthedocs.io/) 36 | 37 | # Questions 38 | 39 | * [Slack](https://ppextensions.slack.com) 40 | * [User Forum](https://groups.google.com/d/forum/ppextensions) 41 | * [Developer Forum](https://groups.google.com/d/forum/ppextensions) 42 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | ## ROADMAP 2 | 3 | This roadmap is organized into stages of development, leading towards a backend for (mostly) real-time collaboration. 4 | 5 | ### Stage I 6 | 7 | - PPMagics 8 | - [x] Hive magic 9 | - [x] Teradata Magic 10 | - [x] STS (Spark Thrift Server) magic 11 | - [x] Presto magic 12 | - [x] CSV magic 13 | - [x] Run Magic 14 | 15 | - [x] Tableau publish 16 | 17 | ### Stage II 18 | 19 | - [x] Save notebooks back to S3 20 | - [x] Delete notebooks 21 | 22 | ### Stage III 23 | 24 | - [x] Render page using nteract/nteract components 25 | - [ ] Configurable base path for commuter app 26 | - [ ] Start outlining an authentication and permissions strategy 27 | 28 | ### Stage IV 29 | 30 | - [ ] Create server side in-memory model of notebook and transient models, push to clients 31 | - [ ] Provide/use kernels from configured source (e.g. 
tmpnb.org, jupyterhub, or your private setup) -------------------------------------------------------------------------------- /build/ci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export REPO_HOME=$(pwd) 4 | export BUILD_DIR=${REPO_HOME}/build 5 | export WORK_DIR=${REPO_HOME}/.tmp 6 | 7 | # Fetch reusable functions ... 8 | source ${BUILD_DIR}/functions.sh 9 | # Fetch constants and environment variables ... 10 | source ${BUILD_DIR}/env.sh 11 | 12 | export args=$* 13 | 14 | write_log "Arguments --> ${args}" 15 | 16 | write_log "################### Cleanup #####################" 17 | 18 | write_log "Work Directory --> ${WORK_DIR}" 19 | run_cmd "rm -rf ${WORK_DIR}" 20 | run_cmd "mkdir ${WORK_DIR}" 21 | 22 | #Place as many pre-requisite steps here as requires... 23 | write_log "#################### Install Requisite Packages ###############" 24 | 25 | write_log " ----------- 1. Install Tableau SDK ------------- " 26 | 27 | run_cmd "cd ${WORK_DIR}" 28 | run_cmd "wget ${TABLEAU_URL}" 29 | run_cmd "tar -xvf ${TABLEAU_TAR_BALL}" 30 | run_cmd "export TABLEAU_DIR=$(ls | grep Tableau | grep -v gz)" 31 | run_cmd "cd ${TABLEAU_DIR}" 32 | run_cmd "python setup.py install" 33 | run_cmd "cd ${REPO_HOME}" 34 | 35 | write_log "#################### Install PPExtensions ####################" 36 | 37 | run_cmd "pip install ppextensions" 38 | 39 | write_log "################### Final Cleanup #########################" 40 | 41 | run_cmd "rm -rf ${WORK_DIR}" 42 | 43 | write_log "######################## BUILD SUCCESS ###############################" 44 | 45 | -------------------------------------------------------------------------------- /build/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export TABLEAU_URL=https://downloads.tableau.com/tssoftware/Tableau-SDK-Python-Linux-64Bit-10-3-14.tar.gz 4 | export 
TABLEAU_TAR_BALL=Tableau-SDK-Python-Linux-64Bit-10-3-14.tar.gz 5 | -------------------------------------------------------------------------------- /build/extension_init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | jupyter nbextension install scheduler --user --py 3 | jupyter nbextension enable scheduler --user --py 4 | jupyter serverextension enable scheduler --py --user 5 | 6 | jupyter nbextension install github --user --py 7 | jupyter nbextension enable github --user --py 8 | jupyter serverextension enable github --py --user 9 | -------------------------------------------------------------------------------- /build/functions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # This is a re-usable functions file 5 | # Other scripts may use the functions in this file by sourcing them 6 | 7 | #----------------------------function will check for error code & exit if failure, else proceed further----------------------------# 8 | 9 | #usage : check_error <$?> 10 | #Example: Check_error < pass $? from the shell command > < Custom Message for errorcode -gt 0 > 11 | 12 | check_error() 13 | { 14 | cmd_error_code=$1 15 | custom_message=$2 16 | if [ ${cmd_error_code} -gt 0 ]; then 17 | write_log "Error | Stage | ${custom_message}" 18 | exit ${cmd_error_code} 19 | else 20 | write_log "Success | Stage | ${custom_message}" 21 | fi 22 | } 23 | 24 | #----------------------------function will check for error code & warn if failure----------------------------# 25 | 26 | #usage : check_warning <$?> 27 | #Example: Check_warning < pass $? from the shell command > < Custom Message for errorcode -gt 0 > 28 | 29 | 30 | check_warning() 31 | { 32 | 33 | cmd_error_code=$1 34 | pgm_exit_code=$2 35 | pgm_exit_msg=$3 36 | if [ ${cmd_error_code} -gt 0 ]; then 37 | write_log "WARNING ! 
${cmd_error_code} ${pgm_exit_code} ${pgm_exit_msg}" 38 | else 39 | echo "" 40 | fi 41 | } 42 | 43 | 44 | 45 | #----------------------------function will write the message to Console / Log File----------------------------# 46 | 47 | #Usage : write_log < Whatever message you need to log > 48 | 49 | write_log() 50 | { 51 | msg=$1 52 | to_be_logged="$(date '+%Y%m%d %H:%M:%S') | $msg" 53 | echo ${to_be_logged} 54 | } 55 | 56 | #-----------------------------------Executes a Command--------------------------------------------------------# 57 | 58 | 59 | 60 | #Usage : run_cmd < The command to execute > 61 | 62 | run_cmd() 63 | { 64 | cmd=$1 65 | if [ -z $2 ]; then 66 | fail_on_error="break_code" 67 | else 68 | fail_on_error=$2 69 | fi 70 | write_log "Executing Command --> $1" 71 | $cmd 72 | error_code=$? 73 | if [ ! $fail_on_error = "ignore_errors" ]; then 74 | check_error $error_code "$cmd" 75 | fi 76 | } 77 | -------------------------------------------------------------------------------- /conf/sample_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "hive":{ 3 | "sample":{ 4 | "host":"host_name", 5 | "port":1, 6 | "auth":"plain", 7 | "resource_manager_url":"url_name", 8 | "name_node_url":"name_node", 9 | "name_node_opts":{ 10 | "hadoop.security.authentication":"plain" 11 | } 12 | } 13 | }, 14 | "sts":{ 15 | "sample":{ 16 | "host":"host_name", 17 | "port":10025, 18 | "auth":"plain", 19 | "resource_manager_url":"url_name", 20 | "name_node":"name_node", 21 | "name_node_port":10015, 22 | "name_node_opts":{ 23 | "hadoop.security.authentication":"plain" 24 | } 25 | } 26 | }, 27 | "teradata":{ 28 | "sample1":{ 29 | "host":"host_name" 30 | }, 31 | "sample_2":{ 32 | "host":"host_name" 33 | } 34 | }, 35 | "tableau":{ 36 | "site_name":"site_name", 37 | "user_name":"", 38 | "password":"" 39 | } 40 | } -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Contributing to PPExtensions 3 | 4 | ## Discussions 5 | 6 | Our recommendation is to start a slack discussion as soon as you have an idea for contributing to PPExtensions. 7 | This will help you, other interested contributors & the committers get to common grounds early in time. 8 | Contact the PPExtenions community on [slack](https://join.slack.com/t/ppextensions/shared_invite/enQtNDIyODk5NzYzMzEyLTIwOGM3MWE0OGZlNjFkYTUxZTJiN2NjOWFlNmUxNDRiY2U3MzE0Nzg5NDRjZjE2M2VmZGI4NWJhOGVjYTRiMTk). 9 | 10 | -------------------------------------------------------------------------------------------------------------------- 11 | 12 | ## How can you help 13 | 14 | ### Code 15 | Look for Issues that are open on below categories, pick an issue, start a slack discussion on slack channel, once we get to common grounds on the solution approach, open a PR with your implementation. 16 | * Bug Fixes 17 | * Enhancements & Improvements (Jira Details) 18 | * Add a new extension that you see are useful to Jupyter community 19 | 20 | ### Documentation 21 | You can also improve our documentation 22 | * readme.md 23 | * docs/*.md 24 | * Any other valuable comment you'd like to add to the code that will simplify other developers' lives. 25 | 26 | -------------------------------------------------------------------------------------------------------------------- 27 | 28 | 29 | ## Contribution Process 30 | 31 | * Get your github account. 32 | * Fork the PPExtensions repo into your account. 33 | * Create an issue branch using the master branch. 34 | * Make modifications to the code. 35 | * Ensure code coverage by added test cases. 36 | * All commits must have the issue ID & summary. Say "[#32] Add Codacy Integration and Badge". 37 | * Ensure all your commits are squashed. 38 | * Make a Pull Request to develop branch. 39 | * If there are code review related changes - ensure those commits are also squashed. 
40 | * DO NOT include changes that are not directly related to the issue you are trying to resolve. 41 | * Once PR is approved, code will be merged to Development branch. 42 | * Once all regression test cases have passed - changes will be merged to master branch. 43 | 44 | -------------------------------------------------------------------------------------------------------------------- 45 | 46 | -------------------------------------------------------------------------------- /docs/getting-started/build-ppextensions.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | ``` 3 | pip install ppextensions 4 | ``` 5 | 6 | ## Try ppextensions magics 7 | ``` 8 | %load_ext ppextensions.ppmagics 9 | ``` 10 | 11 | Try help to see all the available magics and options 12 | ``` 13 | %help 14 | ``` 15 | 16 | | Magic | Usage | Explanation | 17 | |---------------------|----------------|--------------------------------------------------------------------------------------------------------------------------------| 18 | | hive | %hive? | Connects to hive engine. Hive magic also gives options to insert csv/dataframe to hive and publish data to tableau. | 19 | | teradata | %teradata? | Connects to teradata engine. Teradata magic also has options to insert csv/dataframe to teradata and publish data to tableau. | 20 | | presto | %presto? | Connects to presto engine. Presto magic also has options to publish data to tableau. | 21 | | Spark Thrift Server | %sts? | Connects to Spark Thrift Server. Sts magic also has options to publish data to tableau. | 22 | | CSV | %csv? | Runs sqls on top of csv files. CSV magic also has options to publish data to tableau. | 23 | | run | %run? | Runs a notebook from another notebook. Allows for running parameterized notebooks. | 24 | | run_pipeline | %run_pipeline? | Run notebooks sequentially in a stateful Pipeline.
| 25 | 26 | 27 | For more info: 28 | [Github Link](https://github.com/paypal/PPExtensions/) 29 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # PPExtensions 2 | 3 | PPExtensions is a suite of ipython and jupyter extensions built to improve user experience and reduce time to market in [Jupyter](http://jupyter.org) notebooks. 4 | 5 | 6 | # Features 7 | 8 | - **[PPMagics](ppmagics/csv.md)** - Set of magics to simplify access to different storage systems and tableau. 9 | - **[Github Integration](ppextensions-github-integration/github-integration.md)** - A jupyter extension to integrate notebooks with github. This extension simplifies version controlling, sharing and resolving merge conflicts of notebooks. 10 | - **[Notebooks Scheduling](ppextensions-scheduler/scheduler.md)** - A jupyter extension to productionalize the notebooks development environment. This extension enables scheduling notebooks with help of [airflow](https://airflow.apache.org/). 
11 | 12 | 13 | # Installation 14 | 15 | pip install ppextensions 16 | 17 | 18 | # Current State 19 | 20 | | Feature | Available | State | 21 | |---------------------- | ------------- | -------------| 22 | | PPMagics | Available | Beta | 23 | | Scheduling Notebooks | Available | Beta | 24 | | Github Integration | Available | Beta | 25 | 26 | -------------------------------------------------------------------------------------------------------------------- 27 | 28 | # Documentation & Getting Started 29 | 30 | * [Click here to read the docs](http://ppextensions.readthedocs.io/) 31 | 32 | # Contributing 33 | 34 | * [Edit in Github](https://github.com/paypal/PPExtensions/) 35 | 36 | # Questions 37 | 38 | * [Slack](https://join.slack.com/t/ppextensions/shared_invite/enQtNDIyODk5NzYzMzEyLTIwOGM3MWE0OGZlNjFkYTUxZTJiN2NjOWFlNmUxNDRiY2U3MzE0Nzg5NDRjZjE2M2VmZGI4NWJhOGVjYTRiMTk) 39 | * [User Forum](https://groups.google.com/d/forum/ppextensions) 40 | * [Developer Forum](https://groups.google.com/d/forum/ppextensions) 41 | -------------------------------------------------------------------------------- /docs/ppextensions-github-integration/github-integration.md: -------------------------------------------------------------------------------- 1 | # Github Integration 2 | 3 | # About 4 | A Jupyter extension to integrate notebooks with Github. This extension simplifies version controlling, sharing and resolving merge conflicts of notebooks. 5 | 6 | ### (Method 1) Local Environment Setup 7 | 8 | #### Local Setup 9 | 10 | **Register private Github token:** Go to Github website, click `Settings` --> `Developer settings` --> `Personal access tokens` --> `Generate new token`, copy the new token and export that as an environment variable. 11 | ~~~ 12 | export githubtoken= 13 | ~~~ 14 | 15 | Notice if the token is replaced, all local repo will be "unlinked" to remote. 
16 | 17 | **Enable git merge driver:** 18 | To show conflict in notebook, a nbmerge driver from nbdime module should be enabled as well. 19 | ~~~ 20 | git-nbmergedriver config --enable --global 21 | ~~~ 22 | 23 | #### Install Github Extension 24 | 25 | ~~~ 26 | pip install ppextensions 27 | jupyter nbextension install github --user --py 28 | jupyter nbextension enable github --user --py 29 | jupyter serverextension enable github --py --user 30 | ~~~ 31 | 32 | Alternatively, if you want to install all extensions from [ppextensions](https://ppextensions.io) 33 | ~~~ 34 | cd PPExtensions 35 | bash build/extension_init.sh 36 | ~~~ 37 | 38 | This command will automatically install all github and scheduler extensions. 39 | 40 | **(Optional) Initialize a Github repo for notebooks** 41 | 42 | If you want to create a separate repo for sharing the notebooks, go to github website and create a new repo, be sure to create a README as well in order to initialize the master branch, otherwise when you pull the repo, there will be a "master branch not found" error. 43 | 44 | **(Optional) Use an existing Github repo for sharing the notebooks** 45 | Either push to or pull from that repo will create a local workspace in Private Sharing folder in the notebook startup folder. 46 | 47 | ### (Method 2) Use Docker 48 | ~~~ 49 | docker run --name=demo --link=mysql:db -i -t -e githubtoken= -e githubname= -e githubemail= -p 8080:8080 -p 8888:8888 qwjlegend/ppextensions 50 | 51 | ~~~ 52 | 53 | Then go to localhost:8888/?token= to start using notebook with ppextensions. 54 | 55 | ### Push to Github 56 | 57 | **Push a single notebook to Github:** Select the notebook to be pushed, click `Sharing` --> `Push to Github`, select the repo, branch and type commit messages in the popup, and click on `Push`. 58 | 59 | When you push a notebook outside the `Sharing` folder, the notebook will be moved under `Sharing//` path, and the be pushed to Github. 
60 | When you push a notebook inside the `Sharing` folder, only the "Linked" repo will display in the dropdown. 61 | 62 | In the following situation, the push command will fail: 63 | 64 | ***There is a conflict:*** Updates were rejected because the remote contains work that you do not have locally. Please do git pull and fix the possible conflicts before pushing again! 65 | 66 | **Push a folder to Github:** Select the folder, click on `Sharing` --> `Push to Github`, select the repo, branch and type commit messages in the popup, and click on `Push`. 67 | 68 | When you push a folder outside the `Sharing` folder, that entire folder will be moved under "Sharing/" path, and then be pushed to Github. 69 | 70 | 71 | ### Pull from Github 72 | 73 | Click on `Sharing` --> `Pull from Github`, copy the Github repo url and paste that in the input area, then click on `Pull`. 74 | 75 | In the following situations, the pull command will fail: 76 | 77 | ***During a merge:*** You have not concluded your merge (MERGE_HEAD exists). Please, commit your changes before you can merge. 78 | 79 | ***There is a conflict:*** Auto-merging **.ipynb. CONFLICT (content): Merge conflict in **.ipynb. Automatic merge failed; fix conflicts and then commit the result. 80 | 81 | ***Untracked notebook in local:*** Your local changes to the following files would be overwritten by merge: xx.ipynb. Please, commit your changes or stash them before you can merge. Aborting. 82 | 83 | ### Commit 84 | 85 | Open up a notebook, click on the Github icon in the tool bar. There are two types of commit: 86 | 87 | **Commit one notebook:** This option will be used in most cases. 88 | 89 | In the following situations, this command will fail: 90 | 91 | ***Worktree clean, nothing to commit*** 92 | 93 | ***There are other untracked/uncommitted notebooks:*** Nothing committed but untracked files presented.
94 | 95 | **Commit all notebooks in the same folder:** Only use this option when you want to commit the deleted notebooks. 96 | 97 | 98 | ### Conflict Fix 99 | 100 | When you pull from Github and your local commit is different from the remote commit, a conflict will be generated. If the conflict cannot be automatically resolved, you can fix the conflicts by opening the notebook. 101 | 102 | In the error message, the conflicting files will be displayed. 103 | 104 | Notice: The merge driver depends on the nbdime module; while it works well in identifying "cell level" conflicts, it does not fully support "notebook level" merging. Therefore, it is not guaranteed that a "notebook level" conflict (such as a deleted cell/added cell) will be identified with 100 percent correctness. Until the improved nbdime module is released, we recommend keeping the number of cells unchanged in a collaborative circumstance. 105 | 106 | To commit, first click on the Github icon in the notebook toolbar, choose either `Commit this notebook only` or `Commit all notebooks in this folder`, then click on `Commit`. 107 | -------------------------------------------------------------------------------- /docs/ppextensions-scheduler/scheduler.md: -------------------------------------------------------------------------------- 1 | # Scheduler 2 | 3 | # About 4 | A Jupyter extension to productionalize your notebooks by scheduling them to run in the background 5 | 6 | ### (Method 1) Local Environment Setup 7 | 8 | #### Pre Requisites 9 | 10 | **Configure Airflow** 11 | ~~~ 12 | export AIRFLOW_HOME= 13 | ~~~ 14 | 15 | Run airflow on the command line; an `airflow.cfg` file will be generated in airflow home. Here is a list of parameters which need to be changed.
16 | 17 | ~~~ 18 | dags_folder = /dags 19 | executor = LocalExecutor 20 | sql_alchemy_conn = mysql+mysqlconnector:://:@:/airflow 21 | dags_are_paused_at_creation = False (recommended) 22 | load_examples = False (recommended) 23 | ~~~ 24 | 25 | Create a `dags` and a `variables` folder in airflow home to store the dag files and their related vairable files. 26 | 27 | **Setup MySQL Database** 28 | 29 | Create a database `airflow` in mysql. This serves as the metadata db for airflow. 30 | 31 | #### Local Setup 32 | 33 | Here are a few preparations to make scheduler extension work. The Pre-req steps can be skipped with those are already configured. 34 | 35 | **Export Path Variables** 36 | ~~~ 37 | export AIRFLOW_METADATA_CONNECTION_STRING='mysql+mysqlconnector://:@:/airflow' 38 | ~~~ 39 | 40 | **Start Airflow Scheduler, Webserver** 41 | 42 | In this tutorial, we are using airflow LocalExecutor, hence airflow worker is not required. But if you are using some other executors like CeleryExecutor, then the airflow worker should also be started. 43 | 44 | ~~~ 45 | airflow initdb 46 | airflow webserver 47 | airflow scheduler 48 | ~~~ 49 | 50 | By default, the log files will be generated in airflow_home, you can configure that as well. Refer to https://airflow.apache.org/howto/write-logs.html. 51 | 52 | #### Install Scheduler Extension 53 | 54 | ~~~ 55 | pip install ppextensions 56 | jupyter nbextension install scheduler --user --py 57 | jupyter nbextension enable scheduler --user --py 58 | jupyter serverextension enable scheduler --py --user 59 | ~~~ 60 | 61 | Alternatively, if you want to install all extensions from [ppextensions](https://ppextensions.io) 62 | ~~~ 63 | cd PPExtensions 64 | bash build/extension_init.sh 65 | ~~~ 66 | 67 | This command will automatically install all github and scheduler extensions. 
68 | 69 | 70 | ### (Method 2) Use Docker 71 | ~~~ 72 | docker run --name=mysql circleci/mysql 73 | docker run --name=demo --link=mysql:db -i -t -e githubtoken= -e githubname= -e githubemail= -p 8080:8080 -p 8888:8888 qwjlegend/ppextensions 74 | ~~~ 75 | 76 | The default time zone in docker container in UTC, to make scheduler work correctly based on your timezone, you need to go inside docker container and setup the timezone accordingly. 77 | 78 | Then go to `localhost:8888/?token=` to start using notebook with ppextensions. 79 | 80 | ### Schedule Notebook 81 | 82 | To schedule a notebook, first select a notebook, click on the `schedule` button appeared in the dynamic tool bar, a scheduler menu will pop up. 83 | 84 | Currently scheduler extension provides the following configurable dag parameters: 85 | 86 | ***Interval:*** Three different scales of frequency are provided: hourly, daily and weekly. 87 | 88 | ***Start Time/Date:*** The start time/date can not be ealier than current time. 89 | 90 | ***Number of Runs:*** The number of runs the job should be executed. For example, if a job is scheduled to at `12:00AM 11/11/2018` with an interval of `1 hour`, and the number of runs is set to 5 times, then the job will be ended at `5:00 AM 11/11/2018`. 91 | 92 | ***Emails:*** To receive failure email and success email, check the box and input the email address in the input area. 93 | 94 | To receive the email alert, the STMP server should be setup in the host machine and corresponding parameters in `airflow.cfg` `[smtp]` section need to be configured. 95 | 96 | Click on `Schedule` button, the job will be displayed in `Scheduled Jobs` tab, from which you can see the **Last Run Time**, **Last Run Time**, **Last Run Duration**, **Next Scheduled Run** of each job scheduled. Notice, there will be some delay in the airflow UI to show the job. 
97 | 98 | ### Edit Job 99 | 100 | To edit a job, go to the `Scheduled Jobs` tab, click on `Edit` button in `Action` column of the target job, the current configuration of that job except number of runs will be displayed in the configuration menu as default values. Change the configuration and hit on `Confirm Edit` button, the changes will be applied to the job. 101 | 102 | 103 | ### Delete Job 104 | 105 | To delete a job, go to the `Scheduled Jobs` tab, click on `Remove` button in `Action` column of the target job, the dag/vairable file of the related job as well as the records in the metadata db will be removed. 106 | 107 | 108 | -------------------------------------------------------------------------------- /docs/ppmagics/csv.md: -------------------------------------------------------------------------------- 1 | # CSV Magic 2 | 3 | # About 4 | Jupyter enables you to get started quickly on developing and running interactive queries on csv using ppmagics. You can visualize your results as graphs and charts and share your reports. 
5 | 6 | # Getting Started 7 | 8 | Querying CSV 9 | --- 10 | 11 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 12 | 13 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 14 | ~~~ 15 | %load_ext ppextensions.ppmagics 16 | ~~~ 17 | 18 | **Using CSV magic** 19 | 20 | To see available options for CSV Magic run `%csv?`: 21 | ``` 22 | 23 | %csv [-tab TABLEAU] [-pub PUBLISH] [-tde TDE_NAME] [-pname PROJECT_NAME] 24 | ``` 25 | 26 | 27 | Note: Currently csv magic supports only select sqls and ```return: Dataframe``` 28 | ``` 29 | optional arguments: 30 | -tab TABLEAU, --tableau TABLEAU 31 | True to download tableau data 32 | -pub PUBLISH, --publish PUBLISH 33 | Publish Data to Tableau Server 34 | -tde TDE_NAME, --tde_name TDE_NAME 35 | tde Name to be published 36 | -pname PROJECT_NAME, --project_name PROJECT_NAME 37 | project name to be published 38 | ``` 39 | 40 | **Running CSV query:** 41 | 42 | csv sql in one-line mode: 43 | ~~~~ 44 | %csv 45 | ~~~~ 46 | 47 | csv sql in multi-line mode: 48 | ~~~~ 49 | %%csv 50 | 51 | 52 | 53 | ~~~~ 54 | 55 | ```buildoutcfg 56 | Example Queries: 57 | 1. select * from test.csv 58 | 2. select col1 from test.csv where col1=1 59 | 3.select * from test.tsv 60 | ``` 61 | 62 | 63 | **Publish to tableau** 64 | 65 | %csv --tableau True --publish True --tde_name --project_name 66 | select * from database.table_name limit 10 67 | 68 | 69 | 70 | ******For tableau configuration refer to [Publish Magic]()****** -------------------------------------------------------------------------------- /docs/ppmagics/hive.md: -------------------------------------------------------------------------------- 1 | # Hive Magic 2 | 3 | # About 4 | Jupyter enables you to get started quickly on developing and running interactive hive sql queries using ppmagics. You can visualize your results as graphs and charts and share your reports. 
5 | 6 | # Getting Started 7 | 8 | Querying Hive 9 | --- 10 | 11 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 12 | 13 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 14 | ~~~ 15 | %load_ext ppextensions.ppmagics 16 | ~~~ 17 | 18 | **Using Hive magic** 19 | 20 | To see available options for Hive Magic run `%hive?`: 21 | ``` 22 | %hive [-c CLUSTER_NAME] [-hs HIVE_SERVER] [-p PORT] [-nn NAME_NODE_URL] 23 | [-np NAME_NODE_OPTIONS] [-rm RESOURCE_MANAGER_URL] [-a AUTH] 24 | [-f CSV] [-t TABLE] [-df DATAFRAME] [-tab TABLEAU] [-pub PUBLISH] 25 | [-tde TDE_NAME] [-pname PROJECT_NAME] 26 | ``` 27 | 28 | ``` 29 | optional arguments: 30 | -c CLUSTER_NAME, --cluster_name CLUSTER_NAME 31 | Cluster Name to connect to 32 | -hs HIVE_SERVER, --hive_server HIVE_SERVER 33 | Hive server2 host name or ip address. 34 | -p PORT, --port PORT Hive Server2 port 35 | -nn NAME_NODE_URL, --name_node_url NAME_NODE_URL 36 | Name node host name 37 | -np NAME_NODE_OPTIONS, --name_node_options NAME_NODE_OPTIONS 38 | Parameters for host 39 | -rm RESOURCE_MANAGER_URL, --resource_manager_url RESOURCE_MANAGER_URL 40 | Resource Manager web ui url 41 | -a AUTH, --auth AUTH Authentication type 42 | -f CSV, --csv CSV Local CSV file name to be loaded to hive table. Use 43 | this option along with --table 44 | -t TABLE, --table TABLE 45 | Hive table name for data to be inserted to. Use this 46 | option along with --csv 47 | -df DATAFRAME, --dataframe DATAFRAME 48 | DataFrame to be uploaded to a table. 
Use this option 49 | with --table 50 | -tab TABLEAU, --tableau TABLEAU 51 | True to download tableau data 52 | -pub PUBLISH, --publish PUBLISH 53 | Publish Data to Tableau Server 54 | -tde TDE_NAME, --tde_name TDE_NAME 55 | tde Name to be published 56 | -pname PROJECT_NAME, --project_name PROJECT_NAME 57 | project name to be published 58 | ``` 59 | 60 | **Running Hive query:** 61 | 62 | Establishing a hive server connection to read data from hive 63 | ``` 64 | %%hive -c 65 | 66 | ``` 67 | 68 | Update `~/.ppextensions/config.json` with a named cluster including `hive url`, `port number` and `auth` to use `-c` if a persistent cluster configuration is desired. 69 | 70 | ``` 71 | { 72 | "hive":{ 73 | "cluster_name": { 74 | "host": , 75 | "port": , 76 | "auth": "plain/gssapi", 77 | } 78 | "cluster_name_1": { 79 | "host": ", 80 | "port": , 81 | "auth": "plain/gssapi", 82 | } 83 | } 84 | } 85 | ``` 86 | For [reading and inserting data](#insert_data) additional configuration is required. 87 | 88 | ****Updated config will be available after restarting the kernel*** 89 | 90 | 91 | Optionally, it is also possible to connect without a config 92 | 93 | ```buildoutcfg 94 | %%hive --hive_server hive.server.com --port 10000 --auth gssapi 95 | 96 | ``` 97 | 98 | On an established hive server connection further queries can be run as: 99 | 100 | hive sql in one-line mode: 101 | ~~~~ 102 | %hive 103 | ~~~~ 104 | 105 | hive sql in multi-line mode: 106 | ~~~~ 107 | %%hive 108 | 109 | 110 | 111 | ~~~~ 112 | 113 | 114 | 115 | **To insert csv/df data to a Hive table** 116 | 117 | %hive -f file.csv -t database.table_name 118 | 119 | %hive -df df_name -t database.table_name 120 | 121 | 122 | Update `~/.ppextensions/config.json` with `name_node_url` and `name_node_opts` for the desired cluster to insert and read data 123 | ``` 124 | { 125 | "hive":{ 126 | "cluster_name": { 127 | "host": , 128 | "port": , 129 | "auth": "plain/gssapi", 130 | "resource_manager_url": "url_name", 131 | 
"name_node_url": "namenodeurl:port", 132 | "name_node_opts": {"hadoop.security.authentication": "kerberos"} 133 | } 134 | } 135 | } 136 | 137 | ``` 138 | 139 | Optionally, it is also possible to connect for inserting/reading without a config 140 | ```buildoutcfg 141 | %%hive --hive_server hive.server.com --port 10000 --auth gssapi --name_node_url hive.server.com --name_node_opts {"hadoop.security.authentication": "kerberos"} 142 | 143 | ``` 144 | 145 | 146 | 147 | 148 | 149 | **Publish to tableau** 150 | 151 | %hive --tableau True --publish True --tde_name --project_name 152 | select * from database.table_name limit 10 153 | 154 | 155 | 156 | 157 | ******For tableau configuration refer to [Publish Magic](https://github.paypal.com/ppextensions/master/docs/ppextensions-magics/publish.md)****** 158 | 159 | %hive 160 | ``` 161 | Please check the official [Hive documentation] (https://hive.apache.org/) for information on using Hive. -------------------------------------------------------------------------------- /docs/ppmagics/presto.md: -------------------------------------------------------------------------------- 1 | # Presto Magic 2 | 3 | # About 4 | Jupyter enables you to get started quickly on developing and running interactive presto sql queries using ppmagics. You can visualize your results as graphs and charts and share your reports. 
5 | 6 | # Getting Started 7 | 8 | Querying Presto 9 | --- 10 | 11 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 12 | 13 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 14 | ~~~ 15 | %load_ext ppextensions.ppmagics 16 | ~~~ 17 | 18 | **Using Presto magic** 19 | 20 | To see available options for Presto Magic run `%presto?`: 21 | ``` 22 | %presto [-c CLUSTER_NAME] [-h HOST] [-p PORT] [-a AUTH] [-tab TABLEAU] 23 | [-pub PUBLISH] [-tde TDE_NAME] [-pname PROJECT_NAME] 24 | ``` 25 | 26 | ``` 27 | optional arguments: 28 | -c CLUSTER_NAME, --cluster_name CLUSTER_NAME 29 | -h HOST, --host HOST Host name or ip address of presto server 30 | -p PORT, --port PORT Port of presto server 31 | -a AUTH, --auth AUTH Authentication type 32 | -tab TABLEAU, --tableau TABLEAU 33 | True to download tableau data 34 | -pub PUBLISH, --publish PUBLISH 35 | Publish Data to Tableau Server 36 | -tde TDE_NAME, --tde_name TDE_NAME 37 | tde Name to be published 38 | -pname PROJECT_NAME, --project_name PROJECT_NAME 39 | project name to be published 40 | ``` 41 | 42 | **Running Presto query:** 43 | 44 | Establishing a presto server connection to read data from presto 45 | ``` 46 | %%presto -c 47 | 48 | ``` 49 | 50 | Update `~/.ppextensions/config.json` with a named cluster including `presto url`, `port number` and `auth` to use `-c` if a persistent cluster configuration is desired. 
51 | 52 | ``` 53 | { 54 | "presto":{ 55 | "cluster_name": { 56 | "host": , 57 | "port": , 58 | "auth": "plain/gssapi", 59 | } 60 | "cluster_name_1": { 61 | "host": ", 62 | "port": , 63 | "auth": "plain/gssapi", 64 | } 65 | } 66 | } 67 | ``` 68 | ****Updated config will be available after restarting the kernel*** 69 | 70 | 71 | Optionally, it is also possible to connect without a config 72 | 73 | ```buildoutcfg 74 | %%presto --host presto.server.com --port 10000 --auth gssapi 75 | 76 | ``` 77 | 78 | On an established presto server connection further queries can be run as: 79 | 80 | presto sql in one-line mode: 81 | ~~~~ 82 | %presto 83 | ~~~~ 84 | 85 | presto sql in multi-line mode: 86 | ~~~~ 87 | %%presto 88 | 89 | 90 | 91 | ~~~~ 92 | 93 | 94 | **Publish to tableau** 95 | 96 | %presto --tableau True --publish True --tde_name --project_name 97 | select * from database.table_name limit 10 98 | 99 | 100 | 101 | 102 | ******For tableau configuration refer to [Publish Magic]()****** -------------------------------------------------------------------------------- /docs/ppmagics/publish.md: -------------------------------------------------------------------------------- 1 | # Publish Magic 2 | 3 | # Prerequisites 4 | 5 | Install [tableau sdk](https://onlinehelp.tableau.com/current/api/sdk/en-us/help.htm#SDK/tableau_sdk_installing.htm%3FTocPath%3D_____3) 6 | 7 | # About 8 | Jupyter enables you to publish your data to online tableau server. 9 | Publish magic allows you to publish the dataframe or result from any query to online tableau server. 
10 | 11 | Note: You will need [tabcmd](https://onlinehelp.tableau.com/current/server/en-us/tabcmd.htm) installed and an [online account](https://www.tableau.com/learn/tutorials/on-demand/publishing-tableau-server-and-tableau-online-9) to use this feature 12 | 13 | 14 | # Getting Started 15 | 16 | Publish Magic 17 | --- 18 | 19 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 20 | 21 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 22 | ~~~ 23 | %load_ext ppextensions.ppmagics 24 | ~~~ 25 | 26 | **Using Publish magic** 27 | 28 | To see available options for Publish Magic run `%publish?`: 29 | ``` 30 | %publish [-tde TDE_NAME] [-p_name PROJECT_NAME] 31 | ``` 32 | 33 | ``` 34 | optional arguments: 35 | -tde TDE_NAME, --tde_name TDE_NAME 36 | tde Name to be published 37 | -p_name PROJECT_NAME, --project_name PROJECT_NAME 38 | tde Name to be published 39 | ``` 40 | 41 | **Running Publish Magic:** 42 | 43 | ``` 44 | %%publish -tde_name 45 | 46 | ``` 47 | 48 | Update `~/.ppextensions/config.json` with the tableau server details including `site_name`, `user_name` and `password` . 49 | 50 | ``` 51 | { "tableau":{ 52 | "site_name":" 68 | ~~~~ 69 | dataframe would be published with default timestamp 70 | 71 | 72 | **Publishing to specific project with specific tde name** 73 | 74 | %%publish --project_name --tde_name 75 | 76 | 77 | 78 | **Publishing results from another magic. 79 | 80 | ```buildoutcfg 81 | %publish %hive 82 | ``` 83 | ``` 84 | %%publish --tde_name 85 | %hive 86 | ``` 87 | Please check the official [Tableau documentation] (https://onlinehelp.tableau.com/current/pro/desktop/en-us/help.htm#concepts.html) for information on using Tableau. 
-------------------------------------------------------------------------------- /docs/ppmagics/run.md: -------------------------------------------------------------------------------- 1 | # Run Magic 2 | 3 | 4 | 5 | # About 6 | The Run Magic enables sequential or parallel execution of notebooks. Additionally, notebooks can include parameters to enable parameterized execution; parameters are assumed to be present in the first code cell of the notebook. 7 | # Getting Started 8 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 9 | 10 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 11 | ~~~ 12 | %load_ext ppextensions.ppmagics 13 | ~~~ 14 | 15 | **Using Run magic** 16 | 17 | To see available options for Run Magic run `%run?`: 18 | ``` 19 | %run [-p PARALLEL] [-e ALLOW_ERRORS] [-pbar ENABLE_PROGRESS_BAR] 20 | [-t CELL_TIMEOUT] 21 | ``` 22 | ``` 23 | optional arguments: 24 | -p PARALLEL, --parallel PARALLEL 25 | Run Notebooks in Parallel 26 | -e ALLOW_ERRORS, --allow_errors ALLOW_ERRORS 27 | Ignore errors and execute whole notebook 28 | -pbar ENABLE_PROGRESS_BAR, --enable_progress_bar ENABLE_PROGRESS_BAR 29 | Show Progress Bar 30 | -t CELL_TIMEOUT, --cell_timeout CELL_TIMEOUT 31 | Cell Execution Timeout. -1 to Disable. 32 | ``` 33 | 34 | Multiple notebooks should be separated with a `;`. If a notebook execution should be saved, the save name can be specified with the run notebook separated by a `:`. 35 | 36 | 37 | One or more parameters can be specified while executing a parameterized notebook. If a parameter specified during run does not exist in the notebook it will be ignored. All parameters should be part of the first code cell. Parameters can be strings, numbers, lists and dictionaries. 38 | 39 | Below are different ways to use the Run Magic. 
40 | 41 | ``` 42 | # simple run 43 | %run your notebook 44 | ``` 45 | ``` 46 | # Save execution after run 47 | %run your notebook:your save notebook 48 | ``` 49 | ``` 50 | # Allow errors during execution of cells 51 | %%run -e True 52 | your notebook 53 | ``` 54 | ``` 55 | # sequential run 56 | %%run 57 | your notebook 01; 58 | your notebook 02 59 | ``` 60 | ``` 61 | # parallel run 62 | %%run -p True 63 | your notebook 01; 64 | your notebook 02 65 | ``` 66 | ``` 67 | # Show progress bar during execution of notebook 68 | %%run -pbar True 69 | your notebook 70 | ``` 71 | ``` 72 | # specify cell timeout in seconds, -1 to disable cell timeout 73 | %%run -t 600 74 | your notebook 01; 75 | your notebook 02 76 | ``` 77 | ``` 78 | # parameterized run 79 | %%run 80 | your notebook with prameters 01 key01=int key01=string key02={'key01': param01}; 81 | your notebook with paramters 02 key01=int key02=string key03=[param01, param02]; 82 | your notebook with parameters 01 83 | ``` 84 | -------------------------------------------------------------------------------- /docs/ppmagics/run_pipeline.md: -------------------------------------------------------------------------------- 1 | # Run Pipeline Magic 2 | 3 | 4 | 5 | # About 6 | The run_pipeline magic enables sequential stateful execution of notebooks. Additionally, notebooks can include parameters to enable parameterized execution; parameters are assumed to be present in the first code cell of the notebook. 7 | 8 | The magic creates a dictionary `_pipeline_workspace` which will be avaliable to all the notebooks in the pipeline during execution. The dictionary is intended to store state. It can contain any python object. Managing and communicating state changes can be achieved using parameterization of notebooks. 
9 | # Getting Started 10 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 11 | 12 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 13 | ~~~ 14 | %load_ext ppextensions.ppmagics 15 | ~~~ 16 | 17 | **Using run_pipeline magic** 18 | 19 | To see available options for run_pipeline magic run `%run_pipeline?`: 20 | ``` 21 | %run_pipeline [-t CELL_TIMEOUT] 22 | ``` 23 | ``` 24 | optional arguments: 25 | -t CELL_TIMEOUT, --cell_timeout CELL_TIMEOUT 26 | Cell Execution Timeout. -1 to Disable. 27 | ``` 28 | 29 | Multiple notebooks should be separated with a `;`. If a notebook execution should be saved, the save name can be specified with the run notebook separated by a `:`. 30 | 31 | 32 | One or more parameters can be specified while executing a parameterized notebook. If a parameter specified during run does not exist in the notebook it will be ignored. All parameters should be part of the first code cell. Parameters can be strings, numbers, lists and dictionaries. 33 | 34 | Below are different ways to use the run_pipeline Magic. 
35 | ``` 36 | # simple pipeline 37 | %%run_pipeline 38 | first notebook in pipeline; 39 | second notebook in pipeline; 40 | third notebook in pipeline 41 | ``` 42 | ``` 43 | # simple pipeline with saving execution 44 | %%run_pipeline 45 | first notebook in pipeline:savename for first notebook; 46 | second notebook in pipeline:savename for third notebook; 47 | third notebook in pipeline:savename for third notebook 48 | ``` 49 | 50 | ``` 51 | # pipleine with parameterized notebooks 52 | %%run_pipeline 53 | first notebook in pipeline key01=int key01=string key02={'key01': param01}; 54 | second notebook in pipeline; 55 | third notebook in pipeline:your save name key01=int key02=string key03=[param01, param02] 56 | ``` 57 | 58 | ``` 59 | # specify cell timeout in seconds, -1 to disable cell timeout 60 | %%run_pipeline --cel_timeout 600 61 | first notebook in pipeline key01=int key01=string key02={'key01': param01}; 62 | second notebook in pipeline; 63 | third notebook in pipeline:your save name key01=int key02=string key03=[param01, param02] 64 | ``` 65 | 66 | ``` 67 | # An example of using state 68 | """ 69 | The first notebook downloads data to a dataframe and saves it to _pipeline_workspace as a key specified by the user as a paramter. 70 | The second notebook takes the key containing data in _pipeline_workspace as a paramter and visualizes the data 71 | """ 72 | 73 | %%run_pipeline 74 | download data notebook save_key_name="data_key"; 75 | visualize data notebook:save name data_key="data_key" 76 | ``` -------------------------------------------------------------------------------- /docs/ppmagics/sts.md: -------------------------------------------------------------------------------- 1 | # STS Magic 2 | 3 | # About 4 | Jupyter enables you to get started quickly on developing and running interactive queries thru spark thrift server using ppmagics. You can visualize your results as graphs and charts and share your reports. 
5 | 6 | # Getting Started 7 | 8 | Querying sts 9 | --- 10 | 11 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 12 | 13 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 14 | ~~~ 15 | %load_ext ppextensions.ppmagics 16 | ~~~ 17 | 18 | **Using Spark Thrift Sever magic** 19 | 20 | To see available options for sts Magic run `%sts?`: 21 | ``` 22 | %sts [-c CLUSTER_NAME] [-h HOST] [-p PORT] [-a AUTH] [-tab TABLEAU] 23 | [-pub PUBLISH] [-tde TDE_NAME] [-pname PROJECT_NAME] 24 | ``` 25 | 26 | ``` 27 | optional arguments: 28 | -c CLUSTER_NAME, --cluster_name CLUSTER_NAME 29 | Cluster Name to connect to 30 | -h HOST, --hive_server host 31 | sts server host name or ip address. 32 | -p PORT, --port PORT sts Server port 33 | 34 | -a AUTH, --auth AUTH Authentication type 35 | 36 | -tab TABLEAU, --tableau TABLEAU 37 | True to download tableau data 38 | -pub PUBLISH, --publish PUBLISH 39 | Publish Data to Tableau Server 40 | -tde TDE_NAME, --tde_name TDE_NAME 41 | tde Name to be published 42 | -pname PROJECT_NAME, --project_name PROJECT_NAME 43 | project name to be published 44 | ``` 45 | 46 | **Running sts query:** 47 | 48 | Establishing a sts server connection to read data from sts 49 | ``` 50 | %%sts -c 51 | 52 | ``` 53 | 54 | Update `~/.ppextensions/config.json` with a named cluster including `sts url`, `port number` and `auth` to use `-c` if a persistent cluster configuration is desired. 
55 | 56 | ``` 57 | { 58 | "sts":{ 59 | "cluster_name": { 60 | "host": , 61 | "port": , 62 | "auth": "plain/gssapi", 63 | } 64 | "cluster_name_1": { 65 | "host": ", 66 | "port": , 67 | "auth": "plain/gssapi", 68 | } 69 | } 70 | } 71 | ``` 72 | 73 | ****Updated config will be available after restarting the kernel*** 74 | 75 | 76 | Optionally, it is also possible to connect without a config 77 | 78 | ```buildoutcfg 79 | %%sts --host sts.server.com --port 10000 --auth gssapi 80 | 81 | ``` 82 | 83 | On an established sts server connection further queries can be run as: 84 | 85 | sts sql in one-line mode: 86 | ~~~~ 87 | %sts 88 | ~~~~ 89 | 90 | sts sql in multi-line mode: 91 | ~~~~ 92 | %%sts 93 | 94 | 95 | 96 | ~~~~ 97 | **Publish to tableau** 98 | 99 | %sts --tableau True --publish True --tde_name --project_name 100 | select * from database.table_name limit 10 101 | 102 | 103 | 104 | 105 | ******For tableau configuration refer to [Publish Magic]()****** -------------------------------------------------------------------------------- /docs/ppmagics/teradata.md: -------------------------------------------------------------------------------- 1 | # Teradata Magic 2 | 3 | 4 | #About 5 | Jupyter enables you to get started quickly on developing and running interactive sql queries on Teradata using ppmagics. You can visualize your results as graphs and charts and share your reports. 
6 | 7 | # Getting Started 8 | 9 | Querying Teradata 10 | --- 11 | 12 | **Opening Notebook:** Open Jupyter Notebook, click `New` --> `Python3` kernel 13 | 14 | **Import ppextensions :** Execute the code below to import ppmagics from ppextensions to your notebook 15 | ~~~ 16 | %load_ext ppextensions.ppmagics 17 | ~~~ 18 | **Using Teradata magic** 19 | 20 | To see available options for Hive Magic run `%teradata?`: 21 | ``` 22 | %teradata [-c CLUSTER_NAME] [-f CSV] [-t TABLE] [-df DATAFRAME] 23 | [-h HOST] [-tab TABLEAU] [-pub PUBLISH] [-tde TDE_NAME] 24 | [-pname PROJECT_NAME] 25 | ``` 26 | ``` 27 | optional arguments: 28 | -c CLUSTER_NAME, --cluster_name CLUSTER_NAME 29 | Cluster Name to connect to 30 | -f CSV, --csv CSV Local CSV file name to be loaded to hive table. Use 31 | this option along with --table 32 | -t TABLE, --table TABLE 33 | Teradata table name for data to be inserted to. Use this 34 | option along with --csv 35 | -df DATAFRAME, --dataframe DATAFRAME 36 | DataFrame to be uploaded to a table. Use this option 37 | with --table 38 | -h HOST, --host HOST Teradata host name to connect to 39 | -tab TABLEAU, --tableau TABLEAU 40 | True to download tableau data 41 | -pub PUBLISH, --publish PUBLISH 42 | Publish Data to Tableau Server 43 | -tde TDE_NAME, --tde_name TDE_NAME 44 | tde Name to be published 45 | -pname PROJECT_NAME, --project_name PROJECT_NAME 46 | project name to be published 47 | ``` 48 | 49 | **Running Teradata query:** 50 | Establishing a connection to Teradata 51 | ``` 52 | %%teradata -c 53 | 54 | ``` 55 | 56 | Update `~/.ppextensions/config.json` with named cluster including `host` if a persistent cluster configuration is desired. 
57 | 58 | ``` 59 | "teradata": { 60 | "cluster_name": { 61 | "host": 62 | }, 63 | "cluster_name_1": { 64 | "host": 65 | } 66 | } 67 | ``` 68 | 69 | ****Updated config will be available after restarting the kernel*** 70 | 71 | 72 | Optionally, it is also possible to connect without a config 73 | 74 | ```buildoutcfg 75 | %%teradata --host 76 | 77 | ``` 78 | 79 | On an established Teradata connection further queries can be run as: 80 | 81 | 82 | Teradata sql in one-line mode 83 | ~~~~ 84 | %teradata 85 | ~~~~ 86 | To run Teradata sql in multi-line mode 87 | ~~~~ 88 | %%teradata 89 | 90 | 91 | 92 | ~~~~ 93 | 94 | **To insert csv/df data to Teradata** 95 | 96 | %teradata -f file.csv -t database.table_name 97 | 98 | %teradata -df df_name -t database.table_name 99 | 100 | **Publish to tableau** 101 | 102 | %teradata --tableau True --publish True --tde_name --project_name 103 | select * from database.table_name limit 10 104 | 105 | ******For tableau configuration refer to [Publish Magic]()****** 106 | 107 | Please check the official [Teradata documentation] (https://www.info.teradata.com/browse.cfm) for information on using Teradata. -------------------------------------------------------------------------------- /github/__init__.py: -------------------------------------------------------------------------------- 1 | """Package sparkmonitor 2 | 3 | This package contains two modules: 4 | kernelextension.py is the Jupyter kernel extension. 5 | serverextension.py is the Jupyter web server extension. 
6 | """ 7 | 8 | 9 | def _jupyter_nbextension_paths(): 10 | """Used by 'jupyter nbextension' command to install frontend extension""" 11 | return [dict( 12 | section="notebook", 13 | # the path is relative to the `my_fancy_module` directory 14 | src="static", 15 | # directory in the `nbextension/` namespace 16 | dest="github", 17 | # _also_ in the `nbextension/` namespace 18 | require="github/githubcommit"), 19 | dict( 20 | section="tree", 21 | # the path is relative to the `my_fancy_module` directory 22 | src="static", 23 | # directory in the `nbextension/` namespace 24 | dest="github", 25 | # _also_ in the `nbextension/` namespace 26 | require="github/github"), 27 | dict( 28 | section="tree", 29 | # the path is relative to the `my_fancy_module` directory 30 | src="static", 31 | # directory in the `nbextension/` namespace 32 | dest="github", 33 | # _also_ in the `nbextension/` namespace 34 | require="github/githubmain") 35 | ] 36 | 37 | 38 | def _jupyter_server_extension_paths(): 39 | """Used by "jupyter serverextension" command to install web server extension'""" 40 | return [{ 41 | "module": "github.github" 42 | }] 43 | -------------------------------------------------------------------------------- /github/github.py: -------------------------------------------------------------------------------- 1 | from notebook.utils import url_path_join 2 | from notebook.base.handlers import IPythonHandler 3 | from git import Repo, exc, GitCommandError 4 | from shutil import move 5 | from urllib.parse import urlparse, unquote 6 | 7 | import requests 8 | import json 9 | import os 10 | 11 | GITHUB_URL_PREFIX = "https://github.com/" 12 | GITHUB_API_PREFIX = "https://api.github.com" 13 | GITHUB_TOKEN = os.getenv("githubtoken","") 14 | NOTEBOOK_STARTUP_PATH = os.getcwd() + "/" 15 | LOCAL_REPO_FOLDER = "Sharing" 16 | LOCAL_REPO_PREFIX = NOTEBOOK_STARTUP_PATH + LOCAL_REPO_FOLDER 17 | 18 | 19 | class PrivateGitHandler(IPythonHandler): 20 | """ 21 | The base class that has all 
functions used in private sharing backend handlers. 22 | """ 23 | 24 | def error_handler(self, err, iserr=True): 25 | err = err.replace("\n", "
").replace("\t", " ") 26 | if iserr: 27 | self.set_status(500) 28 | self.finish(err) 29 | 30 | @staticmethod 31 | def git_clone(local_repo_path, repo_url): 32 | try: 33 | repo_instance = Repo(local_repo_path) 34 | except exc.NoSuchPathError: 35 | o = urlparse(repo_url) 36 | repo_url_with_token = o.scheme + "://" + GITHUB_TOKEN + "@" + o.hostname + o.path 37 | Repo.clone_from(repo_url_with_token, local_repo_path) 38 | with open(local_repo_path + "/.gitignore", "a") as f: 39 | f.write("\n.*\n.gitignore") 40 | repo_instance = Repo(local_repo_path) 41 | return repo_instance 42 | 43 | def git_commit(self, from_path, to_path, file_name, repo_instance, commit_message): 44 | try: 45 | move(from_path, to_path) 46 | except Exception as e: 47 | self.error_handler(str(e)) 48 | git_instance = repo_instance.git 49 | if not os.path.isdir(to_path): 50 | git_instance.add(file_name) 51 | else: 52 | git_instance.add("--a") 53 | git_instance.commit("-m", commit_message) 54 | 55 | @staticmethod 56 | def git_commit_inside(file_name, repo_instance, commit_message, option): 57 | git_instance = repo_instance.git 58 | if option == "single": 59 | git_instance.add(file_name) 60 | else: 61 | git_instance.add("--a") 62 | git_instance.commit("-m", commit_message) 63 | 64 | @staticmethod 65 | def get_repo(file_path): 66 | repos = dict() 67 | headers = {'Authorization': 'token ' + GITHUB_TOKEN} 68 | parts = file_path.split("/") 69 | if parts[0] == LOCAL_REPO_FOLDER: 70 | repo_name = parts[1] + "/" + parts[2] 71 | branch = requests.get(GITHUB_API_PREFIX + '/repos/' + repo_name + '/branches', headers=headers) 72 | if branch.status_code == 404: 73 | repos[repo_name] = ['Branch Not Found!'] 74 | if len(branch.json()) == 0: 75 | repos[repo_name] = ['master'] 76 | else: 77 | repos[repo_name] = [br['name'] for br in branch.json()] 78 | else: 79 | params = {'affiliation': "owner", "per_page": 100, "sort": "full_name"} 80 | repo = requests.get(GITHUB_API_PREFIX + '/user/repos', headers=headers, 
params=params).json() 81 | for rp in repo: 82 | repo_name = rp['full_name'] 83 | branch = requests.get(GITHUB_API_PREFIX + '/repos/' + repo_name + '/branches', headers=headers) 84 | if branch.status_code == 404: 85 | repos[repo_name] = ['Branch Not Found!'] 86 | if len(branch.json()) == 0: 87 | repos[repo_name] = ['master'] 88 | else: 89 | repos[repo_name] = [br['name'] for br in branch.json()] 90 | return json.dumps(repos) 91 | 92 | 93 | class PrivateGitGetRepoHandler(PrivateGitHandler): 94 | """ 95 | Get the accessible github repos and display them in the dropdown in github push menu 96 | """ 97 | 98 | def get(self): 99 | file_path = self.get_argument("filepath", "") 100 | try: 101 | repos = self.get_repo(file_path) 102 | self.finish(repos) 103 | except Exception as e: 104 | self.error_handler(str(e)) 105 | 106 | 107 | class PrivateGitPushHandler(PrivateGitHandler): 108 | """ 109 | Private sharing handler to push a notebook or a folder to remote repo. 110 | Step1: Git Clone (If necessary) 111 | Step2: Git Commit 112 | Step3: Git Push 113 | """ 114 | 115 | def post(self): 116 | repo_name = unquote(self.get_argument("repo")) 117 | branch = self.get_argument("branch") 118 | commit_message = self.get_argument("msg") 119 | file_path = unquote(self.get_argument("filepath")) 120 | file_name = unquote(self.get_argument("filename")) 121 | repo_url = GITHUB_URL_PREFIX + repo_name + ".git" 122 | local_repo_path = LOCAL_REPO_PREFIX + "/" + repo_name 123 | if not file_path.startswith(LOCAL_REPO_FOLDER): 124 | local_repo_file_path = local_repo_path + "/" + file_path 125 | else: 126 | local_repo_file_path = NOTEBOOK_STARTUP_PATH + file_path 127 | repo_instance = self.git_clone(local_repo_path, repo_url) 128 | try: 129 | self.git_commit(NOTEBOOK_STARTUP_PATH + file_path, local_repo_file_path, file_name, repo_instance, 130 | commit_message) 131 | except GitCommandError as e: 132 | if e.status == 1: 133 | self.error_handler(e.stdout, iserr=False) 134 | else: 135 | 
self.error_handler(e.stderr) 136 | try: 137 | push_info = repo_instance.remote().push("master:" + branch) 138 | assert push_info[0].flags in [512, 256, 2, 1] 139 | self.finish(file_name + " has been successfully pushed! ") 140 | except AssertionError as e: 141 | if push_info[0].flags == 1032: 142 | self.error_handler("Updates were rejected because the remote contains work that you do not have " 143 | "locally. Please do git pull and fix the possible conflicts before pushing again!") 144 | except GitCommandError as e: 145 | self.error_handler(push_info[0].summary) 146 | 147 | 148 | class PrivateGitPullHandler(PrivateGitHandler): 149 | """ 150 | Private Sharing handler to pull a notebook or an entire repo to local. 151 | If there is a conflict, it will show the conflict in notebook and ask the user to fix. 152 | """ 153 | 154 | def post(self): 155 | github_repo_url = unquote(self.get_argument("github_repo_url")) 156 | o = urlparse(github_repo_url) 157 | if o.path.endswith(".git"): 158 | repo = o.path.strip(".git") 159 | branch = "master" 160 | repo_url = github_repo_url 161 | else: 162 | split_word = "/blob/" if "/blob/" in o.path else "/tree/" 163 | if split_word in o.path: 164 | repo, path = o.path.split(split_word) 165 | branch = path.split("/")[0] 166 | repo_url = github_repo_url.split(split_word)[0] + ".git" 167 | else: 168 | repo = o.path 169 | branch = "master" 170 | repo_url = github_repo_url + ".git" 171 | local_repo_path = LOCAL_REPO_PREFIX + repo 172 | try: 173 | repo_instance = self.git_clone(local_repo_path, repo_url) 174 | git_instance = repo_instance.git 175 | git_instance.pull("origin", branch) 176 | self.finish("Successfully pulled to Sharing" + repo) 177 | except GitCommandError as e: 178 | if "conflict" in e.stdout: 179 | self.error_handler(e.stdout) 180 | else: 181 | self.error_handler(e.stderr) 182 | 183 | 184 | class PrivateGitCommitHandler(PrivateGitHandler): 185 | """ 186 | GitCommit handler used by the git commit button in notebook toolBar 
187 | """ 188 | 189 | def post(self): 190 | repo_name = unquote(self.get_argument("repo")) 191 | file_name = unquote(self.get_argument("filename")) 192 | option = self.get_argument("option") 193 | commit_message = "Commit from PayPal Notebook" 194 | local_repo_path = NOTEBOOK_STARTUP_PATH + repo_name 195 | try: 196 | repo_instance = Repo(local_repo_path) 197 | except exc.InvalidGitRepositoryError as e: 198 | self.error_handler(str(e)) 199 | try: 200 | self.git_commit_inside(file_name, repo_instance, commit_message, option) 201 | self.finish("Commit Success!") 202 | except GitCommandError as e: 203 | if e.status == 1: 204 | self.error_handler(e.stdout) 205 | else: 206 | self.error_handler(e.stderr) 207 | 208 | 209 | def load_jupyter_server_extension(nb_server_app): 210 | """ 211 | Called when the extension is loaded. 212 | 213 | Args: 214 | nb_server_app (NotebookWebApplication): handle to the Notebook webserver instance. 215 | """ 216 | web_app = nb_server_app.web_app 217 | handlers = [ 218 | (r'/github/private_github_push', PrivateGitPushHandler), 219 | (r'/github/private_github_pull', PrivateGitPullHandler), 220 | (r'/github/private_github_get_repo', PrivateGitGetRepoHandler), 221 | (r'/github/private_github_commit', PrivateGitCommitHandler), 222 | ] 223 | 224 | base_url = web_app.settings['base_url'] 225 | handlers = [(url_path_join(base_url, h[0]), h[1]) for h in handlers] 226 | 227 | host_pattern = '.*$' 228 | web_app.add_handlers(host_pattern, handlers) 229 | -------------------------------------------------------------------------------- /github/static/github.js: -------------------------------------------------------------------------------- 1 | define(["base/js/namespace", "base/js/dialog", "tree/js/notebooklist", "base/js/utils", "jquery"], function (Jupyter, dialog, notebooklist, utils, $) { 2 | var GithubOperation = function () { 3 | this.base_url = Jupyter.notebook_list.base_url; 4 | this.bind_events(); 5 | }; 6 | 7 | GithubOperation.prototype = 
Object.create(notebooklist.NotebookList.prototype); 8 | 9 | GithubOperation.prototype.bind_events = function () { 10 | var that = this; 11 | $(".private-github-push").click($.proxy(that.private_github_push, this)); 12 | $(".private-github-pull").click($.proxy(that.private_github_pull, this)); 13 | }; 14 | 15 | GithubOperation.prototype.private_github_push = function () { 16 | var that = this; 17 | var repo = $(""); 20 | branch.append(""); 21 | 22 | function initializeDropdown(res) { 23 | for (var rp in res) { 24 | repo.append(new Option(rp, rp)); 25 | } 26 | repo.change(function () { 27 | var branches = res[repo.val()]; 28 | branch.empty(); 29 | branch.append(""); 30 | $.each(branches, function (i, el) { 31 | branch.append(new Option(el, el)); 32 | }); 33 | }); 34 | } 35 | 36 | var selected = Jupyter.notebook_list.selected[0]; 37 | var settings = { 38 | method: "GET", 39 | data: {"filepath": selected.path}, 40 | success: function(res) { 41 | res = JSON.parse(res); 42 | for (var rp in res) { 43 | repo.append(new Option(rp, rp)); 44 | } 45 | repo.change(function() { 46 | var branches=res[repo.val()]; 47 | branch.empty(); 48 | branch.append(""); 49 | $.each(branches, function(i, el) { 50 | branch.append(new Option(el, el)); 51 | }); 52 | }); 53 | }, 54 | error: function(res) { 55 | console.log(res); 56 | }, 57 | }; 58 | var url = utils.url_path_join(that.base_url, "/github/private_github_get_repo"); 59 | if (sessionStorage.getItem(url) != null) { 60 | var res = JSON.parse(sessionStorage.getItem(url)); 61 | initializeDropdown(res); 62 | } else { 63 | utils.ajax(url, settings); 64 | } 65 | 66 | var commit_msg = $("").css("margin-left", "12px"); 67 | var repo_div = $("
") 68 | .append("") 69 | .append(repo); 70 | var branch_div = $("
") 71 | .append("") 72 | .append(branch); 73 | var text_div = $("
") 74 | .append("") 75 | .append(commit_msg); 76 | var dialog_body=$("

Please notice: if you are pushing one notebook, other notebooks that are already committed in the same folder will also be pushed!

") 77 | .append($("
") 78 | .append(repo_div) 79 | .append(branch_div) 80 | .append(text_div)); 81 | 82 | dialog.modal({ 83 | title: "Push to Github", 84 | body: dialog_body, 85 | buttons: { 86 | Push: { 87 | class: "btn-primary", 88 | click: function () { 89 | var spin = dialog.modal({ 90 | title: "Pushing...", 91 | body: $("
") 92 | .append($("
Notebook is being pushing from github, please wait for a few seconds.
")) 93 | }); 94 | var payload = { 95 | "msg": commit_msg.val(), 96 | "branch": branch.val(), 97 | "repo": repo.val(), 98 | "filepath": selected.path, 99 | "filename": encodeURI(selected.name) 100 | }; 101 | var settings = { 102 | method: "POST", 103 | data: payload, 104 | success: function (res) { 105 | spin.modal("hide"); 106 | dialog.modal({ 107 | title: "Git Push Success", 108 | body: $("
").append(res), 109 | button: { 110 | OK: { 111 | "class": "btn-primary", 112 | click: function () { 113 | Jupyter.notebook_list.load_list(); 114 | } 115 | } 116 | } 117 | }); 118 | }, 119 | error: function (res) { 120 | spin.modal("hide"); 121 | dialog.modal({ 122 | title: "Git Push Failed", 123 | body: $("
").append(res.responseText), 124 | button: { 125 | OK: { 126 | "class": "btn-primary", 127 | click: function () { 128 | Jupyter.notebook_list.load_list(); 129 | } 130 | } 131 | } 132 | }); 133 | } 134 | }; 135 | var url = utils.url_path_join(that.base_url, "/github/private_github_push"); 136 | utils.ajax(url, settings); 137 | } 138 | }, 139 | Cancel: {} 140 | } 141 | }); 142 | }; 143 | GithubOperation.prototype.private_github_pull = function () { 144 | var that = this; 145 | 146 | var dialog_body = $("
") 147 | .append($("")) 148 | .append($("")); 149 | 150 | dialog.modal({ 151 | title: "Pull from Github", 152 | body: dialog_body, 153 | buttons: { 154 | Pull: { 155 | class: "btn-primary", 156 | click: function () { 157 | var spin = dialog.modal({ 158 | title: "Pulling...", 159 | body: $("
") 160 | .append($("
Notebook is being pulled from github, please wait for a few seconds.
")) 161 | }); 162 | var payload = { 163 | "github_repo_url": $("#gru").val() 164 | }; 165 | var settings = { 166 | method: "POST", 167 | data: payload, 168 | success: function (res) { 169 | spin.modal("hide"); 170 | dialog.modal({ 171 | title: "Pull success!", 172 | body: $("
").append(res), 173 | buttons: { 174 | OK: { 175 | class: "btn-primary", 176 | click: function () { 177 | Jupyter.notebook_list.load_list(); 178 | } 179 | } 180 | } 181 | }); 182 | }, 183 | error: function (res) { 184 | spin.modal("hide"); 185 | dialog.modal({ 186 | title: "Pull failed!", 187 | body: $("
").append(res.responseText), 188 | buttons: { 189 | OK: {class: "btn-primary"} 190 | } 191 | }); 192 | } 193 | }; 194 | var url = utils.url_path_join(that.base_url, "/github/private_github_pull"); 195 | utils.ajax(url, settings); 196 | } 197 | }, 198 | Cancel: {} 199 | } 200 | }); 201 | }; 202 | 203 | return {GithubOperation: GithubOperation}; 204 | }); 205 | 206 | -------------------------------------------------------------------------------- /github/static/githubcommit.js: -------------------------------------------------------------------------------- 1 | define(["base/js/namespace", "base/js/dialog", "base/js/utils", "jquery"], function (Jupyter, dialog, utils, $) { 2 | 3 | var git_commit = { 4 | help: "Commit current notebook", 5 | icon: "fa-github", 6 | help_index: "", 7 | handler: function (env) { 8 | var re = /^\/notebooks(.*?)$/; 9 | var filepath = window.location.pathname.match(re)[1]; 10 | var repo = filepath.substring(1, filepath.lastIndexOf("/")); 11 | var filename = filepath.substring(filepath.lastIndexOf("/") + 1, filepath.length); 12 | var dialog_body = $("
") 13 | .append("\n" + 14 | ""); 15 | 16 | dialog.modal({ 17 | title: "Commit Notebook", 18 | body: dialog_body, 19 | buttons: { 20 | Commit: { 21 | class: "btn-primary", 22 | click: function () { 23 | var payload = { 24 | "repo": repo, 25 | "filename": filename, 26 | "option": $("input[name=optradio]:checked", "#option").val() 27 | }; 28 | if (repo === "/"){ alert("Please commit inside local repo!"); return; } 29 | var spin = dialog.modal({ 30 | title: "Committing...", 31 | body: $("
") 32 | .append($("
Notebook is being committed to local github repository, please wait for a few seconds.
")) 33 | }); 34 | var settings = { 35 | method: "POST", 36 | data: payload, 37 | success: function (res) { 38 | spin.modal("hide"); 39 | dialog.modal({ 40 | title: "Commit Success!", 41 | body: $("
").append(res), 42 | button: { 43 | OK: { "class": "btn-primary" } 44 | } 45 | }); 46 | }, 47 | error: function (res) { 48 | spin.modal("hide"); 49 | dialog.modal({ 50 | title: "Commit Failed", 51 | body: $("
").append(res.responseText), 52 | button: { 53 | OK: { "class": "btn-primary" } 54 | } 55 | }); 56 | } 57 | }; 58 | var url = utils.url_path_join(Jupyter.notebook.base_url, "/github/private_github_commit"); 59 | utils.ajax(url, settings); 60 | } 61 | }, 62 | Cancel: {} 63 | }, 64 | keyboard_manager: env.notebook.keyboard_manager 65 | }); 66 | } 67 | }; 68 | 69 | function _on_load () { 70 | var action_name = Jupyter.actions.register(git_commit, "commit", "git"); 71 | Jupyter.toolbar.add_buttons_group([action_name]); 72 | } 73 | return { load_ipython_extension: _on_load }; 74 | }); 75 | 76 | -------------------------------------------------------------------------------- /github/static/githubmain.js: -------------------------------------------------------------------------------- 1 | define(["jquery", 2 | "base/js/namespace", 3 | "base/js/utils", 4 | "./github" 5 | ], function ($, Jupyter, utils, githuboperation) { 6 | function load_ipython_extension () { 7 | var github_html = $("
\n" + 8 | " \n" + 12 | " \n" + 20 | "
"); 21 | 22 | $(".tree-buttons > .pull-right").prepend(github_html); 23 | 24 | var _selection_changed = Jupyter.notebook_list.__proto__._selection_changed; 25 | 26 | var gitoperation = new githuboperation.GithubOperation(); 27 | 28 | Jupyter.notebook_list.__proto__._selection_changed = function () { 29 | _selection_changed.apply(this); 30 | var selected = this.selected; 31 | if (selected.length === 1) { 32 | $(".private-github-push").css("display", "block"); 33 | } else { 34 | $(".private-github-push").css("display", "none"); 35 | } 36 | }; 37 | Jupyter.notebook_list._selection_changed(); 38 | } 39 | 40 | return { 41 | load_ipython_extension: load_ipython_extension 42 | }; 43 | }); 44 | 45 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: PPExtensions 2 | theme: readthedocs 3 | -------------------------------------------------------------------------------- /ppextensions/__init__.py: -------------------------------------------------------------------------------- 1 | def _jupyter_nbextension_paths(): 2 | return [] 3 | -------------------------------------------------------------------------------- /ppextensions/ppmagics/__init__.py: -------------------------------------------------------------------------------- 1 | from ppextensions.ppmagics.ppmagics import * 2 | -------------------------------------------------------------------------------- /ppextensions/ppmagics/parameters.py: -------------------------------------------------------------------------------- 1 | """Copyright (c) 2018, PayPal Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 
7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | """ 25 | 26 | from IPython.core.magic import Magics, magics_class, line_magic, needs_local_scope 27 | from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring 28 | 29 | from ppextensions.pputils import ParameterWidgets, ParameterArgs, WidgetType 30 | from ppextensions.pputils.utils.exceptions import InvalidParameterType 31 | 32 | try: 33 | from traitlets.config.configurable import Configurable 34 | 35 | except ImportError: 36 | from IPython.config.configurable import Configurable 37 | 38 | 39 | @magics_class 40 | class Parameters(Magics, Configurable): 41 | """Parameterization magics..""" 42 | 43 | def __init__(self, shell): 44 | Configurable.__init__(self, config=shell.config) 45 | Magics.__init__(self, shell=shell) 46 | self.widgets = ParameterWidgets(shell) 47 | self.shell.user_ns['ppwidgets'] = self.widgets 48 | # Add ourself to the list of module configurable via %config 49 | self.shell.configurables.append(self) 50 | 51 | @needs_local_scope 52 | @magic_arguments() 53 | @line_magic('parameter') 54 | @argument("-n", "--name", type=str, help="Name of the widget.") 55 | @argument("-t", "--type", type=str, default="read", 56 | help="Type of parameter. textbox/dropdown are currently supported.") 57 | @argument("-d", "--defaultValue", type=str, default="textbox", 58 | help="Type of parameter. textbox/dropzone are currently supported.") 59 | @argument("-v", "--values", type=str, default='', 60 | help="List of values separated by ':::', if type is dropdown. Ex: first:::last") 61 | def parameter(self, arg, local_ns={}): 62 | """ 63 | Magic to parameterize your notebooks. 64 | 65 | This magic allows you to parameterize notebooks. You can create two kinds of parameterization - textbox and dropdown. 
66 | 67 | Create a parameter widget: 68 | ========================== 69 | 70 | %parameter -t textbox -n myTextbox -d text 71 | 72 | %parameter -t dropdown -n myDropdown -d dropdown1 -v dropdown1:::dropdown2 73 | 74 | 75 | Reading from widget: 76 | ==================== 77 | 78 | %parameter -n myTextbox 79 | 80 | The same can be done using ppwidgets python API. 81 | 82 | """ 83 | user_ns = self.shell.user_ns.copy() 84 | user_ns.update(local_ns) 85 | args = ParameterArgs(parse_argstring(self.parameter, arg)) 86 | if args.widget_type() is WidgetType.READ: 87 | return self.widgets.get(args.get('name')) 88 | elif args.widget_type() is WidgetType.TEXTBOX: 89 | self.widgets.text(args.get('name'), args.get('defaultValue')) 90 | elif args.widget_type() is WidgetType.DROPDOWN: 91 | if args.get('defaultValue') not in args.get_list('values'): 92 | raise InvalidParameterType("defaultValue should be present in dropdown values specified.") 93 | self.widgets.dropdown(args.get('name'), args.get('defaultValue'), args.get_list('values')) 94 | else: 95 | raise InvalidParameterType("%s is not supported" % args.widget_type()) 96 | 97 | 98 | def load_ipython_extension(ip): 99 | """Load the extension in IPython.""" 100 | ip.register_magics(Parameters) 101 | -------------------------------------------------------------------------------- /ppextensions/ppsql/__init__.py: -------------------------------------------------------------------------------- 1 | from .connection.csvconnection import CSVConnection 2 | from .connection.hiveconnection import HiveConnection 3 | from .connection.prestoconnection import PrestoConnection 4 | from .connection.teradataconnection import TeradataConnection 5 | -------------------------------------------------------------------------------- /ppextensions/ppsql/connection/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/paypal/PPExtensions/596e5b532cc0e1e7337295b8b368a53bd9432a33/ppextensions/ppsql/connection/__init__.py -------------------------------------------------------------------------------- /ppextensions/ppsql/connection/basesql.py: -------------------------------------------------------------------------------- 1 | """Copyright (c) 2018, PayPal Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | """ 25 | 26 | """Base class for all connections. 
Each connection will implement this class.""" 27 | 28 | import abc 29 | 30 | 31 | class BaseConnection: 32 | def __init__(self, connection): 33 | self.connection = connection 34 | 35 | def __del__(self): 36 | try: 37 | if self.connection: 38 | self.connection.close() 39 | except BaseException: 40 | pass 41 | 42 | @abc.abstractmethod 43 | def execute(self, sql, displaylimit, progress_bar=False): 44 | """ 45 | Executes sql. 46 | :param progress_bar: 47 | :param sql: 48 | :return: 49 | """ 50 | -------------------------------------------------------------------------------- /ppextensions/ppsql/connection/csvconnection.py: -------------------------------------------------------------------------------- 1 | """Copyright (c) 2018, PayPal Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | """ 25 | 26 | """This class enables working with CSV files. Implements BaseConnection.""" 27 | 28 | import re 29 | import pandas as pd 30 | 31 | from IPython import get_ipython 32 | 33 | from ppextensions.ppsql.connection.basesql import BaseConnection 34 | from ppextensions.pputils.utils.exceptions import InvalidParameterType 35 | 36 | 37 | class CSVConnection(BaseConnection): 38 | first_run = True 39 | dflist = [] 40 | 41 | def __init__(self): 42 | super(CSVConnection, self).__init__('') 43 | 44 | def execute(self, sql): 45 | return self._execute_csv_data_(str(sql)) 46 | 47 | def _execute_csv_data_(self, query): 48 | """ Parse the sql query csv fields Returns the required csv results for persisted dataframe. 49 | """ 50 | ipython = get_ipython() 51 | if self.first_run: 52 | ipython.magic("reload_ext sql") 53 | self.first_run = False 54 | try: 55 | filename = re.split("from", query, 1, flags=re.IGNORECASE)[1].split()[0] 56 | df_name = filename.replace("/", "_").replace(" ", "_").replace(".", "_").replace(":", "_").replace("-", "") 57 | except BaseException: 58 | raise InvalidParameterType("Problem in select query. 
Type the correct query and try again") 59 | if df_name in self.dflist: 60 | query = query.replace(filename, df_name) 61 | result = ipython.magic("sql {}".format(query)) 62 | else: 63 | query = query.replace(filename, df_name) 64 | try: 65 | if filename.endswith('.tsv'): 66 | exec('{}= pd.read_csv(\'{}\',sep=\'\\t\')'.format(df_name, filename)) 67 | else: 68 | exec('{}= pd.read_csv(\'{}\')'.format(df_name, filename)) 69 | except IOError: 70 | raise IOError('File %s does not exist. Please type correct file name and try again' % (filename)) 71 | ipython.magic("sql sqlite://") 72 | ipython.magic("sql persist {}".format(df_name)) 73 | self.dflist.append(df_name) 74 | result = ipython.magic("sql {}".format(query)) 75 | return result 76 | -------------------------------------------------------------------------------- /ppextensions/ppsql/connection/prestoconnection.py: -------------------------------------------------------------------------------- 1 | """Copyright (c) 2018, PayPal Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | """ 25 | 26 | """This class enables connecting to Presto. Implements BaseConnection.""" 27 | 28 | import getpass 29 | import os 30 | 31 | import sqlalchemy 32 | 33 | from ppextensions.ppsql.connection.basesql import BaseConnection 34 | from ppextensions.pputils import PrestoStatusBar, ResultSet 35 | from ppextensions.pputils.utils.configuration import conf_info 36 | 37 | 38 | class PrestoConnection(BaseConnection): 39 | def __init__(self, cluster, host, port, auth): 40 | 41 | self.cluster_details = conf_info('presto') 42 | if cluster in self.cluster_details: 43 | self.cluster_details = self.cluster_details[cluster] 44 | host = self.cluster_details['host'] 45 | port = self.cluster_details['port'] 46 | auth = self.cluster_details['auth'] 47 | 48 | if auth.upper() == 'GSSAPI': 49 | PrestoConnection._authecticate_() 50 | engine = sqlalchemy.create_engine("presto://%s:%d/" % (host, port)) 51 | self.session = None 52 | super(PrestoConnection, self).__init__(engine) 53 | 54 | def execute(self, sql, limit, displaylimit, progress_bar): 55 | if progress_bar is False: 56 | return self._execute_without_progress_bar_(sql, limit, displaylimit) 57 | 58 | cursor = self.connection.execute("%s" % sql).cursor 59 | PrestoStatusBar(cursor) 60 | 61 | keys = [] 62 | if cursor.description is not None and isinstance(cursor.description, list): 63 | for column in cursor.description: 64 | keys.append(column[0]) 65 | if limit: 66 | data = cursor.fetchmany(size=limit) 67 | else: 68 | data = 
cursor.fetchall()

        # ResultSet renders at most ``displaylimit`` rows.
        return ResultSet(keys, data, displaylimit)

    def _execute_without_progress_bar_(self, sql, limit, displaylimit):
        """Run ``sql`` on a lazily-created, cached connection and wrap the rows in a ResultSet."""
        if self.session is None:
            self.session = self.connection.connect()

        result = self.session.execute("%s" % sql)
        keys = result.keys()
        # ``limit`` caps how many rows are fetched; falsy means fetch all.
        if limit:
            data = result.fetchmany(size=limit)
        else:
            data = result.fetchall()
        result.close()
        return ResultSet(keys, data, displaylimit)

    @staticmethod
    def _authecticate_():
        """
        Enables Kerberos authentication of connection.
        """
        # NOTE(review): the method name contains a typo ("authecticate");
        # the call site in this file uses the same spelling, so it is left
        # unchanged here.
        # Only prompt when there is no valid ticket (klist -s exits non-zero).
        if os.system('klist -s') != 0:
            inp = 0
            while inp not in ("1", "2"):
                inp = input("Do you have keytab or password? If keytab Enter 1 else 2: ")
                if inp != "1" and inp != "2":
                    print("You must type 1 or 2")
            if inp == "1":
                # Keytab path: a single kinit -kt attempt.
                connection = False
                location = input("Enter keytab Location ")
                principal = input("Enter keytab Principal ")
                if os.system("kinit -kt %s %s" % (location, principal)) == 0:
                    print("Successfully renewed kerberos ticket.")
                    connection = True
                if not connection:
                    print(
                        "Error:Some problem with your keytab principal and location , please check your password and "
                        "restart the kernel with correct password.")
            if inp == "2":
                # Password path: up to four interactive attempts.
                connection = False
                for _ in range(0, 4):
                    print("Enter Kerberos password to renew Kerberos ticket: ")
                    password = getpass.getpass()
                    user = getpass.getuser()
                    # NOTE(review): the password is interpolated into a shell
                    # command line — it can leak via the process listing and
                    # breaks on shell metacharacters; consider running kinit
                    # via subprocess with the password on stdin instead.
                    # TODO confirm before changing behavior.
                    cmd = "export password='%s'; echo $password|kinit %s" % (password, user)
                    if os.system(cmd) == 0:
                        print("Successfully renewed kerberos ticket.")
                        connection = True
                        break
                    else:
                        print("Invalid password, please try again")
                if not connection:
                    print(
                        "Error: Some problem with your LDAP password, please check your password and restart the "
                        "kernel with correct password.")
-------------------------------------------------------------------------------- /ppextensions/pputils/__init__.py: -------------------------------------------------------------------------------- 1 | from .widgets import ParameterWidgets 2 | from .utils import ParameterArgs, WidgetType, FileSystemReaderWriter, conf_info 3 | from .utils import Log 4 | from .utils.tableau import publish 5 | from .utils.exceptions import wrap_exceptions 6 | from .widgets.widgets import PrestoStatusBar 7 | from .widgets.messages import UserMessages 8 | from .utils.resultset import ResultSet 9 | -------------------------------------------------------------------------------- /ppextensions/pputils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .parameterargs import ParameterArgs, WidgetType 2 | from .filesystemreader import FileSystemReaderWriter 3 | from .configuration import conf_info 4 | from .constants import HOME_PATH, CONFIG_FILE 5 | from .log import Log 6 | -------------------------------------------------------------------------------- /ppextensions/pputils/utils/configuration.py: -------------------------------------------------------------------------------- 1 | """Copyright (c) 2018, PayPal Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 
def load_conf(path, fsrw_class=None):
    """
    Read the configuration file at ``path`` and return its contents as a dict.

    :param path: location of the JSON configuration file.
    :param fsrw_class: reader/writer class to use; defaults to
        FileSystemReaderWriter (overridable so tests can inject a fake).
    :return: parsed configuration dictionary; empty dict for a blank file.
    """
    if fsrw_class is None:
        fsrw_class = FileSystemReaderWriter

    config_file = fsrw_class(path)
    config_file.ensure_file_exists()
    # A blank (or whitespace-only) file means "no configuration yet".
    content = u"".join(config_file.read_lines()).strip()
    return json.loads(content) if content else {}
59 | """ 60 | conf_details = load_conf(PATH) 61 | config_dict = {} 62 | 63 | if engine in conf_details: 64 | config_dict = conf_details[engine] 65 | 66 | return config_dict 67 | -------------------------------------------------------------------------------- /ppextensions/pputils/utils/constants.py: -------------------------------------------------------------------------------- 1 | """Copyright (c) 2018, PayPal Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""PPExtensions Constants."""

import os

# Directory holding PPExtensions configuration; override via the
# PPMAGICS_CONF_DIR environment variable. Note the default is left
# unexpanded ("~") — consumers expand it (see FileSystemReaderWriter).
HOME_PATH = os.environ.get("PPMAGICS_CONF_DIR", "~/.ppextensions")
# Name of the JSON configuration file inside HOME_PATH; override via
# the PPMAGICS_CONF_FILE environment variable.
CONFIG_FILE = os.environ.get("PPMAGICS_CONF_FILE", "config.json")
class ParameterNotDefined(Exception):
    """Raised when a referenced parameter has not been defined."""

    def __init__(self, parameter_name):
        super().__init__("Parameter %s not defined." % parameter_name)


class UnsupportedCluster(Exception):
    """Raised when the requested cluster is not supported."""

    def __init__(self, cluster):
        super().__init__("Cluster %s is not supported." % cluster)


class InvalidParameterType(Exception):
    """Raised when a parameter's type does not match what was expected."""

    def __init__(self, message):
        super().__init__(message)


class MissingArgument(Exception):
    """Raised when a required argument is absent."""

    def __init__(self, param):
        super().__init__("Missing required argument %s" % param)


class TableauException(Exception):
    """Raised for Tableau-related errors."""

    def __init__(self, message):
        super().__init__(message)


class ResourceManagerException(Exception):
    """Raised for resource-allocation errors."""

    def __init__(self, message):
        super().__init__(message)
def wrap_exceptions(function_name):
    """
    Decorator that logs any exception raised by the wrapped function,
    surfaces it to the IPython shell when one is available, and re-raises.

    :param function_name: the function to wrap.
    :return: wrapped function with identical signature and return value.
    """

    @functools.wraps(function_name)
    def wrapper(*args, **kwargs):
        try:
            return function_name(*args, **kwargs)
        except Exception as error_msg:
            # log the exception
            log = Log(function_name.__name__, 'wrap_exception')
            error_formatted_message = '{}: {}'.format(error_msg.__class__.__name__, error_msg)
            log.exception(error_formatted_message)
            # get_ipython() returns None outside an IPython session, and some
            # shell versions lack write_err; guard so that error *reporting*
            # can never mask the original exception with an AttributeError.
            shell = get_ipython()
            if shell is not None and hasattr(shell, 'write_err'):
                shell.write_err(error_formatted_message)
            # Bare raise preserves the original traceback.
            raise
    return wrapper
class FileSystemReaderWriter:
    """
    Reads and writes the PPExtensions configuration file.

    Credits: Thanks to 'SPARKMAGIC' for FileSystemReader.
    """

    def __init__(self, path):
        """:param path: configuration file path; a leading ``~`` is expanded."""
        assert path is not None
        self.path = os.path.expanduser(path)

    def ensure_path_exists(self):
        """
        Ensure PPExtensions path exists in user's home.
        """
        FileSystemReaderWriter._ensure_path_exists(self.path)

    def ensure_file_exists(self):
        """
        Ensure the PPExtensions configuration file exists, creating an empty
        file (and any missing parent directories) if necessary.
        """
        self._ensure_path_exists(os.path.dirname(self.path))
        if not os.path.exists(self.path):
            open(self.path, 'w').close()

    def read_lines(self):
        """
        Read PPExtensions configuration.

        :return: list of lines from the file, or the empty string when the
            file does not exist (kept as-is for backward compatibility with
            callers that join the result).
        """
        if os.path.isfile(self.path):
            # "r" instead of the previous "r+": reading requires no write
            # permission, so this also works on read-only config files.
            with open(self.path, "r") as config_file:
                return config_file.readlines()
        return ""

    def overwrite_with_line(self, line):
        """
        Replace the file's contents with the given text.
        """
        with open(self.path, "w+") as config_file:
            config_file.writelines(line)

    @staticmethod
    def _ensure_path_exists(path):
        """
        Create the directory path to the PPExtensions configuration.
        """
        # exist_ok=True replaces the previous try/except OSError dance and is
        # free of the check-then-create race; it still raises when `path`
        # exists but is not a directory, matching the old behavior.
        os.makedirs(path, exist_ok=True)
class Log:
    """
    Custom file-based logging for PPExtensions.

    Each instance logs to ``~/logs/ppextensions.log`` by default, tagging
    every record with the current OS user and an optional module prefix.
    """

    def __init__(self, logger_name, module='',
                 filename='{}/logs/ppextensions.log'.format(str(Path.home())),
                 level=logging.INFO):
        """
        :param logger_name: name passed to ``logging.getLogger``.
        :param module: optional prefix prepended to every message.
        :param filename: destination log file.
        :param level: logging threshold.
        """
        self.logger_name = logger_name
        self._module = module
        # exist_ok avoids the race between the old isdir check and mkdir,
        # and tolerates the directory already existing.
        os.makedirs("{}/logs/".format(str(Path.home())), exist_ok=True)
        logging.basicConfig(
            filename=filename,
            level=level,
            format='%(asctime)-4s %(levelname)-4s %(name)-4s {} %(message)s'.format(getpass.getuser()),
            datefmt='%m-%d %H:%M:%S'
        )
        self._init_logger_()

    def _init_logger_(self):
        """
        Initialize logger.
        """
        self.logger = logging.getLogger(self.logger_name)

    def debug(self, message):
        """
        Logging debug messages.
        """
        self.logger.debug(self._format_message_(message))

    def error(self, message):
        """
        Logging error messages.
        """
        self.logger.error(self._format_message_(message))

    def info(self, message):
        """
        Logging info.
        """
        self.logger.info(self._format_message_(message))

    def exception(self, message):
        """
        Logging exceptions (records the active traceback).
        """
        self.logger.exception(self._format_message_(message))

    def _format_message_(self, message):
        """
        Prefix the message with the module name when one was provided.
        """
        # Was `len(self._module) is 0`: an identity comparison that only
        # worked via CPython's small-int cache and warns on Python 3.8+.
        if self._module is None or len(self._module) == 0:
            return message
        return '{} {}'.format(self._module, message)
class WidgetType(Enum):
    """
    Enumeration of widget types accepted by the parameter magics.

    Members are constructed from their string values,
    e.g. ``WidgetType("textbox")`` yields ``WidgetType.TEXTBOX``.
    """

    TEXTBOX = "textbox"
    DROPDOWN = "dropdown"
    READ = "read"
45 | """ 46 | 47 | def __init__(self, args): 48 | self.args = args 49 | 50 | def widget_type(self): 51 | """ 52 | Get Widget type. 53 | """ 54 | try: 55 | return WidgetType(getattr(self.args, 'type')) 56 | except ValueError: 57 | raise InvalidParameterType("Invalid parameter type. Only textbox or dropdown are supported.") 58 | 59 | def get(self, key): 60 | """ 61 | Get parameter value. 62 | """ 63 | if hasattr(self.args, key): 64 | param_value = getattr(self.args, key) 65 | else: 66 | raise MissingArgument(key) 67 | 68 | return param_value 69 | 70 | def hasattr(self, key): 71 | """ 72 | Checks if paramter is present. 73 | """ 74 | return hasattr(self.args, key) 75 | 76 | def get_list(self, key): 77 | """ 78 | Get list of parameters. 79 | """ 80 | return self.get(key).split(":::") 81 | -------------------------------------------------------------------------------- /ppextensions/pputils/utils/resultset.py: -------------------------------------------------------------------------------- 1 | """Copyright (c) 2018, PayPal Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. 
import operator
import re

import prettytable
import six

from functools import reduce

from sql.column_guesser import ColumnGuesserMixin
from sql.run import CsvResultDescriptor, UnicodeWriter


class ResultSet(list, ColumnGuesserMixin):
    """
    Results of SQL outputs.

    Behaves as a list of result rows while also offering HTML/text
    pretty-printing, pandas conversion, plotting helpers and CSV export.

    Credits: Thanks to 'Ipython-sql' for ResultSet.
    """

    def __init__(self, columns, data, displaylimit=100):
        """
        :param columns: column names of the result.
        :param data: iterable of result rows.
        :param displaylimit: maximum rows rendered for display (0/None = all).
        """
        self.keys = columns
        self.displaylimit = displaylimit
        self.field_names = unduplicate_field_names(self.keys)
        self.pretty = prettytable.PrettyTable(self.field_names)
        self.style = prettytable.__dict__["DEFAULT"]
        list.__init__(self, data)
        self.data = data
        # Only the first `displaylimit` rows are materialised for display.
        for row in self[:self.displaylimit or None]:
            self.pretty.add_row(row)

    def __getitem__(self, key):
        """
        Access by integer (row position within result set)
        or by string (value of leftmost column).

        :raises KeyError: when a string key matches zero or multiple rows.
        """
        try:
            return list.__getitem__(self, key)
        except TypeError:
            result = [row for row in self if row[0] == key]
            if not result:
                raise KeyError(key)
            if len(result) > 1:
                raise KeyError('%d results for "%s"' % (len(result), key))
            return result[0]

    def _repr_html_(self):
        """HTML rendering used by Jupyter, truncated to ``displaylimit``."""
        # NOTE(review): the '<td>' anchor and the '&nbsp;' replacement were
        # restored from upstream ipython-sql. As previously written the
        # pattern was r'()( {2,})' and the substitution replaced runs of
        # spaces with an equal number of plain spaces — a no-op defeating the
        # "make spaces visible in HTML" purpose stated on _nonbreaking_spaces.
        _cell_with_spaces_pattern = re.compile(r'(<td>)( {2,})')
        if self.pretty:
            result = self.pretty.get_html_string()
            result = _cell_with_spaces_pattern.sub(_nonbreaking_spaces, result)
            if self.displaylimit and len(self) > self.displaylimit:
                result = '%s\n%d rows, truncated to displaylimit of %d' % (
                    result, len(self), self.displaylimit)
            return result
        else:
            return None

    def __str__(self, *arg, **kwarg):
        return str(self.pretty or '')

    def dict(self):
        """Returns a dict built from the result set, with column names as keys"""
        return dict(zip(self.keys, zip(*self)))

    def DataFrame(self):
        """Returns a Pandas DataFrame instance built from the result set."""
        import pandas as pd
        frame = pd.DataFrame(self.data, columns=(self and self.keys) or [])
        return frame

    def pie(self, key_word_sep=" ", title=None, **kwargs):
        """Generates a pylab pie chart from the result set.
        ``matplotlib`` must be installed, and in an
        IPython Notebook, inlining must be on::
            %%matplotlib inline
        Values (pie slice sizes) are taken from the
        rightmost column (numerical values required).
        All other columns are used to label the pie slices.
        Parameters
        ----------
        key_word_sep: string used to separate column values
                      from each other in pie labels
        title: Plot title, defaults to name of value column
        Any additional keyword arguments will be passed
        through to ``matplotlib.pylab.pie``.
        """
        self.guess_pie_columns(xlabel_sep=key_word_sep)
        import matplotlib.pylab as plt
        pie = plt.pie(self.ys[0], labels=self.xlabels, **kwargs)
        plt.title(title or self.ys[0].name)
        return pie

    def plot(self, title=None, **kwargs):
        """Generates a pylab plot from the result set.
        ``matplotlib`` must be installed, and in an
        IPython Notebook, inlining must be on::
            %%matplotlib inline
        The first and last columns are taken as the X and Y
        values. Any columns between are ignored.
        Parameters
        ----------
        title: Plot title, defaults to names of Y value columns
        Any additional keyword arguments will be passed
        through to ``matplotlib.pylab.plot``.
        """
        import matplotlib.pylab as plt
        self.guess_plot_columns()
        self.x = self.x_value or range(len(self.ys[0]))
        coords = reduce(operator.add, [(self.x, y) for y in self.ys])
        plot = plt.plot(*coords, **kwargs)
        if hasattr(self.x, 'name'):
            plt.xlabel(self.x.name)
        ylabel = ", ".join(y.name for y in self.ys)
        plt.title(title or ylabel)
        plt.ylabel(ylabel)
        return plot

    def bar(self, key_word_sep=" ", title=None, **kwargs):
        """Generates a pylab bar plot from the result set.
        ``matplotlib`` must be installed, and in an
        IPython Notebook, inlining must be on::
            %%matplotlib inline
        The last quantitative column is taken as the Y values;
        all other columns are combined to label the X axis.
        Parameters
        ----------
        title: Plot title, defaults to names of Y value columns
        key_word_sep: string used to separate column values
                      from each other in labels
        Any additional keyword arguments will be passed
        through to ``matplotlib.pylab.bar``.
        """
        import matplotlib.pylab as plt
        self.guess_pie_columns(xlabel_sep=key_word_sep)
        plot = plt.bar(range(len(self.ys[0])), self.ys[0], **kwargs)
        if self.xlabels:
            plt.xticks(range(len(self.xlabels)), self.xlabels,
                       rotation=45)
        plt.xlabel(self.xlabel)
        plt.ylabel(self.ys[0].name)
        return plot

    def csv(self, filename=None, **format_params):
        """Generate results in comma-separated form. Write to ``filename`` if given.
        Any other parameters will be passed on to csv.writer."""
        if not self.pretty:
            return None  # no results
        if filename:
            encoding = format_params.get('encoding', 'utf-8')
            # NOTE(review): 'encoding' is read with .get and therefore also
            # forwarded to UnicodeWriter below — confirm the writer tolerates
            # (or expects) that keyword.
            if six.PY2:
                outfile = open(filename, 'wb')
            else:
                outfile = open(filename, 'w', newline='', encoding=encoding)
        else:
            outfile = six.StringIO()
        writer = UnicodeWriter(outfile, **format_params)
        writer.writerow(self.field_names)
        for row in self:
            writer.writerow(row)
        if filename:
            outfile.close()
            return CsvResultDescriptor(filename)
        else:
            return outfile.getvalue()


def _nonbreaking_spaces(match_obj):
    """
    Make spaces visible in HTML by replacing all `` `` with ``&nbsp;``
    Call with a ``re`` match object. Retain group 1, replace group 2
    with nonbreaking spaces.
    """
    # Restored from upstream ipython-sql: the replacement text had degraded
    # to plain spaces, turning the substitution into a no-op.
    spaces = '&nbsp;' * len(match_obj.group(2))
    return '%s%s' % (match_obj.group(1), spaces)


def unduplicate_field_names(field_names):
    """Append a number to duplicate field names to make them unique. """
    res = []
    for k in field_names:
        if k in res:
            i = 1
            while k + '_' + str(i) in res:
                i += 1
            k += '_' + str(i)
        res.append(k)
    return res
7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | """ 25 | 26 | """Manage Tableau Publish.""" 27 | 28 | import datetime 29 | import decimal 30 | import getpass 31 | import glob 32 | import os 33 | import re 34 | import subprocess 35 | 36 | import pandas as pd 37 | from IPython.core.display import display, HTML 38 | from paramiko.ssh_exception import SSHException 39 | from pysftp.exceptions import ConnectionException 40 | 41 | try: 42 | from tableausdk import * 43 | from tableausdk.Extract import * 44 | TABLEAU_SDK_NOT_FOUND = False 45 | except ImportError: 46 | TABLEAU_SDK_NOT_FOUND = True 47 | 48 | 49 | from ppextensions.pputils.utils.configuration import conf_info 50 | from ppextensions.pputils.widgets.messages import UserMessages 51 | from .resultset import ResultSet 52 | 53 | 54 | def tableau_extract(resultset, data_file): 55 | """ 56 | Create TDE extract. 57 | """ 58 | 59 | if isinstance(resultset, ResultSet): 60 | df_name = resultset.DataFrame() 61 | else: 62 | df_name = resultset 63 | data_type = [] 64 | fieldnames = [] 65 | data_type_map = {int: Type.INTEGER, 66 | str: Type.UNICODE_STRING, 67 | bool: Type.BOOLEAN, 68 | bytearray: Type.CHAR_STRING, 69 | list: Type.CHAR_STRING, 70 | dict: Type.CHAR_STRING, 71 | float: Type.DOUBLE, 72 | decimal.Decimal: Type.DOUBLE, 73 | datetime.date: Type.DATE, 74 | datetime.time: Type.DURATION, 75 | datetime.datetime: Type.DATETIME, 76 | pd._libs.tslib.Timestamp: Type.DATETIME 77 | } 78 | 79 | for col in df_name: 80 | fieldnames.append(col) 81 | data_type.append(df_name[col].apply(type).iat[0]) 82 | data_dict = dict(zip(fieldnames, data_type)) 83 | 84 | for col_name in data_dict: 85 | if data_dict[col_name] in data_type_map: 86 | data_dict[col_name] = data_type_map[data_dict[col_name]] 87 | else: 88 | data_dict[col_name] = Type.UNICODE_STRING 89 | # Initialize a new extract 90 | try: 91 | os.remove(data_file) 92 | except OSError: 93 | pass 94 | new_extract = Extract(data_file) 95 | table_definition = TableDefinition() 96 | for col_name in data_dict: 97 | 
table_definition.addColumn(col_name, data_dict[col_name]) 98 | new_table = new_extract.addTable('Extract', table_definition) 99 | new_row = Row(table_definition) 100 | tde_types = {'INTEGER': 7, 'DOUBLE': 10, 'BOOLEAN': 11, 'DATE': 12, 101 | 'DATETIME': 13, 'DURATION': 14, 102 | 'CHAR_STRING': 15, 'UNICODE_STRING': 16} 103 | for i in range(0, len(df_name)): 104 | for col in range(0, table_definition.getColumnCount()): 105 | col_name = table_definition.getColumnName(col) 106 | try: 107 | if data_dict[col_name] == tde_types['INTEGER']: 108 | new_row.setInteger(col, int(df_name[col_name][i])) 109 | elif data_dict[col_name] == tde_types['DOUBLE']: 110 | new_row.setDouble(col, float(df_name[col_name][i])) 111 | elif data_dict[col_name] == tde_types['BOOLEAN']: 112 | new_row.setBoolean(col, bool(df_name[col_name][i])) 113 | elif data_dict[col_name] == tde_types['DATE']: 114 | data = df_name[col_name][i] 115 | new_row.setDate(col, data.year, data.month, data.day) 116 | elif data_dict[col_name] == tde_types['DATETIME']: 117 | data = df_name[col_name][i] 118 | new_row.setDateTime(col, data.year, data.month, data.day, 119 | data.hour, data.minute, data.second, 0) 120 | elif data_dict[col_name] == tde_types['DURATION']: 121 | data = df_name[col_name][i] 122 | new_row.setDuration(col, data.hour, data.minute, data.second, 0) 123 | elif data_dict[col_name] == tde_types['CHAR_STRING']: 124 | new_row.setCharString(col, str(df_name[col_name][i])) 125 | elif data_dict[col_name] == tde_types['UNICODE_STRING']: 126 | new_row.setString(col, str(df_name[col_name][i])) 127 | else: 128 | print('Error') 129 | new_row.setNull(col) 130 | except TypeError: 131 | new_row.setNull(col) 132 | new_table.insert(new_row) 133 | 134 | new_extract.close() 135 | ExtractAPI.cleanup() 136 | for file_name in glob.glob("DataExtract*.log"): 137 | os.remove(file_name) 138 | 139 | 140 | def publish(data, data_file=None, project_name=None): 141 | """ 142 | Publish to Tableau. 
def publish(data, data_file=None, project_name=None):
    """
    Publish a DataFrame to a Tableau server as a .tde extract via tabcmd.

    :param data: DataFrame to publish.
    :param data_file: optional extract file name; '.tde' is normalized/appended
                      and the username is prefixed when a name is supplied.
    :param project_name: Tableau project to publish into (defaults to 'default').
    """
    overwrite = False
    if not data_file:
        data_file = ("data-%s.tde" % (str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')))).strip()
    else:
        data_file = re.sub(r"\.tde$", "", data_file)
        data_file = getpass.getuser() + '-' + data_file + '.tde'

    tableau_details = conf_info('tableau')

    try:
        if project_name:
            project_name = project_name.strip('\'"')
        else:
            project_name = 'default'

        log = UserMessages()

        if TABLEAU_SDK_NOT_FOUND:
            log.error("Tableau SDK not found. Please install it before using Tableau functionality.")
            return
        else:
            tableau_extract(data, data_file)

        if tableau_details:
            site_name = tableau_details['site_name']
            username = tableau_details['user_name']
            password = tableau_details['password']
        else:
            site_name = input("Enter the site name to publish ")
            username = input("Enter tableau user name ")
            password = getpass.getpass("Please enter your password ")

        data_file_name = str(data_file).rsplit('.tde', 1)[0]

        # Accept the EULA non-interactively so later tabcmd calls don't block.
        subprocess.run(["tabcmd", "--accepteula"], stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)

        # NOTE(review): passing the password on the command line exposes it in
        # the process list; tabcmd supports --password-file as an alternative.
        result = subprocess.run(["tabcmd", "login", "-s",
                                 "{}".format(site_name), "-u", "{}".format(username),
                                 "-p", "{}".format(password)], stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)

        if result.returncode != 0:
            raise ConnectionException("Unable to connect to tableau server")

        result = subprocess.run(["tabcmd", "publish",
                                 "{}".format(data_file),
                                 "-r", "{}".format(project_name)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        if result.returncode != 0:
            if ("A data source named '{}' already exists in project".format(
                    data_file_name) in str(result.stderr)):
                # Data source exists: retry once with --overwrite.
                result = subprocess.run(
                    ["tabcmd", "publish", "{}".format(data_file),
                     "-r", "{}".format(project_name), "--overwrite"], stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)
                overwrite = True
            # BUG FIX: previously this raised even when the --overwrite retry
            # succeeded (empty stderr never contains the marker string). Only
            # fail when the last tabcmd invocation actually failed.
            if result.returncode != 0 and "Unexpected response from the server:" not in str(result.stderr):
                print(result.stderr)
                raise SSHException("Unable to get response from tableau server")

    finally:
        # Clean up the temporary extract; os.remove avoids shelling out and
        # ignores the case where the extract was never written.
        try:
            os.remove(data_file)
        except OSError:
            pass

    # NOTE(review): the anchor markup below was reconstructed — the source dump
    # stripped the original HTML tags from these literals. Confirm against the
    # released ppextensions source.
    if overwrite:
        link_t = "<a href='{href}' target='_blank'>Click here to access Tableau published data</a>"
        result_file = "{}/datasources/{}".format(site_name, data_file_name)
    else:
        link_t = "<a href='{href}' target='_blank'>Click here to access Tableau published data</a>"
        result_file = "{}/authoringNewWorkbook/{}".format(site_name,
                                                          data_file_name)

    html = HTML(link_t.format(href=result_file))
    display(html)
def renew_kerberos_ticket(principal, keytab):
    """
    Renew (or obtain) a Kerberos ticket via kinit for the given principal/keytab.

    :return: True when kinit exits with status 0.
    """
    return os.system("kinit -kt %s %s" % (keytab, principal)) == 0


def parse_run_str(run_str):
    """
    Parser to support run and run pipeline magics.
    :return Run notebook filename, Save notebook filename and key value parameters.
    """
    notebook_filename = None
    notebook_save_filename = None
    params = None

    def append_notebook_ext(file_name):
        # Default to the .ipynb extension when none was supplied.
        _, ext = os.path.splitext(file_name)
        if ext == '':
            file_name = file_name + '.ipynb'
        return file_name

    save_nb_idx = run_str.find(':')
    param_start_idx = run_str.find('=')

    # "run:save a=1" — a ':' that appears before the first '=' separates the
    # run-notebook name from the save-notebook name.
    if (save_nb_idx != -1 and save_nb_idx < param_start_idx) or (save_nb_idx != -1 and param_start_idx == -1):
        notebook_filename = run_str[:save_nb_idx]
        notebook_filename = append_notebook_ext(notebook_filename)
        rest = run_str[save_nb_idx + 1:]
    else:
        rest = run_str

    args = list(re.finditer(r" [\w]+=", rest))
    num_total_args = len(args)

    if num_total_args == 0:
        if not notebook_filename:
            notebook_filename = append_notebook_ext(rest)
        else:
            notebook_save_filename = append_notebook_ext(rest)
        return notebook_filename, notebook_save_filename, params

    params = dict()

    if notebook_filename:
        notebook_save_filename = append_notebook_ext(rest[:args[0].start()])
    else:
        notebook_filename = append_notebook_ext(rest[:args[0].start()])

    # Each parameter value runs from the end of its " name=" match up to the
    # start of the next match; the final value runs to the end of the string.
    for arg_idx in range(num_total_args - 1):
        this_arg = args[arg_idx]
        next_arg = args[arg_idx + 1]
        arg = rest[this_arg.start() + 1: this_arg.end() - 1]
        value = rest[this_arg.end(): next_arg.start()]
        params[arg] = arg + '=' + value

    last_arg = rest[args[-1].start() + 1:args[-1].end() - 1]
    last_value = rest[args[-1].end():]
    params[last_arg] = last_arg + '=' + last_value

    return notebook_filename, notebook_save_filename, params


def substitute_params(param_cell_src, params):
    """
    Updates parameters of parameterized notebooks.
    Supports run and run pipeline magics.
    :return updated code cell.
    """
    param_cell_ast = ast.parse(param_cell_src)
    param_cell_ast_nodes = param_cell_ast.body
    original_params_dict = dict()
    updated_param_cell_src = ''

    def check_ast_node(ast_node):
        # Only literal values may appear in a parameters cell.
        # NOTE: ast.Str/ast.Num/ast.NameConstant are deprecated aliases of
        # ast.Constant on Python 3.8+; kept for backward compatibility.
        # (A redundant duplicate ast.Dict branch was removed — dead code.)
        if isinstance(ast_node, ast.NameConstant) and ast_node.value in (True, False):
            return True
        return isinstance(ast_node, (ast.Dict, ast.List, ast.Str, ast.Num))

    for node in param_cell_ast_nodes:

        if not isinstance(node, ast.Assign):
            raise AttributeError("Parameters cell should only contain assignments.")

        node_value = node.value
        if not check_ast_node(node_value):
            raise AttributeError("Parameters cell can contain Strings, Numbers, Bools, Lists and Dicts.")

        param_name = node.targets[0].id
        original_params_dict[param_name] = node

    for param, value in params.items():
        if param in original_params_dict:
            new_param_node = ast.parse(value).body[0]

            if not isinstance(new_param_node, ast.Assign):
                raise AttributeError("Parameters should be assignments.")

            new_param_node_value = new_param_node.value
            if not check_ast_node(new_param_node_value):
                raise AttributeError("Parameters cell can contain Strings, Numbers, Bools, Lists and Dicts.")

            # Replacement must keep the original literal's type.
            if isinstance(new_param_node_value, type(original_params_dict[param].value)):
                original_params_dict[param] = new_param_node
            else:
                raise AttributeError("Parameter Type Mismatch.")

    for _, value in original_params_dict.items():
        updated_param_cell_src = updated_param_cell_src + astor.to_source(value)

    return updated_param_cell_src


def csv_to_df(user_ns, args):
    """
    Converts CSV to DataFrame.
    :return DataFrame, or None when neither 'csv' nor 'dataframe' is supplied.
    """
    # BUG FIX: df_name was previously unbound (NameError) when no source
    # argument was given. A 'dataframe' argument takes precedence over 'csv'.
    df_name = None
    if args.get("csv"):
        df_name = pd.read_csv(args.get("csv"), index_col=0)
    if args.get("dataframe"):
        df_name = user_ns[args.get('dataframe')]
    return df_name


def df_to_csv(user_ns, args):
    """
    Converts DataFrame to CSV.
    :return path of the CSV file, or None when no source argument is supplied.
    """
    # BUG FIX: data_file was previously unbound (NameError) when neither
    # argument was given. When 'dataframe' is supplied it overrides 'csv' and
    # writes to a generated file under /home/<user> (preserved behavior).
    # NOTE(review): the hard-coded "/home/%s" prefix is not portable — TODO
    # confirm whether os.path.expanduser('~') was intended.
    data_file = None
    if args.get("csv"):
        data_file = args.get("csv")
    if args.get("dataframe"):
        data_file = "/home/%s/data-%s.csv" % (getpass.getuser(),
                                              str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')))
        df_name = user_ns[args.get('dataframe')]
        df_name.to_csv(data_file, index=False)
    return data_file


def register_autoviz_code(result):
    """
    Enables AutoViz rendering for pandas DataFrames in the current shell.
    """
    # NOTE(review): relies on get_ipython() being available as an IPython
    # interactive-shell builtin — only valid when called inside IPython.
    ip = get_ipython()
    ip.display_formatter.ipython_display_formatter.for_type_by_name(
        'pandas.core.frame', 'DataFrame', display_dataframe)
    return result.DataFrame()
"""Managing Queuing system for Spark Thrift Server."""

import requests
try:
    from urllib import urlencode
except ImportError:
    from urllib.parse import urlencode

from ppextensions.pputils.utils.exceptions import ResourceManagerException


class ResourceManager:
    """
    Managing Queuing system for Spark Thrift Server.

    Thin read-only client for the YARN ResourceManager REST API.
    """

    def __init__(self, url):
        # Base URL of the ResourceManager, e.g. "http://rm-host:8088".
        self._url = url

    def _request_(self, api_path, ignore_errors, **query_args):
        """Base request handler for all HTTP requests.

        :param api_path: path under the ResourceManager base URL.
        :param ignore_errors: when True, a non-OK response returns None
                              instead of raising.
        :raises ResourceManagerException: on a non-OK response when errors
                                          are not ignored.
        :return: decoded JSON payload, or None on an ignored error.
        """
        # BUG FIX: response_json was previously unbound, so a failed request
        # with ignore_errors=True raised UnboundLocalError instead of
        # returning None.
        response_json = None
        params = urlencode(query_args)
        if params:
            response = requests.get(url=self._url + api_path, params=params, allow_redirects=True)
        else:
            response = requests.get(url=self._url + api_path, allow_redirects=True)

        if response.ok:
            response_json = response.json()
        elif not ignore_errors:
            raise ResourceManagerException(response.text)
        return response_json

    def cluster_application(self, application_id, ignore_errors=False):
        """
        Gives status of an application from Resource Manager
        :param application_id: The application id
        :param ignore_errors: Set to True will ignore the errors
        :return: API response object with JSON data
        """
        path = '/ws/v1/cluster/apps/{appid}'.format(appid=application_id)
        return self._request_(path, ignore_errors)

    def cluster_metrics(self, ignore_errors=False):
        """
        The cluster metrics resource provides some overall metrics about the
        cluster. More detailed metrics should be retrieved from the jmx
        interface.
        :param ignore_errors: Set to True will ignore the errors
        :returns: API response object with JSON data
        """
        path = '/ws/v1/cluster/metrics'
        return self._request_(path, ignore_errors)
"""Printing log messages from PPExtensions to Notebook."""

from IPython.display import display

from .widgetsfactory import WidgetsFactory


class UserMessages:
    """
    Printing log messages from PPExtensions to Notebook.

    Renders messages into a single HTML widget. Each call overwrites the
    current widget's content; passing new_line=True first displays a fresh
    widget so the previous message stays visible.
    """

    def __init__(self):
        self._init_html_()

    def _init_html_(self):
        # Create and display an empty HTML widget that messages write into.
        self.html = WidgetsFactory.get_html(
            value=''
        )
        display(self.html)

    def _render_(self, message, new_line, color=None):
        """Shared renderer for info/warning/error messages.

        NOTE(review): the original HTML markup in these literals (the font
        tags and the newline -> <br> replacement target) was stripped from
        the source dump; this reconstruction assumes '<br>' line breaks and
        an optional font color — confirm against the released ppextensions
        source.
        """
        if new_line:
            self._init_html_()
        text = message.replace("\n", "<br>")
        if color:
            text = '<font color="{}">{}</font>'.format(color, text)
        self.html.value = text

    def info(self, message, new_line=False):
        """
        Print INFO logging to Notebook.
        """
        self._render_(message, new_line)

    def warning(self, message, new_line=False):
        """
        Print WARNING logging to Notebook.
        """
        self._render_(message, new_line, color="orange")

    def error(self, message, new_line=False):
        """
        Print Error logging to Notebook.
        """
        self._render_(message, new_line, color="red")
"""IPyWidgets for PPExtensions."""

from ppextensions.pputils.utils.exceptions import ParameterNotDefined

from .widgets import ParameterBox
from .widgetsfactory import WidgetsFactory


class ParameterWidgets:
    """
    Widget to drive user driven parameters.

    Keeps one input widget per parameter inside a ParameterBox and mirrors
    every value change into the IPython shell's user namespace.
    """

    def __init__(self, shell):
        super(ParameterWidgets, self).__init__()
        self._shell = shell
        self._para_box = ParameterBox()
        self._enabled = False
        self._child = {}
        self._data = {}
        self._enable_()

    def _enable_(self):
        # Display the container; widgets added later render inside it.
        self._enabled = True
        self._para_box.display()

    def _attach_(self, name, widget, default_value, observer):
        # Common wiring shared by text() and dropdown(): track the widget,
        # add it to the box, hook observers and publish the initial value.
        self._child[name] = widget
        self._para_box.add_child(widget)
        self._register_observer_(widget, observer)
        self._set_values_(name, default_value)

    def text(self, name, default_value, label='', observer=None):
        """
        Gives a text box for a parameter to be used in notebook.
        :param name: name of the parameter
        :param default_value: default value for the parameter.
        :param label: Is a placeholder to be used later.
        :return:
        """
        if not self._enabled:
            self._enable_()
        widget = WidgetsFactory.get_text(
            value=default_value,
            description=name,
        )
        self._attach_(name, widget, default_value, observer)

    def dropdown(self, name, default_value, sequence, label='', observer=None):
        """
        Gives a dropdown for a parameter to be used in notebook.
        :param name: name of the parameter
        :param default_value: default value for the parameter.
        :param sequence: sequence of values for dropdown.
        :param label: is a placeholder to be used later.
        :return:
        """
        if not self._enabled:
            self._enable_()
        widget = WidgetsFactory.get_dropdown(
            value=default_value,
            description=name,
            options=sequence,
        )
        self._attach_(name, widget, default_value, observer)

    def _register_observer_(self, child, observer=None):
        # Always mirror into the shell; optionally notify a caller observer.
        child.observe(self._update_shell_value_, names='value')
        if observer:
            child.observe(observer, names='value')

    def _update_shell_value_(self, event):
        # Trait-change events carry the owning widget and the new value.
        self._set_values_(event['owner'].description, event['new'])

    def disable_widgets(self):
        """
        Disables all widgets so user won't be able to change any values.
        :return:
        """
        for widget in self._child.values():
            widget.disabled = True

    def enable_widgets(self):
        """
        Enables all widgets so user will be able to change values.
        :return:
        """
        for widget in self._child.values():
            widget.disabled = False

    def _set_values_(self, name, value):
        # Record locally and publish into the notebook's namespace.
        self._data[name] = value
        self._shell.user_ns[name] = value

    def get(self, name):
        """
        Gives the value of the parameter set/changed.
        :param name: parameter name.
        :return: parameter value
        """
        try:
            return self._data[name]
        except KeyError:
            raise ParameterNotDefined(name)
"""IPyWidgets for PPExtensions."""

import qgrid

from time import sleep

from ipywidgets import Box
from ipywidgets import widgets
from pyhive.exc import DatabaseError
from IPython.display import display


class MenuWidgets(Box):
    """
    Widget to drive user driven parameters.

    Base container: a single scrollable horizontal row of child widgets.
    """

    def __init__(self):
        super(MenuWidgets, self).__init__()
        # Lay children out in one flexed, horizontally scrollable row.
        self.layout.flex_flow = 'row'
        self.layout.display = 'flex'
        self.layout.align_items = 'stretch'
        self.layout.overflow_x = 'scroll'
        self.style = {'description_width': 'initial'}
        self.children = []


class StatusBar(MenuWidgets):
    """
    Progress bar plus an HTML message area; displayed as soon as constructed.
    """

    def __init__(self):
        super(StatusBar, self).__init__()
        progress = widgets.FloatProgress(
            value=0,
            min=0,
            max=100.0,
            step=0.1,
            description='Status:',
            bar_style='info',
            orientation='horizontal',
            style=self.style,
        )
        message = widgets.HTML(
            value='',
            description='',
            style=self.style,
        )
        # children[0] is the bar, children[1] is the message area.
        self.children = [progress, message]
        self.layout.visibility = 'visible'
        display(self)

    def update_max(self, splits):
        """
        Set the progress bar's maximum.
        """
        self.children[0].max = splits

    def update_description(self, description):
        """
        Set the progress bar's label.
        """
        self.children[0].description = description

    def update_status(self, splits):
        """
        Set the progress bar's current value.
        """
        self.children[0].value = splits

    def update_info_message(self, status):
        """
        Show an informational message next to the bar.
        """
        self.children[1].value = '%s' % str(status)

    def update_status_error(self, message):
        """
        Mark the bar as failed and show an error message.
        """
        self.children[0].bar_style = 'danger'
        self.children[1].value = '%s' % str(message)

    def update_status_success(self, message):
        """
        Fill the bar, mark it successful and show a message.
        """
        self.children[0].value = self.children[0].max
        self.children[0].bar_style = 'success'
        self.children[1].value = '%s' % str(message)


class ParameterBox(MenuWidgets):
    """
    Container that accumulates parameter widgets.
    """

    def __init__(self):
        super(ParameterBox, self).__init__()

    def display(self):
        """
        Render the box.
        """
        display(self)

    def add_child(self, child):
        """
        Append one widget to the box.
        """
        self.children = self.children + (child,)


class PrestoStatusBar(StatusBar):
    """
    Progress Bar for Presto.
    """

    def __init__(self, cursor):
        super(PrestoStatusBar, self).__init__()
        self.run(cursor)

    def run(self, cursor):
        """
        Poll the Presto cursor and mirror query progress onto the bar.
        """
        # Don't use recursion here. The query might run for hours and tail-rec
        # optimization is not supported in Python.
        if not cursor:
            return
        status = 'RUNNING'
        total_tasks = 100
        completed_tasks = 0
        try:
            while status.upper() == 'RUNNING':
                sleep(1)
                poll_data = cursor.poll()
                if not (poll_data and 'stats' in poll_data and 'state' in poll_data['stats']):
                    # TODO: Need to handle better way.
                    return
                stats = poll_data['stats']
                status = stats['state']
                if 'completedSplits' in stats and completed_tasks != stats['completedSplits']:
                    completed_tasks = stats['completedSplits']
                    self.update_status(completed_tasks)
                if 'totalSplits' in stats and total_tasks != stats['totalSplits']:
                    total_tasks = stats['totalSplits']
                    self.update_max(total_tasks)
                self.update_info_message("%s - %d/%d tasks completed" % (status, completed_tasks, total_tasks))
            if status.upper() == 'FINISHED':
                self.update_status_success('Execution Completed.')
        except DatabaseError as error:
            self.update_status_error("Unable to execute query. Please check logs below.")
            raise error


class HorizontalBox(widgets.HBox):
    """
    Widgets layout in HorizontalBox.
    """

    def __init__(self, children, data=None):
        super().__init__(children)
        self.data = data

    def DataFrame(self):
        """
        Return widget data as DataFrame.
        """
        return self.data
class VerticalBox(widgets.VBox):
    """
    Widgets layout in VerticalBox.
    """

    def __init__(self, children, data=None):
        super().__init__(children)
        self.data = data

    def DataFrame(self):
        """
        Return widget data as DataFrame.
        """
        return self.data

    def csv(self, filename=None):
        """
        Write widget data to a CSV file when a filename is given.
        """
        if filename is not None:
            self.data.to_csv(filename)


class TabView(widgets.Tab):
    """
    Widget to Manage Tab Layout.
    """

    def __init__(self, children, data):
        super().__init__(children)
        self.data = data
        # Title each tab "Result Set <i>".
        for idx, _ in enumerate(data):
            self.set_title(idx, 'Result Set ' + str(idx))

    def DataFrame(self, idx=-1):
        """
        Return widget data as DataFrame.
        idx : Int Tab Index (last tab by default)
        :return DataFrame
        """
        return self.data[idx]

    def csv(self, filename=None, idx=-1):
        """
        Write the selected tab's data to a CSV file when a filename is given.
        idx : Int Tab Index (last tab by default)
        """
        if filename is not None:
            self.data[idx].to_csv(filename)


class QGridCustomWidget(qgrid.QGridWidget):
    """
    Widget to render a DataFrame as an interactive QGrid.
    """

    def __init__(self, dataframe, display_limit=1000, grid_options=None):
        # Keep the complete frame; only the first display_limit rows render.
        self.full_df = dataframe

        if grid_options is None:
            grid_options = {
                'fullWidthRows': True,
                'syncColumnCellResize': True,
                'forceFitColumns': False,
                'defaultColumnWidth': 150,
                'rowHeight': 35,
                'enableColumnReorder': True,
                'enableTextSelectionOnCells': True,
                'editable': True,
                'autoEdit': False,
                'explicitInitialization': True,
                'maxVisibleRows': 10,
                'minVisibleRows': 0,
                'maxVisibleColumns': 10,
                'minVisibleColumns': 0,
                'sortable': True,
                'filterable': True,
                'highlightSelectedCell': True,
                'highlightSelectedRow': True
            }

        super().__init__(df=self.full_df[:display_limit], grid_options=grid_options)

    def DataFrame(self):
        """
        Return widget data (the full, untruncated frame) as DataFrame.
        """
        return self.full_df

    def csv(self, filename=None):
        """
        Write the full DataFrame to a CSV file when a filename is given.
        """
        if filename is not None:
            self.full_df.to_csv(filename)
"""Get IPyWidgets."""
from ipywidgets import VBox, Output, Button, HTML, HBox, Dropdown, Checkbox, ToggleButtons, Text, Textarea, Tab, Password


class WidgetsFactory:
    """Single construction point for the ipywidgets used by PPExtensions.

    Every method is a thin static wrapper forwarding **kwargs to the
    corresponding ipywidgets constructor.
    """

    @staticmethod
    def get_vbox(**kwargs):
        """Build a VBox widget."""
        return VBox(**kwargs)

    @staticmethod
    def get_output(**kwargs):
        """Build an Output widget."""
        return Output(**kwargs)

    @staticmethod
    def get_button(**kwargs):
        """Build a Button widget."""
        return Button(**kwargs)

    @staticmethod
    def get_html(value, **kwargs):
        """Build an HTML widget from an initial value."""
        return HTML(value, **kwargs)

    @staticmethod
    def get_hbox(**kwargs):
        """Build an HBox widget."""
        return HBox(**kwargs)

    @staticmethod
    def get_dropdown(**kwargs):
        """Build a Dropdown widget."""
        return Dropdown(**kwargs)

    @staticmethod
    def get_checkbox(**kwargs):
        """Build a Checkbox widget."""
        return Checkbox(**kwargs)

    @staticmethod
    def get_toggle_buttons(**kwargs):
        """Build a ToggleButtons widget."""
        return ToggleButtons(**kwargs)

    @staticmethod
    def get_text(**kwargs):
        """Build a Text widget."""
        return Text(**kwargs)

    @staticmethod
    def get_password(**kwargs):
        """Build a Password widget."""
        return Password(**kwargs)

    @staticmethod
    def get_text_area(**kwargs):
        """Build a Textarea widget."""
        return Textarea(**kwargs)

    @staticmethod
    def get_tab(**kwargs):
        """Build a Tab widget."""
        return Tab(**kwargs)
6 | """ 7 | 8 | def _jupyter_nbextension_paths(): 9 | """Used by 'jupyter nbextension' command to install frontend extension""" 10 | return [dict( 11 | section="tree", 12 | # the path is relative to the `my_fancy_module` directory 13 | src="static", 14 | # directory in the `nbextension/` namespace 15 | dest="scheduler", 16 | # _also_ in the `nbextension/` namespace 17 | require="scheduler/scheduler"), 18 | dict( 19 | section="tree", 20 | # the path is relative to the `my_fancy_module` directory 21 | src="static", 22 | # directory in the `nbextension/` namespace 23 | dest="scheduler", 24 | # _also_ in the `nbextension/` namespace 25 | require="scheduler/schedulermain") 26 | ] 27 | 28 | 29 | def _jupyter_server_extension_paths(): 30 | """Used by "jupyter serverextension" command to install web server extension'""" 31 | return [{ 32 | "module": "scheduler.scheduler" 33 | }] 34 | -------------------------------------------------------------------------------- /scheduler/scheduler.py: -------------------------------------------------------------------------------- 1 | from notebook.utils import url_path_join 2 | from notebook.base.handlers import IPythonHandler 3 | from shutil import copyfile 4 | from sqlalchemy import create_engine 5 | from airflow import settings, models 6 | from airflow.utils.db import provide_session 7 | 8 | import datetime 9 | import configparser 10 | import getpass 11 | import os 12 | import sqlalchemy 13 | 14 | 15 | CONNECTION_STRING = os.getenv("AIRFLOW_METADATA_CONNECTION_STRING","") 16 | AIRFLOW_HOME = os.getenv("AIRFLOW_HOME","") 17 | NOTEBOOK_STARTUP_PATH = os.getcwd() + "/" 18 | DAG_TEMPLATE = os.path.dirname(os.path.abspath(__file__)) + "/template/dag_template.py" 19 | VAR_TEMPLATE = os.path.dirname(os.path.abspath(__file__)) + "/template/var_template.conf" 20 | SCHEDULER_STATIC_FILE_PATH = os.path.dirname(os.path.abspath(__file__)) + "/static" 21 | 22 | 23 | class SchedulerHandler(IPythonHandler): 24 | try: 25 | engine = 
create_engine(CONNECTION_STRING) 26 | except sqlalchemy.exc.ArgumentError: 27 | pass 28 | cf = configparser.ConfigParser() 29 | 30 | @staticmethod 31 | def get_dag_id(notebook_name): 32 | return getpass.getuser() + "_" + notebook_name 33 | 34 | @staticmethod 35 | def get_dag_path(dag_id): 36 | dag_path = AIRFLOW_HOME + "/dags/dag_" + dag_id + ".py" 37 | var_path = AIRFLOW_HOME + "/variables/var_" + dag_id + ".conf" 38 | return dag_path, var_path 39 | 40 | @staticmethod 41 | def get_delta(start, interval): 42 | start = datetime.datetime.strptime(start, '%Y-%m-%d %H:%M:%S') 43 | itv = interval.split(" ") 44 | delta = datetime.timedelta(**dict([(itv[1], int(itv[0]))])) 45 | return start, delta 46 | 47 | @staticmethod 48 | @provide_session 49 | def dag_info(dag_inst, session): 50 | interval = dag_inst.schedule_interval 51 | notebook_name = dag_inst.dag_id.split('_')[1] 52 | task = dag_inst.get_task("notebook_task") 53 | start_date = task.start_date 54 | end_date = task.end_date 55 | task_instances = task.get_task_instances(session, start_date=start_date, end_date=end_date) 56 | if len(task_instances) != 0: 57 | for ti in task_instances[::-1]: 58 | dag_run = dag_inst.get_dagrun(execution_date=ti.execution_date) 59 | if dag_run.external_trigger is False: 60 | last_run_time = ti.execution_date + interval 61 | last_run_status = ti.state 62 | last_run_duration = ti.duration 63 | next_run_time = last_run_time + interval 64 | return [notebook_name, last_run_time, last_run_status, last_run_duration, next_run_time] 65 | return [notebook_name, 'N/A', 'N/A', 'N/A', task.start_date + interval] 66 | else: 67 | return [notebook_name, 'N/A', 'N/A', 'N/A', task.start_date + interval] 68 | 69 | def get_dag(self, username): 70 | dag_bag = models.DagBag(settings.DAGS_FOLDER) 71 | dag_instances = [dag_inst for (dag_id, dag_inst) in dag_bag.dags.items() if dag_inst.owner == username] 72 | dags = [] 73 | for dag_inst in dag_instances: 74 | dags.append(self.dag_info(dag_inst)) 75 | return 
dags 76 | 77 | def delete_dag(self, notebook_name): 78 | dag_id = self.get_dag_id(notebook_name) 79 | dag_path, var_path = self.get_dag_path(dag_id) 80 | os.remove(dag_path) 81 | os.remove(var_path) 82 | with self.engine.begin() as con: 83 | for t in ["dag", "xcom", "task_instance", "sla_miss", "log", "job", "dag_run", "task_fail", "dag_stats"]: 84 | query = "delete from {} where dag_id='{}'".format(t, dag_id) 85 | con.execute(query) 86 | 87 | def configure(self, dag_id, notebook_path, emails_failure, emails_success, start, runs, interval): 88 | dag_path, var_path = self.get_dag_path(dag_id) 89 | copyfile(DAG_TEMPLATE, dag_path) 90 | copyfile(VAR_TEMPLATE, var_path) 91 | start, delta = self.get_delta(start, interval) 92 | start -= delta 93 | if runs == "None": 94 | end = datetime.datetime.max.replace(microsecond=0) 95 | else: 96 | end = start + int(runs) * delta 97 | self.cf.read(var_path) 98 | self.cf.set("config", "dag_id", dag_id) 99 | self.cf.set("config", "username", getpass.getuser()) 100 | self.cf.set("config", "interval", interval) 101 | self.cf.set("config", "notebook_path", notebook_path) 102 | self.cf.set("config", "start", str(start)) 103 | self.cf.set("config", "end", str(end)) 104 | self.cf.set("config", "emails_failure", emails_failure) 105 | self.cf.set("config", "emails_success", emails_success) 106 | self.cf.write(open(var_path, "w")) 107 | 108 | 109 | class CreateDagHandler(SchedulerHandler): 110 | """ 111 | Backend handler to create a dag and store it in airflow dag folder when the user schedules a job. 
112 | """ 113 | 114 | def post(self): 115 | notebook_name = self.get_argument('notebook_name') 116 | notebook_path = self.get_argument('notebook_path') 117 | emails_failure = self.get_argument('emails_failure') 118 | emails_success = self.get_argument('emails_success') 119 | start = self.get_argument('start') 120 | runs = self.get_argument('runs') 121 | interval = self.get_argument('interval') 122 | dag_id = self.get_dag_id(notebook_name) 123 | notebook_path = NOTEBOOK_STARTUP_PATH + notebook_path 124 | self.configure(dag_id, notebook_path, emails_failure, emails_success, start, runs, interval) 125 | self.set_status(204, "") 126 | 127 | 128 | class GetDagHandler(SchedulerHandler): 129 | """ 130 | Backend handler to get dag information and display it scheduled jobs tab 131 | """ 132 | 133 | def get(self): 134 | dag_list = self.get_dag(getpass.getuser()) 135 | base_url = self.get_argument('base_url') 136 | self.render('daginfo.html', base_url=base_url, dag_list=dag_list) 137 | 138 | 139 | class DeleteDagHandler(SchedulerHandler): 140 | """ 141 | Backend handler to delete the dag information includes: 142 | 1. All related dag and task records stored in airflow metadata db 143 | 2. Dag file in dag folder 144 | 3. Var file in variable folder 145 | """ 146 | 147 | def post(self): 148 | notebook_name = self.get_argument("notebook_name") 149 | try: 150 | self.delete_dag(notebook_name) 151 | except Exception as e: 152 | self.set_status(400) 153 | self.finish(str(e)) 154 | self.set_status(204, "") 155 | 156 | 157 | class EditDagHandler(SchedulerHandler): 158 | """ 159 | Backend handler required by the edit dag button in scheduled job tab. 
160 | For get request: 161 | It will fetch all dag related information from configuration file and display it in edit menu 162 | For post request: 163 | It will update all dag related information in the configuration file based on user's input in edit menu 164 | """ 165 | 166 | def get(self): 167 | notebook_name = self.get_argument("notebook_name") 168 | dag_id = self.get_dag_id(notebook_name) 169 | _, var_path = self.get_dag_path(dag_id) 170 | self.cf.read(var_path) 171 | interval = self.cf.get("config", "interval") 172 | start, delta = self.get_delta(self.cf.get("config", "start"), interval) 173 | emails_failure = self.cf.get("config", "emails_failure") 174 | emails_success = self.cf.get("config", "emails_success") 175 | base_url = self.get_argument("base_url") 176 | start += delta 177 | configuration = [dag_id, start, interval, emails_failure, emails_success, base_url] 178 | self.render("editdag.html", configuration=configuration) 179 | 180 | def post(self): 181 | dag_id = self.cf.get("config", "dag_id") 182 | notebook_path = self.cf.get("config", "notebook_path") 183 | start = self.get_argument('start') 184 | freq = self.get_argument('freq') 185 | unit = self.get_argument('unit') 186 | runs = self.get_argument('runs') 187 | emails_failure = self.get_argument("emails_failure") 188 | emails_success = self.get_argument("emails_success") 189 | interval = freq + ' ' + unit 190 | self.configure(dag_id, notebook_path, emails_failure, emails_success, start, runs, interval) 191 | self.set_status(204, "") 192 | 193 | 194 | class CheckDagHandler(SchedulerHandler): 195 | """ 196 | Backend handler to check whether the dag already exists or not 197 | """ 198 | 199 | def get(self): 200 | dag_bag = models.DagBag(settings.DAGS_FOLDER) 201 | notebook_name = self.get_argument("notebook_name") 202 | dag_id = self.get_dag_id(notebook_name) 203 | if dag_id in dag_bag.dags: 204 | self.finish("True") 205 | else: 206 | self.finish("False") 207 | 208 | 209 | def
load_jupyter_server_extension(nb_server_app): 210 | """ 211 | Called when the extension is loaded. 212 | 213 | Args: 214 | nb_server_app (NotebookWebApplication): handle to the Notebook webserver instance. 215 | """ 216 | web_app = nb_server_app.web_app 217 | 218 | handlers = [ 219 | (r'/scheduler/create_dag', CreateDagHandler), 220 | (r'/scheduler/get_dag', GetDagHandler), 221 | (r'/scheduler/delete_dag', DeleteDagHandler), 222 | (r'/scheduler/edit_dag', EditDagHandler), 223 | (r'/scheduler/check_dag', CheckDagHandler) 224 | ] 225 | web_app.settings['template_path'] = SCHEDULER_STATIC_FILE_PATH 226 | base_url = web_app.settings['base_url'] 227 | handlers = [(url_path_join(base_url, h[0]), h[1]) for h in handlers] 228 | 229 | host_pattern = '.*$' 230 | web_app.add_handlers(host_pattern, handlers) 231 | -------------------------------------------------------------------------------- /scheduler/static/daginfo.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | {% for dag in dag_list %} 14 | 15 | 16 | 17 | 18 | 19 | 20 | 26 | 29 | 30 | {% end %} 31 | 32 |
Notebook NameLast Run TimeLast Run StatusLast Run DurationNext Scheduled RunAction
{{ dag[0] }}{{ dag[1] }}{{ dag[2] }}{{ dag[3] }}{{ dag[4] }} 21 | 22 | 23 | {% raw xsrf_form_html() %} 24 | 25 | 27 | 28 |
33 |
34 | 35 |
36 |
37 | 38 |
39 |
40 | 41 | 67 | 68 | -------------------------------------------------------------------------------- /scheduler/static/editdag.html: -------------------------------------------------------------------------------- 1 | Configuration for job: {{ configuration[0] }} 2 |
3 |
4 |
5 |
6 | 7 | 9 |
10 |
11 | 12 | 17 |
18 |
19 | 20 | 21 |
22 | 23 |
24 | 25 | 26 |
27 |
28 | 29 | 40 |
41 |
42 |
43 | 44 | 45 |
46 | 47 |
48 | 49 | 50 |
51 |
52 |
53 |
54 | 55 |
56 |
57 | 147 | 148 | -------------------------------------------------------------------------------- /scheduler/static/scheduler.js: -------------------------------------------------------------------------------- 1 | define(["base/js/namespace", "base/js/dialog", "tree/js/notebooklist", "base/js/utils", "jquery"], function (Jupyter, dialog, notebooklist, utils, $) { 2 | var ScheduleOperation = function () { 3 | this.base_url = Jupyter.notebook_list.base_url; 4 | this.bind_events(); 5 | }; 6 | 7 | ScheduleOperation.prototype = Object.create(notebooklist.NotebookList.prototype); 8 | 9 | ScheduleOperation.prototype.bind_events = function () { 10 | var that = this; 11 | $(".schedule-button").click($.proxy(that.schedule_selected, this)); 12 | }; 13 | 14 | ScheduleOperation.prototype.schedule_selected = function () { 15 | var that = this; 16 | var selected = Jupyter.notebook_list.selected; 17 | if (selected.length > 1) { 18 | alert("Cannot schedule more than one notebook at the same time!"); 19 | return false; 20 | } 21 | 22 | // Part1 Schedule 23 | var lst = Array(20).fill(0).map((i, j) => j + 1); 24 | var every_num = $(""); 25 | $.each(lst, function (i, el) { every_num.append(new Option(el, el)); }); 26 | var every_unit = $(""); 27 | var unit_list = ["hours", "days", "weeks"]; 28 | $.each(unit_list, function (i, el) { every_unit.append(new Option(el, el)); }); 29 | 30 | every_unit.change(function () { 31 | switch (every_unit.val()) { 32 | case "hours": 33 | lst = Array(20).fill(0).map((i, j) => j + 1); 34 | break; 35 | case "days": 36 | lst = Array(30).fill(0).map((i, j) => j + 1); 37 | break; 38 | case "weeks": 39 | lst = Array(52).fill(0).map((i, j) => j + 1); 40 | } 41 | every_num.empty(); 42 | $.each(lst, function (i, el) { every_num.append(new Option(el, el)); }); 43 | }); 44 | 45 | var start_time = $("").val("00:00"); 46 | var start_date = $("").val(new Date().toISOString().split("T")[0]); 47 | var runs = $(""); 48 | var runs_list = ["None", "1 time", "2 times", "3 
times", "4 times", "5 times", "10 times", "50 times", "100 times"]; 49 | $.each(runs_list, function (i, el) { runs.append(new Option(el, el)); }); 50 | 51 | var schedule_part = $("
") 52 | .append("") 53 | .append(every_num) 54 | .append(every_unit) 55 | .append("
") 56 | .append("") 57 | .append(start_time) 58 | .append("
") 59 | .append("") 60 | .append(start_date) 61 | .append("
") 62 | .append("