56 | Please activate JavaScript to enable the search
57 | functionality.
58 |
59 |
60 |
61 | From here you can search these documents. Enter your search
62 | words into the box below and click "search". Note that the search
63 | function will automatically search for all of the words. Pages
64 | containing fewer words won't appear in the result list.
65 |
')
207 | .appendTo($('#searchbox'));
208 | }
209 | },
210 |
211 | /**
212 | * init the domain index toggle buttons
213 | */
214 | initIndexTable : function() {
215 | var togglers = $('img.toggler').click(function() {
216 | var src = $(this).attr('src');
217 | var idnum = $(this).attr('id').substr(7);
218 | $('tr.cg-' + idnum).toggle();
219 | if (src.substr(-9) == 'minus.png')
220 | $(this).attr('src', src.substr(0, src.length-9) + 'plus.png');
221 | else
222 | $(this).attr('src', src.substr(0, src.length-8) + 'minus.png');
223 | }).css('display', '');
224 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
225 | togglers.click();
226 | }
227 | },
228 |
229 | /**
230 | * helper function to hide the search marks again
231 | */
232 | hideSearchWords : function() {
233 | $('#searchbox .highlight-link').fadeOut(300);
234 | $('span.highlighted').removeClass('highlighted');
235 | },
236 |
237 | /**
238 | * make the url absolute
239 | */
240 | makeURL : function(relativeURL) {
241 | return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL;
242 | },
243 |
244 | /**
245 | * get the current relative url
246 | */
247 | getCurrentURL : function() {
248 | var path = document.location.pathname;
249 | var parts = path.split(/\//);
250 | $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() {
251 | if (this == '..')
252 | parts.pop();
253 | });
254 | var url = parts.join('/');
255 | return path.substring(url.lastIndexOf('/') + 1, path.length - 1);
256 | },
257 |
258 | initOnKeyListeners: function() {
259 | $(document).keyup(function(event) {
260 | var activeElementType = document.activeElement.tagName;
261 | // don't navigate when in search box or textarea
262 | if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT') {
263 | switch (event.keyCode) {
264 | case 37: // left
265 | var prevHref = $('link[rel="prev"]').prop('href');
266 | if (prevHref) {
267 | window.location.href = prevHref;
268 | return false;
269 | }
270 | case 39: // right
271 | var nextHref = $('link[rel="next"]').prop('href');
272 | if (nextHref) {
273 | window.location.href = nextHref;
274 | return false;
275 | }
276 | }
277 | }
278 | });
279 | }
280 | };
281 |
// Shorthand used throughout the generated pages for gettext lookups.
_ = Documentation.gettext;

// Boot the documentation helpers once the DOM is ready
// ($(fn) is jQuery's shorthand for $(document).ready(fn)).
$(function() {
  Documentation.init();
});
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # pyspark_dist_explore
3 | ______________________________
4 |
5 | ## PySpark Dataframe Distribution Explorer
6 |
7 | Pyspark_dist_explore is a plotting library to get quick insights on data in Spark DataFrames through histograms and density plots, where the heavy lifting is done in Spark.
8 |
9 | Pyspark_dist_explore has two ways of working: there are 3 functions to create matplotlib graphs or pandas dataframes easily, and a class (Histogram) to do more advanced explorations while minimizing the amount of computation needed.
10 |
11 | ## Functions:
12 | * **hist(ax, x, \*\*kwargs)**. The *hist* function is almost exactly the same as the matplotlib hist function. See [here](https://matplotlib.org/examples/statistics/histogram_demo_multihist.html) for examples. The only two differences are:
13 | * Instead of being a function of an Axes object, an Axes object is needed as input.
14 | * Instead of having an numpy array, list of arrays, or matrix as input, the function works on Spark DataFrames with a single column, a list of single-column Spark DataFrames, or a SparkDataframe with multiple columns. All other keyword arguments of the [Matplotlib hist](https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.hist.html) function can be used.
15 |
16 |
17 | * **distplot(ax, x, \*\*kwargs)**. Combines a normalized histogram of each column in x with a density plot of the same column.
18 |
19 | * **pandas_histogram(x, bins=None, range=None)**. Creates histograms for all columns in x and converts this to a Pandas DataFrame
20 |
21 | ## Installing:
22 | Install from PyPi:
23 |
24 | ```pip install pyspark_dist_explore```
25 |
26 | Or directly from github:
27 |
28 | ```
29 | git clone https://github.com/Bergvca/pyspark_dist_explore.git
30 | cd pyspark_dist_explore
31 | pip install .
32 | ```
33 | ### Examples
34 |
35 |
36 |
37 | ```python
38 | import pyspark
39 | import pandas as pd
40 | import numpy as np
41 | import pyspark.sql.functions as F
42 | import matplotlib.pyplot as plt
43 | import seaborn as sns
44 |
45 | from IPython.display import display, HTML, display_html #useful to display wide tables
46 | from pyspark_dist_explore import Histogram, hist, distplot, pandas_histogram
47 | from pyspark.sql import Row
48 |
49 | sc = pyspark.SparkContext()
50 | sqlContext = pyspark.SQLContext(sc)
51 | %matplotlib inline
52 | ```
53 |
54 | ```python
55 | # Create some data in a Spark DataFrame:
56 | n_observations = 200
57 |
58 | random_dist_1 = np.random.logistic(100, 1000, n_observations)
59 | random_dist_2 = np.random.logistic(400, 500, n_observations)
60 | age_dist_1 = 20 * np.random.randn(n_observations) + 40
61 | age_dist_2 = 15 * np.random.randn(n_observations) + 30
62 |
63 | list_male = [('M', rand_value, age_dist_1[i]) for i, rand_value in enumerate(random_dist_1)]
64 | list_female = [('F', rand_value, age_dist_2[i]) for i, rand_value in enumerate(random_dist_2)]
65 |
66 | list_male_female = list_male + list_female
67 |
68 | rdd = sc.parallelize(list_male_female)
69 | transactions = rdd.map(lambda x: Row(gender=x[0], amount=float(x[1]), age=float(x[2])))
70 | transactions_df = sqlContext.createDataFrame(transactions)
71 |
72 | ```
73 |
74 |
75 | ```python
76 | # Create some selections on this data
77 |
78 | filtered_by_gender_m = transactions_df.filter(F.col('gender') == 'M').select(F.col('amount').alias('amount_m'))
79 | filtered_by_gender_f = transactions_df.filter(F.col('gender') == 'F').select(F.col('amount').alias('amount_f') )
80 | filtered_by_age_50_plus = transactions_df.filter(F.col('age') > 50).select(F.col('amount').alias('amount_50_plus'))
81 | filtered_by_age_50_minus = transactions_df.filter(F.col('age') <= 50).select(F.col('amount').alias('amount_50_minus'))
82 |
83 | # Create the plots
84 |
85 | fig, axes = plt.subplots(nrows=2, ncols=2)
86 | fig.set_size_inches(20, 20)
87 |
88 | # Use the hist function to plot histograms on the Axes
89 | hist(axes[0, 0], [filtered_by_gender_m, filtered_by_gender_f], bins = 20, color=['red', 'tan'])
90 | axes[0, 0].set_title('01. Compare Genders')
91 | axes[0, 0].legend()
92 |
93 | hist(axes[0, 1], [filtered_by_age_50_plus, filtered_by_age_50_minus], overlapping=True)
94 | axes[0, 1].set_title('02. Compare Age')
95 | axes[0, 1].legend()
96 |
97 | # Use the distplot function to plot (scaled) histograms + density plots on the Axes
98 | distplot(axes[1, 0], [filtered_by_gender_m, filtered_by_gender_f], bins=20)
99 | axes[1, 0].set_title('03. Compare distribution per gender')
100 | axes[1, 0].legend()
101 |
102 | distplot(axes[1, 1], [filtered_by_age_50_plus, filtered_by_age_50_minus], bins=20, color=['orange', 'green'])
103 | axes[1, 1].set_title('04. Compare distribution per age group')
104 | _ = axes[1, 1].legend()
105 |
106 | ```
107 |
108 |
109 | 
110 |
111 |
112 |
113 | ```python
114 | # Convert Histograms of the 4 datasets to a pandas dataframe
115 |
116 | # Put the outliers in separate bins:
117 | bins = [-6000, -3000] + [bin_range for bin_range in range(-2500, 4000, 500)] + [6000]
118 |
119 |
120 | compare_all_df = pandas_histogram([filtered_by_gender_m,
121 | filtered_by_gender_f,
122 | filtered_by_age_50_plus,
123 | filtered_by_age_50_minus],
124 | bins=bins, range=(-4000, 4000))
125 | display(compare_all_df)
126 | ```
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
amount_50_minus
136 |
amount_50_plus
137 |
amount_f
138 |
amount_m
139 |
140 |
141 |
142 |
143 |
-6000.00 - -3000.00
144 |
4
145 |
2
146 |
1
147 |
5
148 |
149 |
150 |
-3000.00 - -2500.00
151 |
4
152 |
2
153 |
0
154 |
6
155 |
156 |
157 |
-2500.00 - -2000.00
158 |
11
159 |
3
160 |
1
161 |
13
162 |
163 |
164 |
-2000.00 - -1500.00
165 |
10
166 |
5
167 |
4
168 |
11
169 |
170 |
171 |
-1500.00 - -1000.00
172 |
21
173 |
6
174 |
9
175 |
18
176 |
177 |
178 |
-1000.00 - -500.00
179 |
32
180 |
9
181 |
16
182 |
25
183 |
184 |
185 |
-500.00 - 0.00
186 |
39
187 |
8
188 |
30
189 |
17
190 |
191 |
192 |
0.00 - 500.00
193 |
68
194 |
13
195 |
52
196 |
29
197 |
198 |
199 |
500.00 - 1000.00
200 |
46
201 |
17
202 |
43
203 |
20
204 |
205 |
206 |
1000.00 - 1500.00
207 |
29
208 |
4
209 |
22
210 |
11
211 |
212 |
213 |
1500.00 - 2000.00
214 |
24
215 |
9
216 |
13
217 |
20
218 |
219 |
220 |
2000.00 - 2500.00
221 |
10
222 |
5
223 |
6
224 |
9
225 |
226 |
227 |
2500.00 - 3000.00
228 |
4
229 |
3
230 |
2
231 |
5
232 |
233 |
234 |
3000.00 - 3500.00
235 |
5
236 |
0
237 |
1
238 |
4
239 |
240 |
241 |
3500.00 - 6000.00
242 |
2
243 |
3
244 |
0
245 |
5
246 |
247 |
248 |
249 |
250 |
251 |
252 | ## The Histogram Class
253 |
254 | Next to running the functions as above to get results quickly, the pyspark_dist_explore library contains a Histogram class. The advantage of using this class is that it retains state, so if the histogram is built once, multiple actions can be done without recalculating the bin values.
255 |
256 | ### Examples
257 |
258 |
259 | ```python
260 | age_hist = Histogram(range=(-4000, 4000), bins=15)
261 |
262 | # Create a histogram for different age groups
263 | for age in range(0, 90, 10):
264 | age_hist.add_data(
265 | transactions_df.
266 | filter((F.col('age') > age) & (F.col('age') <= age+10)).
267 | select(F.col('amount').alias('amount_%d_%d' % (age, age+10)))
268 | )
269 |
270 | fig, axes = plt.subplots(nrows=2)
271 | fig.set_size_inches(20, 10)
272 |
273 | age_hist.plot_hist(axes[0], histtype='step', linewidth=2.0, fill=False, cumulative=True) # The Histogram is built here
274 | age_hist.plot_density(axes[1]) # The density plot is created from the already built histogram
275 |
276 | # Set the legends
277 | axes[0].legend(loc = 'upper left' )
278 | axes[0].set_title('Cumulative Histogram')
279 | axes[1].legend()
280 | axes[1].set_title('Kernel Density Plot')
281 |
282 | age_hist_pd_df = age_hist.to_pandas() # Again the histograms don't need to be recalculated.
283 |
284 | # Create a heatmap from the Pandas Dataframe
285 |
286 | fig, axes = plt.subplots()
287 | fig.set_size_inches(10, 10)
288 | ax = sns.heatmap(age_hist_pd_df, annot=True, ax=axes)
289 | _ = ax.set_title('Heatmap')
290 | ```
291 |
292 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
2 | # pyspark_dist_explore
3 | ______________________________
4 |
5 | ## PySpark Dataframe Distribution Explorer
6 |
7 | Pyspark_dist_explore is a plotting library to get quick insights on data in Spark DataFrames through histograms and density plots, where the heavy lifting is done in Spark.
8 |
9 | Pyspark_dist_explore has two ways of working: there are 3 functions to create matplotlib graphs or pandas dataframes easily, and a class (Histogram) to do more advanced explorations while minimizing the amount of computation needed.
10 |
11 | ## Functions:
12 | * **hist(ax, x, \*\*kwargs)**. The *hist* function is almost exactly the same as the matplotlib hist function. See [here](https://matplotlib.org/examples/statistics/histogram_demo_multihist.html) for examples. The only two differences are:
13 | * Instead of being a function of an Axes object, an Axes object is needed as input.
14 | * Instead of having an numpy array, list of arrays, or matrix as input, the function works on Spark DataFrames with a single column, a list of single-column Spark DataFrames, or a SparkDataframe with multiple columns. All other keyword arguments of the [Matplotlib hist](https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.hist.html) function can be used.
15 |
16 |
17 | * **distplot(ax, x, \*\*kwargs)**. Combines a normalized histogram of each column in x with a density plot of the same column.
18 |
19 | * **pandas_histogram(x, bins=None, range=None)**. Creates histograms for all columns in x and converts this to a Pandas DataFrame
20 |
21 | ## Installing:
22 | Install from PyPi:
23 |
24 | ```pip install pyspark_dist_explore```
25 |
26 | Or directly from github:
27 |
28 | ```
29 | git clone https://github.com/Bergvca/pyspark_dist_explore.git
30 | cd pyspark_dist_explore
31 | pip install .
32 | ```
33 | ### Examples
34 |
35 |
36 |
37 | ```python
38 | import pyspark
39 | import pandas as pd
40 | import numpy as np
41 | import pyspark.sql.functions as F
42 | import matplotlib.pyplot as plt
43 | import seaborn as sns
44 |
45 | from IPython.display import display, HTML, display_html #useful to display wide tables
46 | from pyspark_dist_explore import Histogram, hist, distplot, pandas_histogram
47 | from pyspark.sql import Row
48 |
49 | sc = pyspark.SparkContext()
50 | sqlContext = pyspark.SQLContext(sc)
51 | %matplotlib inline
52 | ```
53 |
54 | ```python
55 | # Create some data in a Spark DataFrame:
56 | n_observations = 200
57 |
58 | random_dist_1 = np.random.logistic(100, 1000, n_observations)
59 | random_dist_2 = np.random.logistic(400, 500, n_observations)
60 | age_dist_1 = 20 * np.random.randn(n_observations) + 40
61 | age_dist_2 = 15 * np.random.randn(n_observations) + 30
62 |
63 | list_male = [('M', rand_value, age_dist_1[i]) for i, rand_value in enumerate(random_dist_1)]
64 | list_female = [('F', rand_value, age_dist_2[i]) for i, rand_value in enumerate(random_dist_2)]
65 |
66 | list_male_female = list_male + list_female
67 |
68 | rdd = sc.parallelize(list_male_female)
69 | transactions = rdd.map(lambda x: Row(gender=x[0], amount=float(x[1]), age=float(x[2])))
70 | transactions_df = sqlContext.createDataFrame(transactions)
71 |
72 | ```
73 |
74 |
75 | ```python
76 | # Create some selections on this data
77 |
78 | filtered_by_gender_m = transactions_df.filter(F.col('gender') == 'M').select(F.col('amount').alias('amount_m'))
79 | filtered_by_gender_f = transactions_df.filter(F.col('gender') == 'F').select(F.col('amount').alias('amount_f') )
80 | filtered_by_age_50_plus = transactions_df.filter(F.col('age') > 50).select(F.col('amount').alias('amount_50_plus'))
81 | filtered_by_age_50_minus = transactions_df.filter(F.col('age') <= 50).select(F.col('amount').alias('amount_50_minus'))
82 |
83 | # Create the plots
84 |
85 | fig, axes = plt.subplots(nrows=2, ncols=2)
86 | fig.set_size_inches(20, 20)
87 |
88 | # Use the hist function to plot histograms on the Axes
89 | hist(axes[0, 0], [filtered_by_gender_m, filtered_by_gender_f], bins = 20, color=['red', 'tan'])
90 | axes[0, 0].set_title('01. Compare Genders')
91 | axes[0, 0].legend()
92 |
93 | hist(axes[0, 1], [filtered_by_age_50_plus, filtered_by_age_50_minus], overlapping=True)
94 | axes[0, 1].set_title('02. Compare Age')
95 | axes[0, 1].legend()
96 |
97 | # Use the distplot function to plot (scaled) histograms + density plots on the Axes
98 | distplot(axes[1, 0], [filtered_by_gender_m, filtered_by_gender_f], bins=20)
99 | axes[1, 0].set_title('03. Compare distribution per gender')
100 | axes[1, 0].legend()
101 |
102 | distplot(axes[1, 1], [filtered_by_age_50_plus, filtered_by_age_50_minus], bins=20, color=['orange', 'green'])
103 | axes[1, 1].set_title('04. Compare distribution per age group')
104 | _ = axes[1, 1].legend()
105 |
106 | ```
107 |
108 |
109 | 
110 |
111 |
112 |
113 | ```python
114 | # Convert Histograms of the 4 datasets to a pandas dataframe
115 |
116 | # Put the outliers in separate bins:
117 | bins = [-6000, -3000] + [bin_range for bin_range in range(-2500, 4000, 500)] + [6000]
118 |
119 |
120 | compare_all_df = pandas_histogram([filtered_by_gender_m,
121 | filtered_by_gender_f,
122 | filtered_by_age_50_plus,
123 | filtered_by_age_50_minus],
124 | bins=bins, range=(-4000, 4000))
125 | display(compare_all_df)
126 | ```
127 |
128 |
129 |
130 |
143 |
144 |
145 |
146 |
147 |
amount_50_minus
148 |
amount_50_plus
149 |
amount_f
150 |
amount_m
151 |
152 |
153 |
154 |
155 |
-6000.00 - -3000.00
156 |
4
157 |
2
158 |
1
159 |
5
160 |
161 |
162 |
-3000.00 - -2500.00
163 |
4
164 |
2
165 |
0
166 |
6
167 |
168 |
169 |
-2500.00 - -2000.00
170 |
11
171 |
3
172 |
1
173 |
13
174 |
175 |
176 |
-2000.00 - -1500.00
177 |
10
178 |
5
179 |
4
180 |
11
181 |
182 |
183 |
-1500.00 - -1000.00
184 |
21
185 |
6
186 |
9
187 |
18
188 |
189 |
190 |
-1000.00 - -500.00
191 |
32
192 |
9
193 |
16
194 |
25
195 |
196 |
197 |
-500.00 - 0.00
198 |
39
199 |
8
200 |
30
201 |
17
202 |
203 |
204 |
0.00 - 500.00
205 |
68
206 |
13
207 |
52
208 |
29
209 |
210 |
211 |
500.00 - 1000.00
212 |
46
213 |
17
214 |
43
215 |
20
216 |
217 |
218 |
1000.00 - 1500.00
219 |
29
220 |
4
221 |
22
222 |
11
223 |
224 |
225 |
1500.00 - 2000.00
226 |
24
227 |
9
228 |
13
229 |
20
230 |
231 |
232 |
2000.00 - 2500.00
233 |
10
234 |
5
235 |
6
236 |
9
237 |
238 |
239 |
2500.00 - 3000.00
240 |
4
241 |
3
242 |
2
243 |
5
244 |
245 |
246 |
3000.00 - 3500.00
247 |
5
248 |
0
249 |
1
250 |
4
251 |
252 |
253 |
3500.00 - 6000.00
254 |
2
255 |
3
256 |
0
257 |
5
258 |
259 |
260 |
261 |
262 |
263 |
264 | ## The Histogram Class
265 |
266 | Next to running the functions as above to get results quickly, the pyspark_dist_explore library contains a Histogram class. The advantage of using this class is that it retains state, so if the histogram is built once, multiple actions can be done without recalculating the bin values.
267 |
268 | ### Examples
269 |
270 |
271 | ```python
272 | age_hist = Histogram(range=(-4000, 4000), bins=15)
273 |
274 | # Create a histogram for different age groups
275 | for age in range(0, 90, 10):
276 | age_hist.add_data(
277 | transactions_df.
278 | filter((F.col('age') > age) & (F.col('age') <= age+10)).
279 | select(F.col('amount').alias('amount_%d_%d' % (age, age+10)))
280 | )
281 |
282 | fig, axes = plt.subplots(nrows=2)
283 | fig.set_size_inches(20, 10)
284 |
285 | age_hist.plot_hist(axes[0], histtype='step', linewidth=2.0, fill=False, cumulative=True) # The Histogram is built here
286 | age_hist.plot_density(axes[1]) # The density plot is created from the already built histogram
287 |
288 | # Set the legends
289 | axes[0].legend(loc = 'upper left' )
290 | axes[0].set_title('Cumulative Histogram')
291 | axes[1].legend()
292 | axes[1].set_title('Kernel Density Plot')
293 |
294 | age_hist_pd_df = age_hist.to_pandas() # Again the histograms don't need to be recalculated.
295 |
296 | # Create a heatmap from the Pandas Dataframe
297 |
298 | fig, axes = plt.subplots()
299 | fig.set_size_inches(10, 10)
300 | ax = sns.heatmap(age_hist_pd_df, annot=True, ax=axes)
301 | _ = ax.set_title('Heatmap')
302 | ```
303 |
304 |
--------------------------------------------------------------------------------
/docs/build/html/_static/underscore.js:
--------------------------------------------------------------------------------
1 | // Underscore.js 1.3.1
2 | // (c) 2009-2012 Jeremy Ashkenas, DocumentCloud Inc.
3 | // Underscore is freely distributable under the MIT license.
4 | // Portions of Underscore are inspired or borrowed from Prototype,
5 | // Oliver Steele's Functional, and John Resig's Micro-Templating.
6 | // For all details and documentation:
7 | // http://documentcloud.github.com/underscore
8 | (function(){function q(a,c,d){if(a===c)return a!==0||1/a==1/c;if(a==null||c==null)return a===c;if(a._chain)a=a._wrapped;if(c._chain)c=c._wrapped;if(a.isEqual&&b.isFunction(a.isEqual))return a.isEqual(c);if(c.isEqual&&b.isFunction(c.isEqual))return c.isEqual(a);var e=l.call(a);if(e!=l.call(c))return false;switch(e){case "[object String]":return a==String(c);case "[object Number]":return a!=+a?c!=+c:a==0?1/a==1/c:a==+c;case "[object Date]":case "[object Boolean]":return+a==+c;case "[object RegExp]":return a.source==
9 | c.source&&a.global==c.global&&a.multiline==c.multiline&&a.ignoreCase==c.ignoreCase}if(typeof a!="object"||typeof c!="object")return false;for(var f=d.length;f--;)if(d[f]==a)return true;d.push(a);var f=0,g=true;if(e=="[object Array]"){if(f=a.length,g=f==c.length)for(;f--;)if(!(g=f in a==f in c&&q(a[f],c[f],d)))break}else{if("constructor"in a!="constructor"in c||a.constructor!=c.constructor)return false;for(var h in a)if(b.has(a,h)&&(f++,!(g=b.has(c,h)&&q(a[h],c[h],d))))break;if(g){for(h in c)if(b.has(c,
10 | h)&&!f--)break;g=!f}}d.pop();return g}var r=this,G=r._,n={},k=Array.prototype,o=Object.prototype,i=k.slice,H=k.unshift,l=o.toString,I=o.hasOwnProperty,w=k.forEach,x=k.map,y=k.reduce,z=k.reduceRight,A=k.filter,B=k.every,C=k.some,p=k.indexOf,D=k.lastIndexOf,o=Array.isArray,J=Object.keys,s=Function.prototype.bind,b=function(a){return new m(a)};if(typeof exports!=="undefined"){if(typeof module!=="undefined"&&module.exports)exports=module.exports=b;exports._=b}else r._=b;b.VERSION="1.3.1";var j=b.each=
11 | b.forEach=function(a,c,d){if(a!=null)if(w&&a.forEach===w)a.forEach(c,d);else if(a.length===+a.length)for(var e=0,f=a.length;e2;a==
12 | null&&(a=[]);if(y&&a.reduce===y)return e&&(c=b.bind(c,e)),f?a.reduce(c,d):a.reduce(c);j(a,function(a,b,i){f?d=c.call(e,d,a,b,i):(d=a,f=true)});if(!f)throw new TypeError("Reduce of empty array with no initial value");return d};b.reduceRight=b.foldr=function(a,c,d,e){var f=arguments.length>2;a==null&&(a=[]);if(z&&a.reduceRight===z)return e&&(c=b.bind(c,e)),f?a.reduceRight(c,d):a.reduceRight(c);var g=b.toArray(a).reverse();e&&!f&&(c=b.bind(c,e));return f?b.reduce(g,c,d,e):b.reduce(g,c)};b.find=b.detect=
13 | function(a,c,b){var e;E(a,function(a,g,h){if(c.call(b,a,g,h))return e=a,true});return e};b.filter=b.select=function(a,c,b){var e=[];if(a==null)return e;if(A&&a.filter===A)return a.filter(c,b);j(a,function(a,g,h){c.call(b,a,g,h)&&(e[e.length]=a)});return e};b.reject=function(a,c,b){var e=[];if(a==null)return e;j(a,function(a,g,h){c.call(b,a,g,h)||(e[e.length]=a)});return e};b.every=b.all=function(a,c,b){var e=true;if(a==null)return e;if(B&&a.every===B)return a.every(c,b);j(a,function(a,g,h){if(!(e=
14 | e&&c.call(b,a,g,h)))return n});return e};var E=b.some=b.any=function(a,c,d){c||(c=b.identity);var e=false;if(a==null)return e;if(C&&a.some===C)return a.some(c,d);j(a,function(a,b,h){if(e||(e=c.call(d,a,b,h)))return n});return!!e};b.include=b.contains=function(a,c){var b=false;if(a==null)return b;return p&&a.indexOf===p?a.indexOf(c)!=-1:b=E(a,function(a){return a===c})};b.invoke=function(a,c){var d=i.call(arguments,2);return b.map(a,function(a){return(b.isFunction(c)?c||a:a[c]).apply(a,d)})};b.pluck=
15 | function(a,c){return b.map(a,function(a){return a[c]})};b.max=function(a,c,d){if(!c&&b.isArray(a))return Math.max.apply(Math,a);if(!c&&b.isEmpty(a))return-Infinity;var e={computed:-Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;b>=e.computed&&(e={value:a,computed:b})});return e.value};b.min=function(a,c,d){if(!c&&b.isArray(a))return Math.min.apply(Math,a);if(!c&&b.isEmpty(a))return Infinity;var e={computed:Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;bd?1:0}),"value")};b.groupBy=function(a,c){var d={},e=b.isFunction(c)?c:function(a){return a[c]};j(a,function(a,b){var c=e(a,b);(d[c]||(d[c]=[])).push(a)});return d};b.sortedIndex=function(a,
17 | c,d){d||(d=b.identity);for(var e=0,f=a.length;e>1;d(a[g])=0})})};b.difference=function(a){var c=b.flatten(i.call(arguments,1));return b.filter(a,function(a){return!b.include(c,a)})};b.zip=function(){for(var a=i.call(arguments),c=b.max(b.pluck(a,"length")),d=Array(c),e=0;e=0;d--)b=[a[d].apply(this,b)];return b[0]}};
24 | b.after=function(a,b){return a<=0?b():function(){if(--a<1)return b.apply(this,arguments)}};b.keys=J||function(a){if(a!==Object(a))throw new TypeError("Invalid object");var c=[],d;for(d in a)b.has(a,d)&&(c[c.length]=d);return c};b.values=function(a){return b.map(a,b.identity)};b.functions=b.methods=function(a){var c=[],d;for(d in a)b.isFunction(a[d])&&c.push(d);return c.sort()};b.extend=function(a){j(i.call(arguments,1),function(b){for(var d in b)a[d]=b[d]});return a};b.defaults=function(a){j(i.call(arguments,
25 | 1),function(b){for(var d in b)a[d]==null&&(a[d]=b[d])});return a};b.clone=function(a){return!b.isObject(a)?a:b.isArray(a)?a.slice():b.extend({},a)};b.tap=function(a,b){b(a);return a};b.isEqual=function(a,b){return q(a,b,[])};b.isEmpty=function(a){if(b.isArray(a)||b.isString(a))return a.length===0;for(var c in a)if(b.has(a,c))return false;return true};b.isElement=function(a){return!!(a&&a.nodeType==1)};b.isArray=o||function(a){return l.call(a)=="[object Array]"};b.isObject=function(a){return a===Object(a)};
26 | b.isArguments=function(a){return l.call(a)=="[object Arguments]"};if(!b.isArguments(arguments))b.isArguments=function(a){return!(!a||!b.has(a,"callee"))};b.isFunction=function(a){return l.call(a)=="[object Function]"};b.isString=function(a){return l.call(a)=="[object String]"};b.isNumber=function(a){return l.call(a)=="[object Number]"};b.isNaN=function(a){return a!==a};b.isBoolean=function(a){return a===true||a===false||l.call(a)=="[object Boolean]"};b.isDate=function(a){return l.call(a)=="[object Date]"};
27 | b.isRegExp=function(a){return l.call(a)=="[object RegExp]"};b.isNull=function(a){return a===null};b.isUndefined=function(a){return a===void 0};b.has=function(a,b){return I.call(a,b)};b.noConflict=function(){r._=G;return this};b.identity=function(a){return a};b.times=function(a,b,d){for(var e=0;e/g,">").replace(/"/g,""").replace(/'/g,"'").replace(/\//g,"/")};b.mixin=function(a){j(b.functions(a),
28 | function(c){K(c,b[c]=a[c])})};var L=0;b.uniqueId=function(a){var b=L++;return a?a+b:b};b.templateSettings={evaluate:/<%([\s\S]+?)%>/g,interpolate:/<%=([\s\S]+?)%>/g,escape:/<%-([\s\S]+?)%>/g};var t=/.^/,u=function(a){return a.replace(/\\\\/g,"\\").replace(/\\'/g,"'")};b.template=function(a,c){var d=b.templateSettings,d="var __p=[],print=function(){__p.push.apply(__p,arguments);};with(obj||{}){__p.push('"+a.replace(/\\/g,"\\\\").replace(/'/g,"\\'").replace(d.escape||t,function(a,b){return"',_.escape("+
29 | u(b)+"),'"}).replace(d.interpolate||t,function(a,b){return"',"+u(b)+",'"}).replace(d.evaluate||t,function(a,b){return"');"+u(b).replace(/[\r\n\t]/g," ")+";__p.push('"}).replace(/\r/g,"\\r").replace(/\n/g,"\\n").replace(/\t/g,"\\t")+"');}return __p.join('');",e=new Function("obj","_",d);return c?e(c,b):function(a){return e.call(this,a,b)}};b.chain=function(a){return b(a).chain()};var m=function(a){this._wrapped=a};b.prototype=m.prototype;var v=function(a,c){return c?b(a).chain():a},K=function(a,c){m.prototype[a]=
30 | function(){var a=i.call(arguments);H.call(a,this._wrapped);return v(c.apply(b,a),this._chain)}};b.mixin(b);j("pop,push,reverse,shift,sort,splice,unshift".split(","),function(a){var b=k[a];m.prototype[a]=function(){var d=this._wrapped;b.apply(d,arguments);var e=d.length;(a=="shift"||a=="splice")&&e===0&&delete d[0];return v(d,this._chain)}});j(["concat","join","slice"],function(a){var b=k[a];m.prototype[a]=function(){return v(b.apply(this._wrapped,arguments),this._chain)}});m.prototype.chain=function(){this._chain=
31 | true;return this};m.prototype.value=function(){return this._wrapped}}).call(this);
32 |
--------------------------------------------------------------------------------
/docs/build/html/_static/basic.css:
--------------------------------------------------------------------------------
1 | /*
2 | * basic.css
3 | * ~~~~~~~~~
4 | *
5 | * Sphinx stylesheet -- basic theme.
6 | *
7 | * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
8 | * :license: BSD, see LICENSE for details.
9 | *
10 | */
11 |
12 | /* -- main layout ----------------------------------------------------------- */
13 |
14 | div.clearer {
15 | clear: both;
16 | }
17 |
18 | /* -- relbar ---------------------------------------------------------------- */
19 |
20 | div.related {
21 | width: 100%;
22 | font-size: 90%;
23 | }
24 |
25 | div.related h3 {
26 | display: none;
27 | }
28 |
29 | div.related ul {
30 | margin: 0;
31 | padding: 0 0 0 10px;
32 | list-style: none;
33 | }
34 |
35 | div.related li {
36 | display: inline;
37 | }
38 |
39 | div.related li.right {
40 | float: right;
41 | margin-right: 5px;
42 | }
43 |
44 | /* -- sidebar --------------------------------------------------------------- */
45 |
46 | div.sphinxsidebarwrapper {
47 | padding: 10px 5px 0 10px;
48 | }
49 |
50 | div.sphinxsidebar {
51 | float: left;
52 | width: 230px;
53 | margin-left: -100%;
54 | font-size: 90%;
55 | word-wrap: break-word;
56 | overflow-wrap : break-word;
57 | }
58 |
59 | div.sphinxsidebar ul {
60 | list-style: none;
61 | }
62 |
63 | div.sphinxsidebar ul ul,
64 | div.sphinxsidebar ul.want-points {
65 | margin-left: 20px;
66 | list-style: square;
67 | }
68 |
69 | div.sphinxsidebar ul ul {
70 | margin-top: 0;
71 | margin-bottom: 0;
72 | }
73 |
74 | div.sphinxsidebar form {
75 | margin-top: 10px;
76 | }
77 |
78 | div.sphinxsidebar input {
79 | border: 1px solid #98dbcc;
80 | font-family: sans-serif;
81 | font-size: 1em;
82 | }
83 |
84 | div.sphinxsidebar #searchbox input[type="text"] {
85 | width: 170px;
86 | }
87 |
88 | img {
89 | border: 0;
90 | max-width: 100%;
91 | }
92 |
93 | /* -- search page ----------------------------------------------------------- */
94 |
95 | ul.search {
96 | margin: 10px 0 0 20px;
97 | padding: 0;
98 | }
99 |
100 | ul.search li {
101 | padding: 5px 0 5px 20px;
102 | background-image: url(file.png);
103 | background-repeat: no-repeat;
104 | background-position: 0 7px;
105 | }
106 |
107 | ul.search li a {
108 | font-weight: bold;
109 | }
110 |
111 | ul.search li div.context {
112 | color: #888;
113 | margin: 2px 0 0 30px;
114 | text-align: left;
115 | }
116 |
117 | ul.keywordmatches li.goodmatch a {
118 | font-weight: bold;
119 | }
120 |
121 | /* -- index page ------------------------------------------------------------ */
122 |
123 | table.contentstable {
124 | width: 90%;
125 | margin-left: auto;
126 | margin-right: auto;
127 | }
128 |
129 | table.contentstable p.biglink {
130 | line-height: 150%;
131 | }
132 |
133 | a.biglink {
134 | font-size: 1.3em;
135 | }
136 |
137 | span.linkdescr {
138 | font-style: italic;
139 | padding-top: 5px;
140 | font-size: 90%;
141 | }
142 |
143 | /* -- general index --------------------------------------------------------- */
144 |
145 | table.indextable {
146 | width: 100%;
147 | }
148 |
149 | table.indextable td {
150 | text-align: left;
151 | vertical-align: top;
152 | }
153 |
154 | table.indextable ul {
155 | margin-top: 0;
156 | margin-bottom: 0;
157 | list-style-type: none;
158 | }
159 |
160 | table.indextable > tbody > tr > td > ul {
161 | padding-left: 0em;
162 | }
163 |
164 | table.indextable tr.pcap {
165 | height: 10px;
166 | }
167 |
168 | table.indextable tr.cap {
169 | margin-top: 10px;
170 | background-color: #f2f2f2;
171 | }
172 |
173 | img.toggler {
174 | margin-right: 3px;
175 | margin-top: 3px;
176 | cursor: pointer;
177 | }
178 |
179 | div.modindex-jumpbox {
180 | border-top: 1px solid #ddd;
181 | border-bottom: 1px solid #ddd;
182 | margin: 1em 0 1em 0;
183 | padding: 0.4em;
184 | }
185 |
186 | div.genindex-jumpbox {
187 | border-top: 1px solid #ddd;
188 | border-bottom: 1px solid #ddd;
189 | margin: 1em 0 1em 0;
190 | padding: 0.4em;
191 | }
192 |
193 | /* -- domain module index --------------------------------------------------- */
194 |
195 | table.modindextable td {
196 | padding: 2px;
197 | border-collapse: collapse;
198 | }
199 |
200 | /* -- general body styles --------------------------------------------------- */
201 |
202 | div.body p, div.body dd, div.body li, div.body blockquote {
203 | -moz-hyphens: auto;
204 | -ms-hyphens: auto;
205 | -webkit-hyphens: auto;
206 | hyphens: auto;
207 | }
208 |
209 | a.headerlink {
210 | visibility: hidden;
211 | }
212 |
213 | h1:hover > a.headerlink,
214 | h2:hover > a.headerlink,
215 | h3:hover > a.headerlink,
216 | h4:hover > a.headerlink,
217 | h5:hover > a.headerlink,
218 | h6:hover > a.headerlink,
219 | dt:hover > a.headerlink,
220 | caption:hover > a.headerlink,
221 | p.caption:hover > a.headerlink,
222 | div.code-block-caption:hover > a.headerlink {
223 | visibility: visible;
224 | }
225 |
226 | div.body p.caption {
227 | text-align: inherit;
228 | }
229 |
230 | div.body td {
231 | text-align: left;
232 | }
233 |
234 | .first {
235 | margin-top: 0 !important;
236 | }
237 |
238 | p.rubric {
239 | margin-top: 30px;
240 | font-weight: bold;
241 | }
242 |
243 | img.align-left, .figure.align-left, object.align-left {
244 | clear: left;
245 | float: left;
246 | margin-right: 1em;
247 | }
248 |
249 | img.align-right, .figure.align-right, object.align-right {
250 | clear: right;
251 | float: right;
252 | margin-left: 1em;
253 | }
254 |
255 | img.align-center, .figure.align-center, object.align-center {
256 | display: block;
257 | margin-left: auto;
258 | margin-right: auto;
259 | }
260 |
261 | .align-left {
262 | text-align: left;
263 | }
264 |
265 | .align-center {
266 | text-align: center;
267 | }
268 |
269 | .align-right {
270 | text-align: right;
271 | }
272 |
273 | /* -- sidebars -------------------------------------------------------------- */
274 |
275 | div.sidebar {
276 | margin: 0 0 0.5em 1em;
277 | border: 1px solid #ddb;
278 | padding: 7px 7px 0 7px;
279 | background-color: #ffe;
280 | width: 40%;
281 | float: right;
282 | }
283 |
284 | p.sidebar-title {
285 | font-weight: bold;
286 | }
287 |
288 | /* -- topics ---------------------------------------------------------------- */
289 |
290 | div.topic {
291 | border: 1px solid #ccc;
292 | padding: 7px 7px 0 7px;
293 | margin: 10px 0 10px 0;
294 | }
295 |
296 | p.topic-title {
297 | font-size: 1.1em;
298 | font-weight: bold;
299 | margin-top: 10px;
300 | }
301 |
302 | /* -- admonitions ----------------------------------------------------------- */
303 |
304 | div.admonition {
305 | margin-top: 10px;
306 | margin-bottom: 10px;
307 | padding: 7px;
308 | }
309 |
310 | div.admonition dt {
311 | font-weight: bold;
312 | }
313 |
314 | div.admonition dl {
315 | margin-bottom: 0;
316 | }
317 |
318 | p.admonition-title {
319 | margin: 0px 10px 5px 0px;
320 | font-weight: bold;
321 | }
322 |
323 | div.body p.centered {
324 | text-align: center;
325 | margin-top: 25px;
326 | }
327 |
328 | /* -- tables ---------------------------------------------------------------- */
329 |
330 | table.docutils {
331 | border: 0;
332 | border-collapse: collapse;
333 | }
334 |
335 | table caption span.caption-number {
336 | font-style: italic;
337 | }
338 |
339 | table caption span.caption-text {
340 | }
341 |
342 | table.docutils td, table.docutils th {
343 | padding: 1px 8px 1px 5px;
344 | border-top: 0;
345 | border-left: 0;
346 | border-right: 0;
347 | border-bottom: 1px solid #aaa;
348 | }
349 |
350 | table.footnote td, table.footnote th {
351 | border: 0 !important;
352 | }
353 |
354 | th {
355 | text-align: left;
356 | padding-right: 5px;
357 | }
358 |
359 | table.citation {
360 | border-left: solid 1px gray;
361 | margin-left: 1px;
362 | }
363 |
364 | table.citation td {
365 | border-bottom: none;
366 | }
367 |
368 | /* -- figures --------------------------------------------------------------- */
369 |
370 | div.figure {
371 | margin: 0.5em;
372 | padding: 0.5em;
373 | }
374 |
375 | div.figure p.caption {
376 | padding: 0.3em;
377 | }
378 |
379 | div.figure p.caption span.caption-number {
380 | font-style: italic;
381 | }
382 |
383 | div.figure p.caption span.caption-text {
384 | }
385 |
386 | /* -- field list styles ----------------------------------------------------- */
387 |
388 | table.field-list td, table.field-list th {
389 | border: 0 !important;
390 | }
391 |
392 | .field-list ul {
393 | margin: 0;
394 | padding-left: 1em;
395 | }
396 |
397 | .field-list p {
398 | margin: 0;
399 | }
400 |
401 | /* -- other body styles ----------------------------------------------------- */
402 |
403 | ol.arabic {
404 | list-style: decimal;
405 | }
406 |
407 | ol.loweralpha {
408 | list-style: lower-alpha;
409 | }
410 |
411 | ol.upperalpha {
412 | list-style: upper-alpha;
413 | }
414 |
415 | ol.lowerroman {
416 | list-style: lower-roman;
417 | }
418 |
419 | ol.upperroman {
420 | list-style: upper-roman;
421 | }
422 |
423 | dl {
424 | margin-bottom: 15px;
425 | }
426 |
427 | dd p {
428 | margin-top: 0px;
429 | }
430 |
431 | dd ul, dd table {
432 | margin-bottom: 10px;
433 | }
434 |
435 | dd {
436 | margin-top: 3px;
437 | margin-bottom: 10px;
438 | margin-left: 30px;
439 | }
440 |
441 | dt:target, .highlighted {
442 | background-color: #fbe54e;
443 | }
444 |
445 | dl.glossary dt {
446 | font-weight: bold;
447 | font-size: 1.1em;
448 | }
449 |
450 | .optional {
451 | font-size: 1.3em;
452 | }
453 |
454 | .sig-paren {
455 | font-size: larger;
456 | }
457 |
458 | .versionmodified {
459 | font-style: italic;
460 | }
461 |
462 | .system-message {
463 | background-color: #fda;
464 | padding: 5px;
465 | border: 3px solid red;
466 | }
467 |
468 | .footnote:target {
469 | background-color: #ffa;
470 | }
471 |
472 | .line-block {
473 | display: block;
474 | margin-top: 1em;
475 | margin-bottom: 1em;
476 | }
477 |
478 | .line-block .line-block {
479 | margin-top: 0;
480 | margin-bottom: 0;
481 | margin-left: 1.5em;
482 | }
483 |
484 | .guilabel, .menuselection {
485 | font-family: sans-serif;
486 | }
487 |
488 | .accelerator {
489 | text-decoration: underline;
490 | }
491 |
492 | .classifier {
493 | font-style: oblique;
494 | }
495 |
496 | abbr, acronym {
497 | border-bottom: dotted 1px;
498 | cursor: help;
499 | }
500 |
501 | /* -- code displays --------------------------------------------------------- */
502 |
503 | pre {
504 | overflow: auto;
505 | overflow-y: hidden; /* fixes display issues on Chrome browsers */
506 | }
507 |
508 | span.pre {
509 | -moz-hyphens: none;
510 | -ms-hyphens: none;
511 | -webkit-hyphens: none;
512 | hyphens: none;
513 | }
514 |
515 | td.linenos pre {
516 | padding: 5px 0px;
517 | border: 0;
518 | background-color: transparent;
519 | color: #aaa;
520 | }
521 |
522 | table.highlighttable {
523 | margin-left: 0.5em;
524 | }
525 |
526 | table.highlighttable td {
527 | padding: 0 0.5em 0 0.5em;
528 | }
529 |
530 | div.code-block-caption {
531 | padding: 2px 5px;
532 | font-size: small;
533 | }
534 |
535 | div.code-block-caption code {
536 | background-color: transparent;
537 | }
538 |
539 | div.code-block-caption + div > div.highlight > pre {
540 | margin-top: 0;
541 | }
542 |
543 | div.code-block-caption span.caption-number {
544 | padding: 0.1em 0.3em;
545 | font-style: italic;
546 | }
547 |
548 | div.code-block-caption span.caption-text {
549 | }
550 |
551 | div.literal-block-wrapper {
552 | padding: 1em 1em 0;
553 | }
554 |
555 | div.literal-block-wrapper div.highlight {
556 | margin: 0;
557 | }
558 |
559 | code.descname {
560 | background-color: transparent;
561 | font-weight: bold;
562 | font-size: 1.2em;
563 | }
564 |
565 | code.descclassname {
566 | background-color: transparent;
567 | }
568 |
569 | code.xref, a code {
570 | background-color: transparent;
571 | font-weight: bold;
572 | }
573 |
574 | h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
575 | background-color: transparent;
576 | }
577 |
578 | .viewcode-link {
579 | float: right;
580 | }
581 |
582 | .viewcode-back {
583 | float: right;
584 | font-family: sans-serif;
585 | }
586 |
587 | div.viewcode-block:target {
588 | margin: -1px -10px;
589 | padding: 0 10px;
590 | }
591 |
592 | /* -- math display ---------------------------------------------------------- */
593 |
594 | img.math {
595 | vertical-align: middle;
596 | }
597 |
598 | div.body div.math p {
599 | text-align: center;
600 | }
601 |
602 | span.eqno {
603 | float: right;
604 | }
605 |
606 | span.eqno a.headerlink {
607 | position: relative;
608 | left: 0px;
609 | z-index: 1;
610 | }
611 |
612 | div.math:hover a.headerlink {
613 | visibility: visible;
614 | }
615 |
616 | /* -- printout stylesheet --------------------------------------------------- */
617 |
618 | @media print {
619 | div.document,
620 | div.documentwrapper,
621 | div.bodywrapper {
622 | margin: 0 !important;
623 | width: 100%;
624 | }
625 |
626 | div.sphinxsidebar,
627 | div.related,
628 | div.footer,
629 | #top-link {
630 | display: none;
631 | }
632 | }
--------------------------------------------------------------------------------
/pyspark_dist_explore/tests/test_pyspark_dist_explore.py:
--------------------------------------------------------------------------------
import os

import findspark

# Locate the Spark installation before importing pyspark. Prefer the standard
# SPARK_HOME environment variable so the suite is not tied to one developer's
# machine; fall back to the original hard-coded path for backward compatibility.
findspark.init(os.environ.get('SPARK_HOME',
                              '/media/chris/data/spark-2.4.0-bin-hadoop2.7/'))

import pyspark.sql.functions as F
import sparktestingbase.sqltestcase
import pandas as pd
import unittest
import math
from pyspark.sql import Row
from unittest import mock

import sys
# Make the package under test importable when running from the tests directory.
sys.path.append('../')
from pyspark_dist_explore import Histogram
from pyspark_dist_explore.pyspark_dist_explore import create_histogram_object
16 |
17 |
class HistogramTest(sparktestingbase.sqltestcase.SQLTestCase):
    """Tests for the Histogram class, run against a local Spark SQL session
    (``self.sc`` and ``self.sqlCtx`` are provided by sparktestingbase's
    SQLTestCase), so these are integration tests rather than pure units."""

    def test_init_default(self):
        """Should set default settings when no arguments are given"""
        hist = Histogram()
        self.assertIsNone(hist.min_value)
        self.assertIsNone(hist.max_value)
        self.assertEqual(10, hist.nr_bins)
        self.assertEqual(0, len(hist.bin_boundaries))
        self.assertEqual(0, len(hist.hist_dict))
        self.assertEqual(0, len(hist.col_list))
        self.assertFalse(hist.is_build)

    def test_init_non_default(self):
        """Should set min bin, max bin, and number of bins"""
        hist = Histogram(bins=10, range=(5, 8))
        self.assertEqual(10, hist.nr_bins)
        self.assertEqual(5, hist.min_value)
        self.assertEqual(8, hist.max_value)
        self.assertEqual(0, len(hist.bin_boundaries))

    def test_init_bins_given(self):
        """Should set the list of bins when given in the constructor;
        bins are converted to float"""
        hist = Histogram(bins=[1, 2, '3'])
        # 1 == 1.0 in Python, so this assertion also holds for float boundaries.
        self.assertListEqual([1, 2, 3], hist.bin_boundaries)

    def create_test_df(self):
        # Helper: builds a two-column DataFrame where column 'value' holds
        # (1, 2, 3) and column 'value2' holds (2, 3, 4).
        test_list = [(1, 2), (2, 3), (3, 4)]
        rdd = self.sc.parallelize(test_list)
        rdd_f = rdd.map(lambda x: Row(value=x[0], value2=x[1]))
        return self.sqlCtx.createDataFrame(rdd_f)

    def test_add_column(self):
        """Should add a (column, column name) tuple to the col_list when a single-column data frame is given"""
        hist = Histogram(bins=10)
        test_df = self.create_test_df()
        hist.add_column(test_df.select(F.col('value')))
        self.assertEqual(1, len(hist.col_list))
        # col_list entries are (dataframe, column name) pairs.
        self.assertEqual('value', hist.col_list[0][1])
        self.assertDataFrameEqual(test_df.select(F.col('value')), hist.col_list[0][0])

    def test_add_column_more_then_1_column_in_dataframe(self):
        """Should throw an error when the input data frame contains more than one column"""
        hist = Histogram(bins=10)
        test_df = self.create_test_df()
        with self.assertRaises(ValueError):
            hist.add_column(test_df)

    def test_add_column_non_numeric(self):
        """Should raise a ValueError if a non-numeric column is added"""
        test_list = ['a', 'b']
        rdd = self.sc.parallelize(test_list)
        rdd_f = rdd.map(lambda x: Row(value=x))
        spark_df = self.sqlCtx.createDataFrame(rdd_f)
        hist = Histogram()
        with self.assertRaises(ValueError):
            hist.add_column(spark_df)

    def test_add_multiple_columns(self):
        """Adds new items to the col_list when new items are added"""
        hist = Histogram(bins=10)
        test_df = self.create_test_df()
        hist.add_column(test_df.select(F.col('value')))
        hist.add_column(test_df.select(F.col('value2')))
        self.assertEqual(2, len(hist.col_list))
        self.assertEqual('value', hist.col_list[0][1])
        self.assertDataFrameEqual(test_df.select(F.col('value')), hist.col_list[0][0])
        self.assertEqual('value2', hist.col_list[1][1])
        self.assertDataFrameEqual(test_df.select(F.col('value2')), hist.col_list[1][0])

    def test_get_min_value(self):
        """Should return the minimum value over all columns in a Histogram"""
        hist = Histogram(bins=10)
        test_df = self.create_test_df()
        hist.add_column(test_df.select(F.col('value')))
        hist.add_column(test_df.select(F.col('value2')))
        self.assertEqual(1, hist._get_min_value())

    def test_get_max_value(self):
        """Should return the maximum value over all columns in a Histogram"""
        hist = Histogram(bins=10)
        test_df = self.create_test_df()
        hist.add_column(test_df.select(F.col('value')))
        hist.add_column(test_df.select(F.col('value2')))
        self.assertEqual(4, hist._get_max_value())

    def test_calculate_bins(self):
        """Should return a list of evenly spaced bins between min and max bin if they are set"""
        hist = Histogram(range=(5, 10), bins=2)
        self.assertListEqual([5, 7.5, 10], hist._calculate_bins())

    def test_calculate_bins_bins_set(self):
        """Should just return the list of bin edges when this was set in the constructor"""
        hist = Histogram(bins=[1, 2, 3])
        self.assertListEqual([1, 2, 3], hist._calculate_bins())

    def test_calculate_bins_single_column(self):
        """Should return the number of bins when there is only a single column, and no min and max is set"""
        hist = Histogram(bins=5)
        test_df = self.create_test_df()
        hist.add_column(test_df.select(F.col('value')))
        self.assertEqual(5, hist._calculate_bins())

    def test_calculate_bins_multiple_columns(self):
        """Should return a list of evenly spaced bins between the smallest and highest value over all columns"""
        hist = Histogram(bins=3)
        test_df = self.create_test_df()  # The lowest value in this DF is 1, the highest is 4
        hist.add_column(test_df.select(F.col('value')))
        hist.add_column(test_df.select(F.col('value2')))
        self.assertListEqual([1, 2, 3, 4], hist._calculate_bins())

    def test_add_hist_single_column(self):
        """Should add a list of bin values (e.g. the number of values that fall in a bin) to the hist_dict, where
        the key is the column name. If multiple columns have the same name a number is appended"""
        hist = Histogram(bins=2)
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        hist.add_column(column_to_ad)
        hist.bin_boundaries = hist._calculate_bins()
        hist._add_hist(column_to_ad, 'value')
        self.assertEqual(1, len(hist.hist_dict))
        self.assertListEqual([1, 2], hist.hist_dict['value'])

    def test_add_hist_single_column_sets_bin_list(self):
        """Should set the bin list if this is a single number"""
        hist = Histogram(bins=2)
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        hist.add_column(column_to_ad)
        hist.bin_boundaries = hist._calculate_bins()
        hist._add_hist(column_to_ad, 'value')
        # 2 bins imply 3 bin borders.
        self.assertEqual(3, len(hist.bin_boundaries))

    def test_add_hist_multiple_column(self):
        """Should add a second list of bin values to the hist_dict"""
        hist = Histogram(bins=2)
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        column_to_ad_2 = test_df.select(F.col('value2'))
        hist.add_column(column_to_ad)
        hist.add_column(column_to_ad_2)
        hist.bin_boundaries = hist._calculate_bins()
        hist._add_hist(column_to_ad, 'value')
        hist._add_hist(column_to_ad_2, 'value2')
        self.assertEqual(2, len(hist.hist_dict))
        self.assertListEqual([1, 2], hist.hist_dict['value2'])

    def test_add_hist_multiple_column_rename_column(self):
        """Should rename the column name if the same column name is added"""
        hist = Histogram(bins=2)
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        column_to_ad_2 = test_df.select(F.col('value'))
        hist.add_column(column_to_ad)
        hist.add_column(column_to_ad_2)
        hist.bin_boundaries = hist._calculate_bins()
        hist._add_hist(column_to_ad, 'value')
        hist._add_hist(column_to_ad_2, 'value')
        self.assertEqual(2, len(hist.hist_dict))
        self.assertTrue('value (1)' in hist.hist_dict)

    def test_add_hist_single_value(self):
        """Should set the bin list to n (self.nr_bins) bins (n+1 bin borders) where the min bin border is the
        single value -0.5 and the max bin border is the single value +0.5 in case a column is input with only a
        single value"""
        single_column_value = 1
        nr_bins = 5
        column_values = [single_column_value] * 100
        test_df = self.sqlCtx.createDataFrame(pd.DataFrame({'foo': column_values}))
        hist = Histogram(bins=nr_bins)
        hist.add_column(test_df.select(F.col('foo')))
        hist.build()
        self.assertEqual(6, len(hist.bin_boundaries))
        self.assertEqual(single_column_value - 0.5, min(hist.bin_boundaries))
        self.assertEqual(single_column_value + 0.5, max(hist.bin_boundaries))
        # All 100 values land in the middle bin.
        self.assertEqual(len(column_values), hist.hist_dict['foo'][math.floor(nr_bins/2)])

    def test_build(self):
        """Should calculate the bin list, and hist values for each column in the Histogram, if the
        histogram hasn't been built before"""
        hist = Histogram(bins=2)
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        column_to_ad_2 = test_df.select(F.col('value2'))
        hist.add_column(column_to_ad)
        hist.add_column(column_to_ad_2)
        hist.build()
        self.assertEqual(3, len(hist.bin_boundaries))
        self.assertEqual(2, len(hist.hist_dict))
        self.assertTrue(hist.is_build)

    # mock.patch decorators apply bottom-up, so _calculate_bins is the first
    # mock argument and _add_hist the second.
    @mock.patch('pyspark_dist_explore.Histogram._add_hist')
    @mock.patch('pyspark_dist_explore.Histogram._calculate_bins')
    def test_build_already_build(self, calculate_bins_func, add_hist_func):
        """Should not rebuild if Histogram was already built before"""
        hist = Histogram()
        hist.is_build = True
        hist.build()
        self.assertFalse(add_hist_func.called)
        self.assertFalse(calculate_bins_func.called)

    def test_to_pandas_default(self):
        """Should create a pandas dataframe from the Histogram object"""
        hist = Histogram(bins=2)
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        column_to_ad_2 = test_df.select(F.col('value2'))
        hist.add_column(column_to_ad)
        hist.add_column(column_to_ad_2)
        # Index labels are the formatted bin ranges.
        expected_df = pd.DataFrame({'value': [2, 1],
                                    'value2': [1, 2]}).set_index([['1.00 - 2.50', '2.50 - 4.00']])
        self.assertTrue(expected_df.equals(hist.to_pandas()))

    def test_to_pandas_density(self):
        """Should create a pandas dataframe of a density plot of the histogram"""
        hist = Histogram(bins=2)
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        column_to_ad_2 = test_df.select(F.col('value2'))
        hist.add_column(column_to_ad)
        hist.add_column(column_to_ad_2)
        # Index labels are the bin midpoints.
        expected_df = pd.DataFrame({'value': [1.0, 0.5], 'value2': [0.5, 1.0]}).set_index([[1.75, 3.25]])
        self.assertTrue(expected_df.equals(hist.to_pandas('density')))

    def test_add_data_single_column(self):
        """Should add a single column of data to the Histogram"""
        hist = Histogram()
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        hist.add_data(column_to_ad)
        self.assertEqual(1, len(hist.col_list))

    def test_add_data_list_of_columns(self):
        """Should add all columns from the list of columns to the Histogram"""
        test_df = self.create_test_df()
        column_to_ad = test_df.select(F.col('value'))
        column_to_ad_2 = test_df.select(F.col('value2'))
        hist = Histogram()
        hist.add_data([column_to_ad, column_to_ad_2])
        self.assertEqual(2, len(hist.col_list))

    def test_add_data_entire_dataframe(self):
        """Should add all columns of a dataframe to the histogram"""
        test_df = self.create_test_df()
        hist = Histogram()
        hist.add_data(test_df)
        self.assertEqual(2, len(hist.col_list))
265 |
266 |
class FunctionsTest(unittest.TestCase):
    """Tests for the module-level create_histogram_object helper."""

    def test_create_histogram_object_default(self):
        """An empty kwargs dict should yield a histogram with default settings."""
        histogram = create_histogram_object({})
        self.assertEqual(10, histogram.nr_bins)
        self.assertIsNone(histogram.min_value)
        self.assertIsNone(histogram.max_value)

    def test_create_histogram_object_non_default(self):
        """A kwargs dict with 'bins' and 'range' should be applied to the histogram."""
        histogram = create_histogram_object({'bins': 11, 'range': (10, 20)})
        self.assertEqual(11, histogram.nr_bins)
        self.assertEqual(10, histogram.min_value)
        self.assertEqual(20, histogram.max_value)
282 |
283 |
# Allow the suite to be executed directly: ``python test_pyspark_dist_explore.py``.
if __name__ == "__main__":
    unittest.main()
286 |
--------------------------------------------------------------------------------
/docs/build/html/_static/alabaster.css:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 | @import url("basic.css");
54 |
55 | /* -- page layout ----------------------------------------------------------- */
56 |
57 | body {
58 | font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
59 | font-size: 17px;
60 | background-color: #fff;
61 | color: #000;
62 | margin: 0;
63 | padding: 0;
64 | }
65 |
66 |
67 | div.document {
68 | width: 940px;
69 | margin: 30px auto 0 auto;
70 | }
71 |
72 | div.documentwrapper {
73 | float: left;
74 | width: 100%;
75 | }
76 |
77 | div.bodywrapper {
78 | margin: 0 0 0 220px;
79 | }
80 |
81 | div.sphinxsidebar {
82 | width: 220px;
83 | font-size: 14px;
84 | line-height: 1.5;
85 | }
86 |
87 | hr {
88 | border: 1px solid #B1B4B6;
89 | }
90 |
91 | div.body {
92 | background-color: #fff;
93 | color: #3E4349;
94 | padding: 0 30px 0 30px;
95 | }
96 |
97 | div.body > .section {
98 | text-align: left;
99 | }
100 |
101 | div.footer {
102 | width: 940px;
103 | margin: 20px auto 30px auto;
104 | font-size: 14px;
105 | color: #888;
106 | text-align: right;
107 | }
108 |
109 | div.footer a {
110 | color: #888;
111 | }
112 |
113 | p.caption {
114 | font-family: inherit;
115 | font-size: inherit;
116 | }
117 |
118 |
119 | div.relations {
120 | display: none;
121 | }
122 |
123 |
124 | div.sphinxsidebar a {
125 | color: #444;
126 | text-decoration: none;
127 | border-bottom: 1px dotted #999;
128 | }
129 |
130 | div.sphinxsidebar a:hover {
131 | border-bottom: 1px solid #999;
132 | }
133 |
134 | div.sphinxsidebarwrapper {
135 | padding: 18px 10px;
136 | }
137 |
138 | div.sphinxsidebarwrapper p.logo {
139 | padding: 0;
140 | margin: -10px 0 0 0px;
141 | text-align: center;
142 | }
143 |
144 | div.sphinxsidebarwrapper h1.logo {
145 | margin-top: -10px;
146 | text-align: center;
147 | margin-bottom: 5px;
148 | text-align: left;
149 | }
150 |
151 | div.sphinxsidebarwrapper h1.logo-name {
152 | margin-top: 0px;
153 | }
154 |
155 | div.sphinxsidebarwrapper p.blurb {
156 | margin-top: 0;
157 | font-style: normal;
158 | }
159 |
160 | div.sphinxsidebar h3,
161 | div.sphinxsidebar h4 {
162 | font-family: 'Garamond', 'Georgia', serif;
163 | color: #444;
164 | font-size: 24px;
165 | font-weight: normal;
166 | margin: 0 0 5px 0;
167 | padding: 0;
168 | }
169 |
170 | div.sphinxsidebar h4 {
171 | font-size: 20px;
172 | }
173 |
174 | div.sphinxsidebar h3 a {
175 | color: #444;
176 | }
177 |
178 | div.sphinxsidebar p.logo a,
179 | div.sphinxsidebar h3 a,
180 | div.sphinxsidebar p.logo a:hover,
181 | div.sphinxsidebar h3 a:hover {
182 | border: none;
183 | }
184 |
185 | div.sphinxsidebar p {
186 | color: #555;
187 | margin: 10px 0;
188 | }
189 |
190 | div.sphinxsidebar ul {
191 | margin: 10px 0;
192 | padding: 0;
193 | color: #000;
194 | }
195 |
196 | div.sphinxsidebar ul li.toctree-l1 > a {
197 | font-size: 120%;
198 | }
199 |
200 | div.sphinxsidebar ul li.toctree-l2 > a {
201 | font-size: 110%;
202 | }
203 |
204 | div.sphinxsidebar input {
205 | border: 1px solid #CCC;
206 | font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
207 | font-size: 1em;
208 | }
209 |
210 | div.sphinxsidebar hr {
211 | border: none;
212 | height: 1px;
213 | color: #AAA;
214 | background: #AAA;
215 |
216 | text-align: left;
217 | margin-left: 0;
218 | width: 50%;
219 | }
220 |
221 | /* -- body styles ----------------------------------------------------------- */
222 |
223 | a {
224 | color: #004B6B;
225 | text-decoration: underline;
226 | }
227 |
228 | a:hover {
229 | color: #6D4100;
230 | text-decoration: underline;
231 | }
232 |
233 | div.body h1,
234 | div.body h2,
235 | div.body h3,
236 | div.body h4,
237 | div.body h5,
238 | div.body h6 {
239 | font-family: 'Garamond', 'Georgia', serif;
240 | font-weight: normal;
241 | margin: 30px 0px 10px 0px;
242 | padding: 0;
243 | }
244 |
245 | div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; }
246 | div.body h2 { font-size: 180%; }
247 | div.body h3 { font-size: 150%; }
248 | div.body h4 { font-size: 130%; }
249 | div.body h5 { font-size: 100%; }
250 | div.body h6 { font-size: 100%; }
251 |
252 | a.headerlink {
253 | color: #DDD;
254 | padding: 0 4px;
255 | text-decoration: none;
256 | }
257 |
258 | a.headerlink:hover {
259 | color: #444;
260 | background: #EAEAEA;
261 | }
262 |
263 | div.body p, div.body dd, div.body li {
264 | line-height: 1.4em;
265 | }
266 |
267 | div.admonition {
268 | margin: 20px 0px;
269 | padding: 10px 30px;
270 | background-color: #EEE;
271 | border: 1px solid #CCC;
272 | }
273 |
274 | div.admonition tt.xref, div.admonition code.xref, div.admonition a tt {
275 | background-color: #FBFBFB;
276 | border-bottom: 1px solid #fafafa;
277 | }
278 |
279 | div.admonition p.admonition-title {
280 | font-family: 'Garamond', 'Georgia', serif;
281 | font-weight: normal;
282 | font-size: 24px;
283 | margin: 0 0 10px 0;
284 | padding: 0;
285 | line-height: 1;
286 | }
287 |
288 | div.admonition p.last {
289 | margin-bottom: 0;
290 | }
291 |
292 | div.highlight {
293 | background-color: #fff;
294 | }
295 |
296 | dt:target, .highlight {
297 | background: #FAF3E8;
298 | }
299 |
300 | div.warning {
301 | background-color: #FCC;
302 | border: 1px solid #FAA;
303 | }
304 |
305 | div.danger {
306 | background-color: #FCC;
307 | border: 1px solid #FAA;
308 | -moz-box-shadow: 2px 2px 4px #D52C2C;
309 | -webkit-box-shadow: 2px 2px 4px #D52C2C;
310 | box-shadow: 2px 2px 4px #D52C2C;
311 | }
312 |
313 | div.error {
314 | background-color: #FCC;
315 | border: 1px solid #FAA;
316 | -moz-box-shadow: 2px 2px 4px #D52C2C;
317 | -webkit-box-shadow: 2px 2px 4px #D52C2C;
318 | box-shadow: 2px 2px 4px #D52C2C;
319 | }
320 |
321 | div.caution {
322 | background-color: #FCC;
323 | border: 1px solid #FAA;
324 | }
325 |
326 | div.attention {
327 | background-color: #FCC;
328 | border: 1px solid #FAA;
329 | }
330 |
331 | div.important {
332 | background-color: #EEE;
333 | border: 1px solid #CCC;
334 | }
335 |
336 | div.note {
337 | background-color: #EEE;
338 | border: 1px solid #CCC;
339 | }
340 |
341 | div.tip {
342 | background-color: #EEE;
343 | border: 1px solid #CCC;
344 | }
345 |
346 | div.hint {
347 | background-color: #EEE;
348 | border: 1px solid #CCC;
349 | }
350 |
351 | div.seealso {
352 | background-color: #EEE;
353 | border: 1px solid #CCC;
354 | }
355 |
356 | div.topic {
357 | background-color: #EEE;
358 | }
359 |
360 | p.admonition-title {
361 | display: inline;
362 | }
363 |
364 | p.admonition-title:after {
365 | content: ":";
366 | }
367 |
368 | pre, tt, code {
369 | font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
370 | font-size: 0.9em;
371 | }
372 |
373 | .hll {
374 | background-color: #FFC;
375 | margin: 0 -12px;
376 | padding: 0 12px;
377 | display: block;
378 | }
379 |
380 | img.screenshot {
381 | }
382 |
383 | tt.descname, tt.descclassname, code.descname, code.descclassname {
384 | font-size: 0.95em;
385 | }
386 |
387 | tt.descname, code.descname {
388 | padding-right: 0.08em;
389 | }
390 |
391 | img.screenshot {
392 | -moz-box-shadow: 2px 2px 4px #EEE;
393 | -webkit-box-shadow: 2px 2px 4px #EEE;
394 | box-shadow: 2px 2px 4px #EEE;
395 | }
396 |
397 | table.docutils {
398 | border: 1px solid #888;
399 | -moz-box-shadow: 2px 2px 4px #EEE;
400 | -webkit-box-shadow: 2px 2px 4px #EEE;
401 | box-shadow: 2px 2px 4px #EEE;
402 | }
403 |
404 | table.docutils td, table.docutils th {
405 | border: 1px solid #888;
406 | padding: 0.25em 0.7em;
407 | }
408 |
409 | table.field-list, table.footnote {
410 | border: none;
411 | -moz-box-shadow: none;
412 | -webkit-box-shadow: none;
413 | box-shadow: none;
414 | }
415 |
416 | table.footnote {
417 | margin: 15px 0;
418 | width: 100%;
419 | border: 1px solid #EEE;
420 | background: #FDFDFD;
421 | font-size: 0.9em;
422 | }
423 |
424 | table.footnote + table.footnote {
425 | margin-top: -15px;
426 | border-top: none;
427 | }
428 |
429 | table.field-list th {
430 | padding: 0 0.8em 0 0;
431 | }
432 |
433 | table.field-list td {
434 | padding: 0;
435 | }
436 |
437 | table.field-list p {
438 | margin-bottom: 0.8em;
439 | }
440 |
441 | /* Cloned from
442 | * https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68
443 | */
444 | .field-name {
445 | -moz-hyphens: manual;
446 | -ms-hyphens: manual;
447 | -webkit-hyphens: manual;
448 | hyphens: manual;
449 | }
450 |
451 | table.footnote td.label {
452 | width: .1px;
453 | padding: 0.3em 0 0.3em 0.5em;
454 | }
455 |
456 | table.footnote td {
457 | padding: 0.3em 0.5em;
458 | }
459 |
460 | dl {
461 | margin: 0;
462 | padding: 0;
463 | }
464 |
465 | dl dd {
466 | margin-left: 30px;
467 | }
468 |
469 | blockquote {
470 | margin: 0 0 0 30px;
471 | padding: 0;
472 | }
473 |
474 | ul, ol {
475 | /* Matches the 30px from the narrow-screen "li > ul" selector below */
476 | margin: 10px 0 10px 30px;
477 | padding: 0;
478 | }
479 |
480 | pre {
481 | background: #EEE;
482 | padding: 7px 30px;
483 | margin: 15px 0px;
484 | line-height: 1.3em;
485 | }
486 |
487 | div.viewcode-block:target {
488 | background: #ffd;
489 | }
490 |
491 | dl pre, blockquote pre, li pre {
492 | margin-left: 0;
493 | padding-left: 30px;
494 | }
495 |
496 | tt, code {
497 | background-color: #ecf0f3;
498 | color: #222;
499 | /* padding: 1px 2px; */
500 | }
501 |
502 | tt.xref, code.xref, a tt {
503 | background-color: #FBFBFB;
504 | border-bottom: 1px solid #fff;
505 | }
506 |
507 | a.reference {
508 | text-decoration: none;
509 | border-bottom: 1px dotted #004B6B;
510 | }
511 |
512 | /* Don't put an underline on images */
513 | a.image-reference, a.image-reference:hover {
514 | border-bottom: none;
515 | }
516 |
517 | a.reference:hover {
518 | border-bottom: 1px solid #6D4100;
519 | }
520 |
521 | a.footnote-reference {
522 | text-decoration: none;
523 | font-size: 0.7em;
524 | vertical-align: top;
525 | border-bottom: 1px dotted #004B6B;
526 | }
527 |
528 | a.footnote-reference:hover {
529 | border-bottom: 1px solid #6D4100;
530 | }
531 |
532 | a:hover tt, a:hover code {
533 | background: #EEE;
534 | }
535 |
536 |
/* Narrow screens: drop the sidebar and let the document use the full width. */
@media screen and (max-width: 870px) {

    div.sphinxsidebar {
        display: none;
    }

    div.document {
        width: 100%;
    }

    /* Collapsed the four individual zero margins into the shorthand. */
    div.documentwrapper {
        margin: 0;
    }

    div.bodywrapper {
        margin: 0;
    }

    ul {
        margin-left: 0;
    }

    li > ul {
        /* Matches the 30px from the "ul, ol" selector above */
        margin-left: 30px;
    }

    .document {
        width: auto;
    }

    /* ".footer { width: auto; }" appeared twice with identical bodies; kept one. */
    .footer {
        width: auto;
    }

    .bodywrapper {
        margin: 0;
    }

    .github {
        display: none;
    }

}
594 |
595 |
596 |
/* Very narrow screens: move the sidebar below the document as a dark footer band. */
@media screen and (max-width: 875px) {

    body {
        margin: 0;
        padding: 20px 30px;
    }

    div.documentwrapper {
        float: none;
        background: #fff;
    }

    div.sphinxsidebar {
        display: block;
        float: none;
        /* Slightly over 100% plus negative margins to bleed past body padding. */
        width: 102.5%;
        margin: 50px -30px -20px -30px;
        padding: 10px 20px;
        background: #333;
        color: #FFF;
    }

    div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p,
    div.sphinxsidebar h3 a {
        color: #fff;
    }

    div.sphinxsidebar a {
        color: #AAA;
    }

    div.sphinxsidebar p.logo {
        display: none;
    }

    div.document {
        width: 100%;
        margin: 0;
    }

    div.footer {
        display: none;
    }

    div.bodywrapper {
        margin: 0;
    }

    div.body {
        min-height: 0;
        padding: 0;
    }

    .rtd_doc_footer {
        display: none;
    }

    .document {
        width: auto;
    }

    /* ".footer { width: auto; }" appeared twice with identical bodies; kept one. */
    .footer {
        width: auto;
    }

    .github {
        display: none;
    }
}
670 |
671 |
/* misc. */

/* Hide ReadTheDocs inline widgets. */
.revsys-inline {
    display: none!important;
}

/* Make nested-list/multi-paragraph items look better in Releases changelog
 * pages. Without this, docutils' list-wrapping quirks cause inconsistent
 * formatting between different release sub-lists.
 */
div#changelog > div.section > ul > li > p:only-child {
    margin-bottom: 0;
}

/* Hide fugly table cell borders in ..bibliography:: directive output */
table.docutils.citation, table.docutils.citation td, table.docutils.citation th {
    border: none;
    /* Below needed in some edge cases; if not applied, bottom shadows appear */
    -moz-box-shadow: none;
    -webkit-box-shadow: none;
    box-shadow: none;
}
--------------------------------------------------------------------------------
/pyspark_dist_explore/pyspark_dist_explore.py:
--------------------------------------------------------------------------------
from scipy.interpolate import interp1d

try:
    from pyspark.sql.types import NumericType

    import pyspark.sql.functions as F
# Catch only a missing pyspark; a bare `except` would also swallow
# KeyboardInterrupt/SystemExit and hide unrelated import-time errors.
except ImportError:
    pass

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
15 |
16 |
def hist(axis, x, overlapping=False, formatted_yaxis=True, **kwargs):
    """Plots a histogram on an Axis object

    Args:
        :axis: (`Axes`)
            A matplotlib Axes object on which the histogram will be plot.
        :x: (`DataFrame` or `list` of `DataFrame`)
            A DataFrame with one or more numerical columns, or a list of single numerical column DataFrames
        :overlapping: (`bool`, optional)
            Generate overlapping histograms.

            When True the histograms are drawn on top of each other; when False a
            normal grouped histogram is produced. Defaults to False.
        :formatted_yaxis: (`bool`, optional)
            When True the y-axis tick labels are formatted for readability,
            e.g. 1500000 is shown as 1.5M. Defaults to True.

        :\*\*kwargs:
            The keyword arguments as used in matplotlib.pyplot.hist

    Returns:
        :n: (`array` or `list` of `arrays`)
            The values of the histogram bins, one array per input DataFrame
            (a single array when only one DataFrame is given).
        :bins: (`array`)
            The edges of the bins: nbins + 1 values (nbins left edges plus the
            right edge of the last bin). Always a single array, even for
            multiple data sets.
        :patches: (`list` or `list` of `lists`)
            Silent list of the individual patches used to create the histogram,
            or a list of such lists for multiple input data sets.

    """
    # `create_histogram_object` consumes 'bins'/'range' from kwargs before
    # the remaining keywords are forwarded to matplotlib.
    hist_object = create_histogram_object(kwargs)
    hist_object.add_data(x)
    return hist_object.plot_hist(axis, overlapping, formatted_yaxis, **kwargs)
55 |
56 |
def distplot(axis, x, **kwargs):
    """Plots a normalised histogram and a density plot on an Axes object

    Args:
        :axis: (`Axes`)
            A matplotlib Axes object on which the histogram will be plot.
        :x: (`DataFrame` or `list` of `DataFrame`)
            A DataFrame with one or more numerical columns, or a list of single numerical column DataFrames
        :\*\*kwargs:
            The keyword arguments as used in matplotlib.pyplot.hist. Normed is set to True

    Returns:
        :n: (`array` or `list` of `arrays`)
            The values of the histogram bins, one array per input DataFrame
            (a single array when only one DataFrame is given).
        :bins: (`array`)
            The edges of the bins: nbins + 1 values (nbins left edges plus the
            right edge of the last bin). Always a single array, even for
            multiple data sets.
        :patches: (`list` or `list` of `lists`)
            Silent list of the individual patches used to create the histogram,
            or a list of such lists for multiple input data sets.
    """
    hist_object = create_histogram_object(kwargs)
    hist_object.add_data(x)
    n, bins, patches = hist_object.plot_hist(axis, density=True, **kwargs)

    # Reuse the bar colors for the density lines. With a multi-element list
    # input, `patches` is a list of Rectangle lists (one per DataFrame);
    # otherwise it is a single flat list of Rectangle objects.
    if type(x) == list and len(x) > 1:
        colors = [patch_group[0].get_facecolor() for patch_group in patches]
    elif type(patches[0]) is Rectangle:
        colors = [patches[0].get_facecolor()]
    else:
        raise TypeError("Unexpected Patch Type. Expected Rectangle")

    hist_object.plot_density(axis, color=colors)
    return n, bins, patches
98 |
99 |
def pandas_histogram(x, bins=10, range=None):
    """Returns a pandas DataFrame with histograms of the Spark DataFrame

    Bin ranges are formatted as text and put on the Index.

    Args:
        :x: (`DataFrame` or `list` of `DataFrame`)
            A DataFrame with one or more numerical columns, or a list of single numerical column DataFrames
        :bins: (`integer` or `array_like`, optional)
            If an integer is given, bins + 1 bin edges are returned, consistently with numpy.histogram() for
            numpy version >= 1.3.

            Unequally spaced bins are supported if bins is a sequence.

            Default is 10
        :range: (tuple or None, optional)
            The lower and upper range of the bins. Lower and upper outliers are ignored.
            If not provided, range is (x.min(), x.max()). Range has no effect if bins is a sequence.

            If bins is a sequence or range is specified, autoscaling is based on the specified bin range instead
            of the range of x.

            Default is None
    """
    hist_object = Histogram(bins=bins, range=range)
    hist_object.add_data(x)
    return hist_object.to_pandas()
127 |
128 |
def create_histogram_object(kwargs):
    """Build a Histogram from a matplotlib-style keyword dict.

    Consumes the ``'bins'`` and ``'range'`` entries of *kwargs* (the caller
    forwards the remaining keywords straight to matplotlib, which would
    reject them) and uses them to construct the Histogram.

    Args:
        :kwargs: (`dict`)
            Keyword arguments as passed to `hist`/`distplot`; mutated in
            place — 'bins' and 'range' are removed if present.

    Returns:
        A new Histogram configured with the extracted bins/range.
    """
    # dict.pop(key, default) removes the key when present, preserving the
    # original "read then del" semantics in a single call.
    bins = kwargs.pop('bins', 10)
    b_range = kwargs.pop('range', None)

    return Histogram(bins=bins, range=b_range)
142 |
143 |
class Histogram(object):
    """The Histogram object leverages Spark to calculate histograms, and matplotlib to visualize these.

    Args:
        :range: (`tuple`, optional)
            The lower and upper range of the bins.

            Lower and upper outliers are ignored. If not provided, range is (min(x), max(x)). Range has no
            effect if bins is a sequence. If bins is a sequence or range is specified, autoscaling is
            based on the specified bin range instead of the range of x.
        :bins: (`int` or `list` of `str` or `list of `int`, optional)
            If an integer is given: Number of bins in the histogram.

            Defaults to 10.

            If a list is given: Predefined list of bin boundaries.

            The bins are all open to the right except for the last which is closed. e.g. [1,10,20,50] means
            the buckets are [1,10) [10,20) [20,50], which means 1<=x<10, 10<=x<20, 20<=x<=50.

    """
    def __init__(self, bins=10, range=None):
        self.col_list = []  # (DataFrame, column name) pairs registered via add_column()
        self.bin_boundaries = []  # bin edges; [] (or a bare int, see build()) until resolved
        self.hist_dict = {}  # column names: bin weight lists pairs
        self.nr_bins = None  # requested bin count when `bins` is an integer
        self.min_value = None
        self.max_value = None
        self.is_build = False  # memoizes build() so the Spark jobs run only once

        if isinstance(bins, list):
            self.bin_boundaries = [float(bin_border) for bin_border in bins]
        else:
            self.nr_bins = bins

        if range is not None:
            self.min_value = range[0]
            self.max_value = range[1]

    def add_column(self, table):
        """Add single column DataFrame to the histogram object.

        If multiple columns share the same name, a (n) will be appended to the name, where n is
        the next available number.

        Args:
            :table: (:obj:`dataframe`)
                A PySpark DataFrame with a single column

        Raises:
            ValueError: if `table` has more than one column, or its column
                is not of a numeric Spark SQL type.
        """
        if len(table.columns) > 1:
            raise ValueError('More then one column is being added, use add_data() to add multi-column DataFrames')

        column_name = table.columns[0]

        if not isinstance(table.schema.fields[0].dataType, NumericType):
            raise ValueError('Column %s has a non-numeric type (%s), only numeric types are supported'
                             % (column_name, str(table.schema.fields[0].dataType)))

        self.col_list.append((table, column_name))

    def _get_bin_centers(self):
        """Return the midpoint of every bin in self.bin_boundaries."""
        result = []
        for i in range(len(self.bin_boundaries) - 1):
            result.append(((self.bin_boundaries[i + 1] - self.bin_boundaries[i]) / 2) + self.bin_boundaries[i])
        return result

    def _get_col_names(self):
        """Return human-readable 'low - high' labels (2 decimals) for each bin."""
        new_col_names = []
        for i in range(len(self.bin_boundaries) - 1):
            new_col_names.append('%.2f - %.2f' % (self.bin_boundaries[i], self.bin_boundaries[i + 1]))
        return new_col_names

    def _check_col_name(self, column_name):
        """Return a hist_dict key unique for this histogram: `column_name`,
        or 'column_name (n)' with the lowest free n if the name is taken."""
        n = 0
        col_name_new = column_name
        while col_name_new in self.hist_dict.keys():
            n += 1
            col_name_new = '%s (%d)' % (column_name, n)
        return col_name_new

    def _get_min_value(self):
        """Smallest value over all registered columns; runs one Spark job per
        column unless an explicit range was supplied."""
        if self.min_value is not None:
            return self.min_value
        return min([table.select(F.min(F.col(col_name))).collect()[0][0]
                    for table, col_name in self.col_list])

    def _get_max_value(self):
        """Largest value over all registered columns; runs one Spark job per
        column unless an explicit range was supplied."""
        if self.max_value is not None:
            return self.max_value
        return max([table.select(F.max(F.col(col_name))).collect()[0][0]
                    for table, col_name in self.col_list])

    def _calculate_bins(self):
        """Resolve the bin specification for the RDD histogram() call.

        Returns the predefined boundary list when one was given; the bare bin
        count (an int) when Spark can derive boundaries itself (single column,
        no explicit range); otherwise a computed list of evenly spaced
        boundaries spanning the global min/max.
        """
        if len(self.bin_boundaries) > 0:
            return self.bin_boundaries

        if len(self.bin_boundaries) == 0 and len(self.col_list) == 1 \
                and self.min_value is None and self.max_value is None:
            # Only use the amount of bins as input For the histogram function
            return self.nr_bins

        min_value = self._get_min_value()
        max_value = self._get_max_value()

        # expand empty range to avoid empty graph
        return Histogram._calc_n_bins_between(min_value, max_value, self.nr_bins)

    def _add_hist(self, table, column_name):
        """Uses spark to calculate the hist values: for each column a list of weights, and if the bin_list is not set
        a set of bin boundaries"""
        bin_boundaries, bin_weights = table.select(column_name).rdd.flatMap(lambda x: x).histogram(self.bin_boundaries)
        self.hist_dict[self._check_col_name(column_name)] = bin_weights

        if isinstance(self.bin_boundaries, int):  # the bin_list is not set
            if len(bin_boundaries) == 2 and bin_boundaries[0] == bin_boundaries[1]:
                # In case of a column with 1 unique value we need to calculate the histogram ourselves.
                min_value = bin_boundaries[0]
                max_value = bin_boundaries[1]
                self.bin_boundaries = self._calc_n_bins_between(min_value, max_value, self.nr_bins)
                # NOTE(review): this writes under the raw column_name while the assignment above
                # used _check_col_name(); with duplicate column names the two keys differ — confirm.
                self.hist_dict[column_name] = Histogram._calc_weights(self.bin_boundaries, min_value, bin_weights)
            else:
                self.bin_boundaries = bin_boundaries

    @staticmethod
    def _calc_n_bins_between(min_value, max_value, nr_bins):
        """Returns a list of bin borders between min_value and max_value"""
        if min_value == max_value:
            # widen a zero-width range so the histogram still has visible bins
            min_value = min_value - 0.5
            max_value = max_value + 0.5
        step = (float(max_value) - float(min_value)) / nr_bins
        return [min_value + (step * float(bn_nr)) for bn_nr in range(nr_bins + 1)]

    @staticmethod
    def _calc_weights(bins, value, value_count):
        """Calculate weights given a bin list, value within that bin list and a count"""
        # first we get a list of bin boundary tuples
        weights = list()
        bin_boundary_idx = [(idx, idx+2) for idx in range(len(bins)-1)]
        bin_boundaries = [tuple(bins[left_idx:right_idx]) for (left_idx, right_idx) in bin_boundary_idx]
        for left_boundary, right_boundary in bin_boundaries:
            # all counts land in the single bin containing `value`; other bins get 0
            if left_boundary <= value < right_boundary:
                weights.append(value_count[0])
            else:
                weights.append(0)
        return weights

    @staticmethod
    def _convert_number_bmk(axis_value, _):
        """Converts the values on axes to Billions, Millions or Thousands"""
        if axis_value >= 1e9:
            return '{:1.1f}B'.format(axis_value * 1e-9)
        if axis_value >= 1e6:
            return '{:1.1f}M'.format(axis_value * 1e-6)
        if axis_value >= 1e3:
            return '{:1.1f}K'.format(axis_value * 1e-3)
        if axis_value >= 1 or axis_value == 0:
            return '{:1.0f}'.format(axis_value)
        # sub-1 (non-zero) values are returned unformatted
        return axis_value

    def build(self):
        """Calculates the histogram values for each of the columns.

        If the Histogram has already been build, it doesn't build it again.
        """
        if not self.is_build:
            self.bin_boundaries = self._calculate_bins()
            for table, column_name in self.col_list:
                self._add_hist(table, column_name)
            self.is_build = True

    def to_pandas(self, kind='hist'):
        """Returns a pandas dataframe from the Histogram object.

        This function calculates the Histogram function in Spark if it was not done yet.

        Args:
            :kind: (:obj:`str`, optional):
                'hist' or 'density'. When using hist this returns the histogram object
                as pandas dataframe. When using density the index contains the bin centers, and the values in the
                DataFrame are the scaled values. Defaults to 'hist'

        Returns:
            A pandas DataFrame from the Histogram object.
            NOTE(review): implicitly returns None for any other `kind` — confirm
            whether raising ValueError would be preferable.
        """
        self.build()
        if kind == 'hist':
            return pd.DataFrame(self.hist_dict).set_index([self._get_col_names()])
        elif kind == 'density':
            result = pd.DataFrame(self.hist_dict).set_index([self._get_bin_centers()])
            # scale each column by its own maximum
            return result.apply(lambda x: x / x.max(), axis=0)

    def plot_hist(self, ax, overlapping=False, formatted_yaxis=True, **kwargs):
        """Returns a matplotlib style histogram (matplotlib.pyplot.hist)

        Uses the matplotlib object oriented interface to add a Histogram to an matplotlib Axes object.
        All named arguments from pyplot.hist can be used. A new argument called "type" makes it possible to
        make overlapping histogram plots.

        Args:
            :ax: (`Axes`)
                An matplotlib Axes object on which the histogram will be plot
            :overlapping (`bool`, optional):
                If set to true, this will generate an overlapping plot.
                When set to False it will generate a normal grouped histogram. Defaults to False.
            :formatted_yaxis: (`bool`, optional).
                If set to true, the numbers on the yaxis will be formatted
                for better readability. E.g. 1500000 will become 1.5M. Defaults to True
            :**kwargs:
                The keyword arguments as used in matplotlib.pyplot.hist
        """
        self.build()

        if formatted_yaxis:
            # Round the y-axis value to nearest thousand, million, or billion for readable y-axis
            formatter = plt.FuncFormatter(Histogram._convert_number_bmk)
            ax.yaxis.set_major_formatter(formatter)

        if overlapping:
            # NOTE(review): `label` is set to the full set of column names for every
            # overlapped series, and this branch has no return statement (returns
            # None), unlike the grouped branch below — confirm intended.
            for colname in self.hist_dict:
                ax.hist(self._get_bin_centers(),
                        bins=self.bin_boundaries,
                        alpha=0.5,
                        label=self.hist_dict.keys(),
                        weights=self.hist_dict[colname],
                        **kwargs
                        )
        else:
            # One grouped hist call: the pre-computed Spark counts are reproduced
            # by weighting a copy of the bin centers per column.
            weights_multi = [self.hist_dict[colname] for colname in self.hist_dict]
            return ax.hist([self._get_bin_centers()] * len(self.hist_dict),
                           bins=self.bin_boundaries,
                           weights=weights_multi,
                           label=self.hist_dict.keys(),
                           **kwargs)

    def plot_density(self, ax, num=300, **kwargs):
        """Returns a density plot on an Pyplot Axes object.

        Args:
            :ax: (`Axes`)
                An matplotlib Axes object on which the histogram will be plot
            :num: (`int`)
                The number of x values the line is plotted on. Default: 300
            :**kwargs:
                Keyword arguments that are passed on to the pyplot.plot function.
        """
        colors = []

        self.build()
        bin_centers = np.asarray(self._get_bin_centers())
        x_new = np.linspace(bin_centers.min(), bin_centers.max(), num)

        if 'color' in kwargs:
            # colors are applied per line below, not passed to ax.plot()
            colors = kwargs['color']
            del kwargs['color']

        power_smooth = []

        # For each column: build a normalised histogram of the bin centers weighted
        # by the Spark counts, smooth it with quadratic interpolation, and collect
        # alternating x/y arrays so a single ax.plot(*args) call draws every line.
        for (colname, bin_values) in self.hist_dict.items():
            normed_values, ble = np.histogram(self._get_bin_centers(),
                                              bins=self.bin_boundaries,
                                              weights=bin_values,
                                              density=True
                                              )
            interpolation_function = interp1d(bin_centers, normed_values, kind='quadratic')

            power_smooth.append(x_new)
            power_smooth.append(interpolation_function(x_new))

        lines = ax.plot(*power_smooth, **kwargs)

        for i, line in enumerate(lines):
            if len(colors) > 0:
                plt.setp(line, color=colors[i], label=list(self.hist_dict.keys())[i])
            else:
                plt.setp(line, label=list(self.hist_dict.keys())[i])

        return lines

    def add_data(self, data):
        """Adds 1 or more columns to a histogram.

        Multiple options are available:
            * Add a single column dataframe
            * Add a list of single column dataframes
            * Add a dataframe with multiple columns

        Args:
            :data:
                A single column Spark dataframe, a list of single column Spark
                dataframes, or a multi column Spark dataframe.
        """
        if isinstance(data, list):
            for df_column in data:
                self.add_column(df_column)

        elif len(data.columns) > 1:
            # split a multi-column frame into one single-column frame per column
            for col_name in data.columns:
                self.add_column(data.select(col_name))

        else:
            self.add_column(data)
446 |
--------------------------------------------------------------------------------
/docs/build/html/index.html:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Welcome to pyspark_histogram’s documentation! — pyspark_dist_explore 0.1.0 documentation
10 |
11 |
12 |
13 |
14 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
If set to true, this will generate an overlapping plot.
79 | When set to False it will generate a normal grouped histogram. Defaults to False.
80 |
81 |
82 |
formatted_yaxis:
83 |
(bool, optional)
84 | If set to true, the numbers on the yaxis will be formatted
85 | for better readability. E.g. 1500000 will become 1.5M. Defaults to True
86 |
87 |
88 |
**kwargs:
The keyword arguments as used in matplotlib.pyplot.hist
89 |
90 |
91 |
92 |
93 |
94 |
Returns:
95 |
96 |
97 |
98 |
99 |
n:
(array or list of arrays)
100 | The values of the histogram bins. See normed and weights for a description of the possible semantics.
101 | If input x is an array, then this is an array of length nbins. If input is a sequence arrays
102 | [data1, data2,..], then this is a list of arrays with the values of the histograms for each of the
103 | arrays in the same order.
104 |
105 |
bins:
(array)
106 | The edges of the bins.
107 | Length nbins + 1 (nbins left edges and right edge of last bin). Always a single array even
108 | when multiple data sets are passed in.
109 |
110 |
patches:
(list or list of lists)
111 | Silent list of individual patches used to create the histogram or list of such lists if multiple
112 | input datasets.
Plots a normalised histogram and a density plot on an Axes object
124 |
125 |
Args:
126 |
127 |
128 |
129 |
130 |
axis:
(Axes)
131 | An matplotlib Axes object on which the histogram will be plot.
132 |
133 |
x:
(DataFrame or list of DataFrame)
134 | A DataFrame with one or more numerical columns, or a list of single numerical column DataFrames
135 |
136 |
**kwargs:
The keyword arguments as used in matplotlib.pyplot.hist. Normed is set to True
137 |
138 |
139 |
140 |
141 |
Returns:
142 |
143 |
144 |
145 |
146 |
n:
(array or list of arrays)
147 | The values of the histogram bins. See normed and weights for a description of the possible semantics.
148 | If input x is an array, then this is an array of length nbins. If input is a sequence arrays
149 | [data1, data2,..], then this is a list of arrays with the values of the histograms for each of the
150 | arrays in the same order.
151 |
152 |
bins:
(array)
153 | The edges of the bins.
154 | Length nbins + 1 (nbins left edges and right edge of last bin). Always a single array even
155 | when multiple data sets are passed in.
156 |
157 |
patches:
(list or list of lists)
158 | Silent list of individual patches used to create the histogram or list of such lists if multiple
159 | input datasets.
Returns a pandas DataFrame with histograms of the Spark DataFrame
171 |
Bin ranges are formatted as text and put on the Index.
172 |
173 |
Args:
174 |
175 |
176 |
177 |
178 |
x:
(DataFrame or list of DataFrame)
179 | A DataFrame with one or more numerical columns, or a list of single numerical column DataFrames
180 |
181 |
182 |
bins:
(integer or array_like, optional)
183 | If an integer is given, bins + 1 bin edges are returned, consistently with numpy.histogram() for
184 | numpy version >= 1.3.
185 |
Unequally spaced bins are supported if bins is a sequence.
186 |
Default is 10
187 |
188 |
189 |
range:
(tuple or None, optional)
190 | The lower and upper range of the bins. Lower and upper outliers are ignored.
191 | If not provided, range is (x.min(), x.max()). Range has no effect if bins is a sequence.
192 |
If bins is a sequence or range is specified, autoscaling is based on the specified bin range instead
193 | of the range of x.
194 |
Default is None
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 | class pyspark_dist_explore.Histogram(bins=10, range=None)[source]¶
206 |
The Histogram object leverages Spark to calculate histograms, and matplotlib to visualize these.
207 |
208 |
Args:
209 |
210 |
211 |
212 |
213 |
range:
(tuple, optional)
214 | The lower and upper range of the bins.
215 | Lower and upper outliers are ignored. If not provided, range is (min(x), max(x)). Range has no
216 | effect if bins is a sequence. If bins is a sequence or range is specified, autoscaling is
217 | based on the specified bin range instead of the range of x.
218 |
219 |
bins:
(int or list of str or list of `int, optional)
220 | If an integer is given: Number of bins in the histogram. Defaults to 10.
221 | If a list is given: Predefined list of bin boundaries.
222 | The bins are all open to the right except for the last which is closed. e.g. [1,10,20,50] means
223 | the buckets are [1,10) [10,20) [20,50], which means 1<=x<10, 10<=x<20, 20<=x<=50.
Returns a matplotlib style histogram (matplotlib.pyplot.hist)
311 |
Uses the matplotlib object oriented interface to add a Histogram to an matplotlib Axes object.
312 | All named arguments from pyplot.hist can be used. A new argument called “type” makes it possible to
313 | make overlapping histogram plots.
314 |
315 |
Args:
316 |
317 |
318 |
319 |
320 |
ax:
(Axes)
321 | An matplotlib Axes object on which the histogram will be plot
322 |
323 |
overlapping (bool, optional):
324 |
If set to true, this will generate an overlapping plot.
325 | When set to False it will generate a normal grouped histogram. Defaults to False.
326 |
327 |
formatted_yaxis:
328 |
(bool, optional).
329 | If set to true, the numbers on the yaxis will be formatted
330 | for better readability. E.g. 1500000 will become 1.5M. Defaults to True
331 |
332 |
**kwargs:
The keyword arguments as used in matplotlib.pyplot.hist
Returns a pandas dataframe from the Histogram object.
344 |
This function calculates the Histogram function in Spark if it was not done yet.
345 |
346 |
Args:
347 |
348 |
349 |
350 |
351 |
kind:
(str, optional):
352 | ‘hist’ or ‘density’. When using hist this returns the histogram object
353 | as pandas dataframe. When using density the index contains the bin centers, and the values in the
354 | DataFrame are the scaled values. Defaults to ‘hist’
\ 689 | Sort by:\ 690 | best rated\ 691 | newest\ 692 | oldest\ 693 |
\ 694 |\ 698 |
Add a comment\ 700 | (markup):
\ 701 |``code``, \ 704 | code blocks:::and an indented block after blank line