WinPython Portable is the WinPython Python-distribution packaged with a PortableApps.com launcher as a portable app, so you can use Python on your iPod, USB flash drive, portable hard drive, etc. It has all the same features as WinPython, plus, it leaves no personal information behind on the machine you run it on, so you can take it with you wherever you go. Learn more about WinPython...
131 |
132 |
- Support PortableApps.com's Hosting and Development
You can read about advanced configuration options for the PortableApps.com Launcher in its readme file.
147 |
148 |
149 |
150 |
151 |
--------------------------------------------------------------------------------
/App/DefaultData/notebooks/docs/test_data_access.py:
--------------------------------------------------------------------------------
# pyodbc
import pyodbc

# List the ODBC data sources (DSNs) visible to this process.
# NOTE: a 32-bit python only sees 32-bit ODBC drivers (and vice versa).
sources = pyodbc.dataSources()
# iterate the (dsn, driver) pairs directly instead of list(keys()) + indexed lookup
sl = [' %s [%s]' % (dsn, driver) for dsn, driver in sources.items()]
print("pyodbc Providers: (beware 32/64 bit driver and python version must match)\n", '\n'.join(sl))
9 |
# odbc to EXCEL .xls via pyodbc (beware 32/64 bit driver and python version must match)
import pyodbc, os

# Read every row of Sheet1 from ./test.xls through the Microsoft Excel ODBC driver.
filename = os.path.join(os.getcwd(), 'test.xls')
todo = "select * from [Sheet1$]"
print("\nusing pyodbc to read an Excel .xls file:\n\t", filename)
if os.path.exists(filename):
    CNXNSTRING = 'Driver={Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)};DBQ=%s;READONLY=FALSE' % filename
    try:
        cnxn = pyodbc.connect(CNXNSTRING, autocommit=True)
        cursor = cnxn.cursor()
        rows = cursor.execute(todo).fetchall()
        print([column[0] for column in cursor.description])
        print(rows)
        cursor.close()
        cnxn.close()
    except Exception as e:  # was a bare except: — let KeyboardInterrupt/SystemExit propagate
        print("\n *** failed ***\n", e)
# odbc to ACCESS .mdb via pyodbc (beware 32/64 bit driver and python version must match)
import pyodbc, os

# Read every row of the 'users' table from ./test.mdb through the Access ODBC driver.
filename = os.path.join(os.getcwd(), 'test.mdb')
todo = "select * from users"  # keep the query in one named variable, like the other sections
print("\nusing pyodbc to read an ACCESS .mdb file:\n\t", filename)
if os.path.exists(filename):
    CNXNSTRING = 'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=%s;READONLY=FALSE' % filename
    try:
        cnxn = pyodbc.connect(CNXNSTRING, autocommit=False)
        cursor = cnxn.cursor()
        rows = cursor.execute(todo).fetchall()
        print([column[0] for column in cursor.description])
        print(rows)
        cursor.close()
        cnxn.close()
    except Exception as e:  # was a bare except: — let KeyboardInterrupt/SystemExit propagate
        print("\n *** failed ***\n", e)
43 |
# pythonnet
import clr
clr.AddReference("System.Data")
import System.Data.OleDb as ADONET
import System.Data.Odbc as ODBCNET
import System.Data.Common as DATACOM

# Enumerate the ADO.NET DbProviderFactory classes registered on this machine.
table = DATACOM.DbProviderFactories.GetFactoryClasses()
# fixed typo in the printed message: "pytho" -> "python"
print("\n .NET Providers: (beware 32/64 bit driver and python version must match)")
for row in table.Rows:
    # first column is the provider's display name; print the remaining columns after it
    print(" %s" % row[table.Columns[0]])
    print(" ", [row[column] for column in table.Columns if column != table.Columns[0]])
56 |
57 |
# odbc to EXCEL .xls via pythonnet
import clr, os
clr.AddReference("System.Data")
import System.Data.OleDb as ADONET
import System.Data.Odbc as ODBCNET
import System.Data.Common as DATACOM

# Read every row of Sheet1 from ./test.xls through the .NET ODBC bridge.
filename = os.path.join(os.getcwd(), 'test.xls')
todo = "select * from [Sheet1$]"
print("\nusing pythonnet to read an excel .xls file:\n\t", filename, "\n\t", todo)
if os.path.exists(filename):
    CNXNSTRING = 'Driver={Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)};DBQ=%s;READONLY=FALSE' % filename
    cnxn = ODBCNET.OdbcConnection(CNXNSTRING)
    try:
        cnxn.Open()
        command = cnxn.CreateCommand()
        command.CommandText = todo  # was a duplicated string literal of the same query
        rows = command.ExecuteReader()
        print([rows.GetName(i) for i in range(rows.FieldCount)])
        for row in rows:
            print([row[i] for i in range(rows.FieldCount)])
        command.Dispose()
        cnxn.Close()
    except Exception as e:  # was a bare except: — let KeyboardInterrupt/SystemExit propagate
        print("\n *** failed ***\n", e)
83 |
84 |
# odbc to ACCESS .mdb via pythonnet
import clr, os
clr.AddReference("System.Data")
import System.Data.OleDb as ADONET
import System.Data.Odbc as ODBCNET
import System.Data.Common as DATACOM

# Read every row of the 'users' table from ./test.mdb through the .NET ODBC bridge.
filename = os.path.join(os.getcwd(), 'test.mdb')
todo = "select * from users"
print("\nusing odbc via pythonnet to read an ACCESS .mdb file:\n\t", filename, "\n\t", todo)

if os.path.exists(filename):
    CNXNSTRING = 'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=%s;READONLY=FALSE' % filename
    cnxn = ODBCNET.OdbcConnection(CNXNSTRING)
    try:
        cnxn.Open()
        command = cnxn.CreateCommand()
        command.CommandText = todo  # was a duplicated string literal of the same query
        rows = command.ExecuteReader()
        print([rows.GetName(i) for i in range(rows.FieldCount)])
        for row in rows:
            print([row[i] for i in range(rows.FieldCount)])
        command.Dispose()
        cnxn.Close()
    except Exception as e:  # was a bare except: — let KeyboardInterrupt/SystemExit propagate
        print("\n *** failed ***\n", e)
111 |
# OLE DB (Microsoft ACE provider) via pythonnet: works ONLY if you have the matching
# 32 (or 64 bit) driver.  NOTE: despite the old "DAO" label this uses ADO.NET OleDb.
import clr, os
clr.AddReference("System.Data")
import System.Data.OleDb as ADONET
import System.Data.Odbc as ODBCNET
import System.Data.Common as DATACOM

# Read every row of the 'users' table from ./test.accdb through the ACE OLE DB provider.
filename = os.path.join(os.getcwd(), 'test.accdb')
todo = "select * from users"
# message fixed: the file opened here is .accdb, not .mdb
print("\nusing DAO via pythonnet to read an ACCESS .accdb file:\n\t", filename, "\n\t", todo)
if os.path.exists(filename):
    # needs a driver in 32 or 64 bit like your running python
    # https://www.microsoft.com/download/details.aspx?id=13255
    CNXNSTRING = 'Provider=Microsoft.ACE.OLEDB.12.0; Data Source=%s;READONLY=FALSE' % filename
    cnxn = ADONET.OleDbConnection(CNXNSTRING)
    try:
        cnxn.Open()
        command = cnxn.CreateCommand()
        command.CommandText = todo
        # parameterized-query example (left for reference):
        # command.CommandText = 'select id, name from people where group_id = @group_id'
        # command.Parameters.Add(SqlParameter('group_id', 23))
        rows = command.ExecuteReader()
        print([rows.GetName(i) for i in range(rows.FieldCount)])
        for row in rows:
            print([row[i] for i in range(rows.FieldCount)])
        command.Dispose()
        cnxn.Close()
    except Exception as e:  # was a bare except: — let KeyboardInterrupt/SystemExit propagate
        print("\n *** failed ***\n", e)
141 |
--------------------------------------------------------------------------------
/App/DefaultData/notebooks/docs/Beginner's FAQ.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Experimenting your Winpython installation\n",
8 | "\n",
9 | " . [Winpython_checker test, to see various packages](Winpython_checker.ipynb) \n",
10 | " \n",
11 | " . [Seaborn visualization Example](seaborn_demo_from_jakevdp.ipynb)\n",
12 | " \n",
13 | " . [QT libraries Example](Qt_libraries_demo.ipynb)\n",
14 | "\n",
15 | " . [Pandas Data-science example](dplyr_pandas.ipynb)"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "# Tutorials and Demonstrations on Internet\n",
23 | "\n",
24 | "\n",
25 | "## Introduction to DataScience\n",
26 | " . [Python Data Science Handbook](https://github.com/jakevdp/PythonDataScienceHandbook/blob/master/README.md)\n"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## Ipython Notebook Documentation\n",
34 | " \n",
35 | " . [IPython notebook-based online documentation](https://nbviewer.ipython.org/github/ipython/ipython/blob/master/examples/Index.ipynb)\n",
36 | " \n",
37 | " . [Galery of Interesting Notebooks](https://github.com/ipython/ipython/wiki/A-gallery-of-interesting-IPython-Notebooks)\n",
38 | " \n",
39 | " . Videos of Conferences and Trainings: [Python Various Conferences](https://pyvideo.org/), [Pydata Conferences](https://www.youtube.com/user/PyDataTV) , [Scipy Conferences](https://www.youtube.com/user/EnthoughtMedia)\n",
40 | " "
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Pandas\n",
48 | "\n",
49 | ". Beginners Training Video: [\"Brandon Rhodes - Pandas From The Ground Up - PyCon 2015 \"](https://www.youtube.com/watch?v=5JnMutdy6Fw)\n",
50 | "\n",
51 | ". Pandas [API reference](https://pandas.pydata.org/pandas-docs/stable/api.html)\n"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "## Graphics :\n",
59 | "\n",
60 | " . Matplotlib : [Beginner's guide](https://matplotlib.org/users/beginner.html) , [Gallery](https://matplotlib.org/gallery.html) , [General Content](https://matplotlib.org/contents.html) \n",
61 | " \n",
62 | " . seaborn : [Tutorial](https://stanford.edu/~mwaskom/software/seaborn/tutorial.html) , [Gallery](https://stanford.edu/~mwaskom/software/seaborn/examples/index.html)\n",
63 | " \n",
64 | " . scikit-image : [Gallery](https://scikit-image.org/docs/dev/auto_examples/), [User Guide](https://scikit-image.org/docs/dev/user_guide.html)\n",
65 | " \n",
66 | " . holoviews : [Introduction](https://ioam.github.io/holoviews) , [Tutorials](https://ioam.github.io/holoviews/Tutorials/index.html)\n",
67 | " \n",
68 | " . bqplot: [Introduction](https://bqplot.readthedocs.io/en/stable/introduction.html)\n",
69 | " \n",
70 | " . Altair: [Introduction]](https://altair-viz.github.io/)\n",
71 | " \n",
72 | " . mpld3 : [Gallery](https://mpld3.github.io/examples/index.html#example-gallery) \n",
73 | "\n",
74 | " "
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "## SQL\n",
82 | " . IPython-SQL : [Tutorial](https://nbviewer.ipython.org/gist/catherinedevlin/6588378)\n",
83 | " \n",
84 | " . db.py : [Tutorial](https://nbviewer.ipython.org/github/yhat/db.py/blob/master/examples/db-example.ipynb)\n",
85 | " \n",
86 | " . baresql : [Tutorial](https://pypi.python.org/pypi/baresql)\n"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "\n",
94 | "\n",
95 | "## Machine learning / Deep Learning\n",
96 | " . scikit-learn : [Tutorial](https://scikit-learn.org/stable/tutorial/index.html) , [Gallery](https://scikit-learn.org/stable/auto_examples/index.html)\n",
97 | " \n",
98 | " . Theano: [Tutorial](https://deeplearning.net/software/theano/tutorial/), [Related Projects](https://github.com/Theano/Theano/wiki/Related-projects)\n",
99 | " \n",
100 | " . Keras: [Introduction]](https://keras.io/)\n",
101 | "\n",
102 | " . Tensorflow: [Tutorial](https://github.com/Hvass-Labs/TensorFlow-Tutorials) with [videos](https://www.youtube.com/playlist?list=PL9Hr9sNUjfsmEu1ZniY0XpHSzl5uihcXZ)"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "\n",
110 | "\n",
111 | "## Qt User Interface Development :\n",
112 | "\n",
113 | " . PyQt4 tutorial: https://zetcode.com/gui/pyqt4/firstprograms/\n",
114 | " \n",
115 | " . PyQt5 tutorial: https://zetcode.com/gui/pyqt5/firstprograms/\n",
116 | " \n",
117 | " . guiqwt tutorial: https://pythonhosted.org/guiqwt/examples.html .\n",
118 | " \n",
119 | " . switching from guiqwt 2 to 3: https://github.com/PierreRaybaut/guiqwt/blob/master/doc/migrating_from_v2_to_v3.rst)\n",
120 | " \n",
121 | " . guidata: https://pythonhosted.org/guidata/examples.html\n",
122 | " \n",
123 | " "
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "metadata": {
129 | "collapsed": false
130 | },
131 | "source": [
132 | "\n",
133 | "## Winpython\n",
134 | "\n",
135 | ". [Winpython Discussion Group](https://groups.google.com/forum/#!forum/winpython)\n",
136 | " \n",
137 | ". [Other Winpython examples](http://nbviewer.ipython.org/github/winpython/winpython_afterdoc/tree/master/)\n"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {
144 | "collapsed": true
145 | },
146 | "outputs": [],
147 | "source": []
148 | }
149 | ],
150 | "metadata": {
151 | "kernelspec": {
152 | "display_name": "Python 3",
153 | "language": "python",
154 | "name": "python3"
155 | },
156 | "language_info": {
157 | "codemirror_mode": {
158 | "name": "ipython",
159 | "version": 3
160 | },
161 | "file_extension": ".py",
162 | "mimetype": "text/x-python",
163 | "name": "python",
164 | "nbconvert_exporter": "python",
165 | "pygments_lexer": "ipython3",
166 | "version": "3.6.0"
167 | }
168 | },
169 | "nbformat": 4,
170 | "nbformat_minor": 0
171 | }
172 |
--------------------------------------------------------------------------------
/App/DefaultData/notebooks/docs/seaborn_demo_from_jakevdp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Seaborn demo per Jake VanderPlas below"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "from __future__ import print_function, division\n",
19 | "\n",
20 | "%matplotlib inline\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "import numpy as np\n",
23 | "import pandas as pd"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {
30 | "collapsed": false
31 | },
32 | "outputs": [],
33 | "source": [
34 | "plt.style.use('ggplot')\n",
35 | "x = np.linspace(0, 10, 1000)\n",
36 | "plt.plot(x, np.sin(x), x, np.cos(x));"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {
43 | "collapsed": false
44 | },
45 | "outputs": [],
46 | "source": [
47 | "import seaborn as sns\n",
48 | "sns.set()\n",
49 | "plt.plot(x, np.sin(x), x, np.cos(x));"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {
56 | "collapsed": false
57 | },
58 | "outputs": [],
59 | "source": [
60 | "data = np.random.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000)\n",
61 | "data = pd.DataFrame(data, columns=['x', 'y'])\n",
62 | "\n",
63 | "for col in 'xy':\n",
64 | " plt.hist(data[col], normed=True, alpha=0.5)"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": [
75 | "for col in 'xy':\n",
76 | " sns.kdeplot(data[col], shade=True)"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {
83 | "collapsed": false
84 | },
85 | "outputs": [],
86 | "source": [
87 | "sns.distplot(data['x']);"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {
94 | "collapsed": false
95 | },
96 | "outputs": [],
97 | "source": [
98 | "sns.kdeplot(data);"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {
105 | "collapsed": false
106 | },
107 | "outputs": [],
108 | "source": [
109 | "with sns.axes_style('white'):\n",
110 | " sns.jointplot(\"x\", \"y\", data, kind='kde');"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {
117 | "collapsed": false
118 | },
119 | "outputs": [],
120 | "source": [
121 | "with sns.axes_style('white'):\n",
122 | " sns.jointplot(\"x\", \"y\", data, kind='hex')"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {
129 | "collapsed": false
130 | },
131 | "outputs": [],
132 | "source": [
133 | "iris = sns.load_dataset(\"iris\")\n",
134 | "iris.head()"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "collapsed": false
142 | },
143 | "outputs": [],
144 | "source": [
145 | "tips = sns.load_dataset('tips')\n",
146 | "tips.head()"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {
153 | "collapsed": false
154 | },
155 | "outputs": [],
156 | "source": [
157 | "tips['tip_pct'] = 100 * tips['tip'] / tips['total_bill']\n",
158 | "\n",
159 | "grid = sns.FacetGrid(tips, row=\"sex\", col=\"time\", margin_titles=True)\n",
160 | "grid.map(plt.hist, \"tip_pct\", bins=np.linspace(0, 40, 15));"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": null,
166 | "metadata": {
167 | "collapsed": false
168 | },
169 | "outputs": [],
170 | "source": [
171 | "with sns.axes_style(style='ticks'):\n",
172 | " g = sns.factorplot(\"day\", \"total_bill\", \"sex\", data=tips, kind=\"box\")\n",
173 | " g.set_axis_labels(\"Day\", \"Total Bill\");"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": null,
179 | "metadata": {
180 | "collapsed": false
181 | },
182 | "outputs": [],
183 | "source": [
184 | "with sns.axes_style('white'):\n",
185 | " sns.jointplot(\"total_bill\", \"tip\", data=tips, kind='hex')"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "collapsed": false
193 | },
194 | "outputs": [],
195 | "source": [
196 | "sns.jointplot(\"total_bill\", \"tip\", data=tips, kind='reg');"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": null,
202 | "metadata": {
203 | "collapsed": false
204 | },
205 | "outputs": [],
206 | "source": [
207 | "planets = sns.load_dataset('planets')\n",
208 | "planets.head()"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "collapsed": false
216 | },
217 | "outputs": [],
218 | "source": [
219 | "with sns.axes_style('white'):\n",
220 | " g = sns.factorplot(\"year\", data=planets, aspect=1.5)\n",
221 | " g.set_xticklabels(step=5)"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": null,
227 | "metadata": {
228 | "collapsed": false
229 | },
230 | "outputs": [],
231 | "source": [
232 | "with sns.axes_style('white'):\n",
233 | " g = sns.factorplot(\"year\", data=planets, aspect=4.0,\n",
234 | " hue='method', order=range(2001, 2015), kind=\"count\")\n",
235 | " g.set_ylabels('Number of Planets Discovered')"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "## Scikit-learn tutorial from pycon 2015 Jake VanderPlas [here](http://nbviewer.ipython.org/github/jakevdp/sklearn_pycon2015/blob/master/notebooks/Index.ipynb)"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "metadata": {
249 | "collapsed": true
250 | },
251 | "outputs": [],
252 | "source": []
253 | }
254 | ],
255 | "metadata": {
256 | "kernelspec": {
257 | "display_name": "Python 3",
258 | "language": "python",
259 | "name": "python3"
260 | },
261 | "language_info": {
262 | "codemirror_mode": {
263 | "name": "ipython",
264 | "version": 3
265 | },
266 | "file_extension": ".py",
267 | "mimetype": "text/x-python",
268 | "name": "python",
269 | "nbconvert_exporter": "python",
270 | "pygments_lexer": "ipython3",
271 | "version": "3.4.4"
272 | }
273 | },
274 | "nbformat": 4,
275 | "nbformat_minor": 0
276 | }
277 |
--------------------------------------------------------------------------------
/Other/Source/LauncherLicense.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 |
294 | Copyright (C) <year>  <name of author>
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | <signature of Ty Coon>, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
--------------------------------------------------------------------------------
/App/DefaultData/notebooks/docs/Winpython_checker.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Winpython Default checker"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import warnings\n",
19 | "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
20 | "warnings.filterwarnings(\"ignore\", category=UserWarning)\n",
21 | "# warnings.filterwarnings(\"ignore\") # would silence all warnings"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {
28 | "collapsed": true
29 | },
30 | "outputs": [],
31 | "source": [
32 | "%matplotlib inline"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Compilers: Numba and Cython\n",
40 | "\n",
41 | "##### Requirement\n",
42 |     "To get Cython working, Winpython 3.5 users should install \"Microsoft Visual C++ Build Tools 2015\" (visualcppbuildtools_full.exe, a 4 GB installation) at https://beta.visualstudio.com/download-visual-studio-vs/\n",
43 | "\n",
44 |     "To get Numba working, non-Windows 10 users may have to install \"Microsoft Visual C++ 2015 Redistributable\" (vc_redist) at \n",
45 | "\n",
46 | "#### Compiler toolchains"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": [
57 | "# checking Numba JIT toolchain\n",
58 | "import numpy as np\n",
59 | "image = np.zeros((1024, 1536), dtype = np.uint8)\n",
60 | "\n",
61 | "from pylab import imshow, show\n",
62 | "from timeit import default_timer as timer\n",
63 | "\n",
64 | "def create_fractal(min_x, max_x, min_y, max_y, image, iters , mandelx):\n",
65 | " height = image.shape[0]\n",
66 | " width = image.shape[1]\n",
67 | " pixel_size_x = (max_x - min_x) / width\n",
68 | " pixel_size_y = (max_y - min_y) / height\n",
69 | " \n",
70 | " for x in range(width):\n",
71 | " real = min_x + x * pixel_size_x\n",
72 | " for y in range(height):\n",
73 | " imag = min_y + y * pixel_size_y\n",
74 | " color = mandelx(real, imag, iters)\n",
75 | " image[y, x] = color"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "##### Numba (a JIT Compiler)"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [],
92 | "source": [
93 | "from numba import autojit\n",
94 | "\n",
95 | "@autojit\n",
96 | "def mandel(x, y, max_iters):\n",
97 | " c = complex(x, y)\n",
98 | " z = 0.0j\n",
99 | " for i in range(max_iters):\n",
100 | " z = z*z + c\n",
101 | " if (z.real*z.real + z.imag*z.imag) >= 4:\n",
102 | " return i\n",
103 | " return max_iters\n",
104 | "\n",
105 | "start = timer()\n",
106 | "create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20 , mandel) \n",
107 | "dt = timer() - start\n",
108 | "\n",
109 | "print (\"Mandelbrot created by numba in %f s\" % dt)\n",
110 | "imshow(image)\n",
111 | "show()"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "##### Cython (a compiler for writing C extensions for the Python language)\n",
119 | "WinPython 3.5 and 3.6 users may not have mingwpy available, and so need \"VisualStudio C++ Community Edition 2015\" https://www.visualstudio.com/downloads/download-visual-studio-vs#d-visual-c "
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "collapsed": false
127 | },
128 | "outputs": [],
129 | "source": [
130 | "# Cython + Mingwpy compiler toolchain test\n",
131 | "%load_ext Cython"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [],
141 | "source": [
142 | "%%cython -a\n",
143 |     "# with %%cython -a , full C-speed lines are shown in white, slowest python-speed lines are shown in dark yellow \n",
144 | "# ==> put your cython rewrite effort on dark yellow lines\n",
145 | "def mandel_cython(x, y, max_iters):\n",
146 | " cdef int i \n",
147 | " cdef double cx, cy , zx, zy\n",
148 | " cx , cy = x, y \n",
149 | " zx , zy =0 ,0 \n",
150 | " for i in range(max_iters):\n",
151 | " zx , zy = zx*zx - zy*zy + cx , zx*zy*2 + cy\n",
152 | " if (zx*zx + zy*zy) >= 4:\n",
153 | " return i\n",
154 | " return max_iters"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": false
162 | },
163 | "outputs": [],
164 | "source": [
165 | "start = timer()\n",
166 | "create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20 , mandel_cython) \n",
167 | "dt = timer() - start\n",
168 | "\n",
169 | "print (\"Mandelbrot created by cython in %f s\" % dt)\n",
170 | "imshow(image)"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "## Graphics: Matplotlib, Pandas, Seaborn, bqplot, Bokeh, Holoviews"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {
184 | "collapsed": false
185 | },
186 | "outputs": [],
187 | "source": [
188 | "# Matplotlib\n",
189 | "# for more examples, see: http://matplotlib.org/gallery.html\n",
190 | "from mpl_toolkits.mplot3d import axes3d\n",
191 | "import matplotlib.pyplot as plt\n",
192 | "from matplotlib import cm\n",
193 | "\n",
194 | "fig = plt.figure()\n",
195 | "ax = fig.gca(projection='3d')\n",
196 | "X, Y, Z = axes3d.get_test_data(0.05)\n",
197 | "ax.plot_surface(X, Y, Z, rstride=8, cstride=8, alpha=0.3)\n",
198 | "cset = ax.contourf(X, Y, Z, zdir='z', offset=-100, cmap=cm.coolwarm)\n",
199 | "cset = ax.contourf(X, Y, Z, zdir='x', offset=-40, cmap=cm.coolwarm)\n",
200 | "cset = ax.contourf(X, Y, Z, zdir='y', offset=40, cmap=cm.coolwarm)\n",
201 | "\n",
202 | "ax.set_xlabel('X')\n",
203 | "ax.set_xlim(-40, 40)\n",
204 | "ax.set_ylabel('Y')\n",
205 | "ax.set_ylim(-40, 40)\n",
206 | "ax.set_zlabel('Z')\n",
207 | "ax.set_zlim(-100, 100)\n",
208 | "\n",
209 | "plt.show()"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": null,
215 | "metadata": {
216 | "collapsed": false
217 | },
218 | "outputs": [],
219 | "source": [
220 | "# Seaborn\n",
221 | "# for more examples, see http://stanford.edu/~mwaskom/software/seaborn/examples/index.html\n",
222 | "import seaborn as sns\n",
223 | "sns.set()\n",
224 | "df = sns.load_dataset(\"iris\")\n",
225 | "sns.pairplot(df, hue=\"species\", size=1.5)"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": null,
231 | "metadata": {
232 | "collapsed": false
233 | },
234 | "outputs": [],
235 | "source": [
236 | "#bqplot\n",
237 | "from IPython.display import display\n",
238 | "from bqplot import (Figure, Map, Mercator, Orthographic, ColorScale, ColorAxis,\n",
239 | " AlbersUSA, topo_load, Tooltip)\n",
240 | "def_tt = Tooltip(fields=['id', 'name'])\n",
241 | "map_mark = Map(scales={'projection': Mercator()}, tooltip=def_tt)\n",
242 | "map_mark.interactions = {'click': 'select', 'hover': 'tooltip'}\n",
243 | "fig = Figure(marks=[map_mark], title='Interactions Example')\n",
244 | "display(fig)"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": null,
250 | "metadata": {
251 | "collapsed": false
252 | },
253 | "outputs": [],
254 | "source": [
255 | "# ipyleaflet (javascript library usage)\n",
256 | "from ipyleaflet import (\n",
257 | " Map, Marker, TileLayer, ImageOverlay, Polyline, Polygon,\n",
258 | " Rectangle, Circle, CircleMarker, GeoJSON, DrawControl\n",
259 | ")\n",
260 | "from traitlets import link\n",
261 | "center = [34.6252978589571, -77.34580993652344]\n",
262 | "m = Map(center=[34.6252978589571, -77.34580993652344], zoom=10)\n",
263 | "dc = DrawControl()\n",
264 | "\n",
265 | "def handle_draw(self, action, geo_json):\n",
266 | " print(action)\n",
267 | " print(geo_json)\n",
268 | "m\n",
269 | "m"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": null,
275 | "metadata": {
276 | "collapsed": true
277 | },
278 | "outputs": [],
279 | "source": [
280 | "dc.on_draw(handle_draw)\n",
281 | "m.add_control(dc)"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {
288 | "collapsed": false
289 | },
290 | "outputs": [],
291 | "source": [
292 | "# Bokeh 0.11.0\n",
293 | "# for more examples, see http://nbviewer.jupyter.org/github/bokeh/bokeh-notebooks/blob/master/index.ipynb\n",
294 | "import matplotlib.pyplot as plt\n",
295 | "import numpy as np\n",
296 | "import pandas as pd\n",
297 | "import os\n",
298 | "from bokeh import mpl\n",
299 | "from bokeh.plotting import output_notebook, show\n",
300 | "import matplotlib as mplc\n",
301 | "# Generate the pandas dataframe\n",
302 | "data = np.random.multivariate_normal([0, 0], [[1, 2], [2, 20]], size=100)\n",
303 | "data = pd.DataFrame(data, columns=[\"X\", \"Y\"])\n",
304 | "mplc.rc(\"figure\", figsize=(6, 6))\n",
305 | "\n",
306 | "# Just plot seaborn kde\n",
307 | "import seaborn as sns\n",
308 | "sns.kdeplot(data, cmap=\"BuGn_d\")\n",
309 | "\n",
310 | "plt.title(\"Seaborn kdeplot in bokeh.\")\n",
311 | "\n",
312 | "from bokeh.resources import INLINE\n",
313 |     "# default solution output_notebook() relies on pydata.org (but spares 2 MB of inline JS script in your notebook)\n",
314 | "# other method to get internal bokeh script can be\n",
315 | "# os.environ['BOKEH_RESOURCES'] = 'inline'\n",
316 | "output_notebook(resources=INLINE)\n",
317 | "\n",
318 | "show(mpl.to_bokeh())"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": null,
324 | "metadata": {
325 | "collapsed": false
326 | },
327 | "outputs": [],
328 | "source": [
329 | "# Holoviews \n",
330 |     "# for more examples, see http://holoviews.org/Tutorials/index.html\n",
331 | "import holoviews as hv\n",
332 | "%load_ext holoviews.ipython\n",
333 | "fractal = hv.Image(image)\n",
334 | "\n",
335 | "((fractal * hv.HLine(y=0.16)).hist() + fractal.sample(y=0.16))"
336 | ]
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "## Ipython Notebook: Interactivity & other"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": null,
348 | "metadata": {
349 | "collapsed": false
350 | },
351 | "outputs": [],
352 | "source": [
353 | "import IPython;IPython.__version__"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": null,
359 | "metadata": {
360 | "collapsed": false
361 | },
362 | "outputs": [],
363 | "source": [
364 | "# Audio Example : https://github.com/ipython/ipywidgets/blob/master/examples/Beat%20Frequencies.ipynb\n",
365 | "%matplotlib inline\n",
366 | "import matplotlib.pyplot as plt\n",
367 | "import numpy as np\n",
368 | "from ipywidgets import interactive\n",
369 | "from IPython.display import Audio, display\n",
370 | "def beat_freq(f1=220.0, f2=224.0):\n",
371 | " max_time = 3\n",
372 | " rate = 8000\n",
373 | " times = np.linspace(0,max_time,rate*max_time)\n",
374 | " signal = np.sin(2*np.pi*f1*times) + np.sin(2*np.pi*f2*times)\n",
375 | " print(f1, f2, abs(f1-f2))\n",
376 | " display(Audio(data=signal, rate=rate))\n",
377 | " return signal\n",
378 | "v = interactive(beat_freq, f1=(200.0,300.0), f2=(200.0,300.0))\n",
379 | "display(v)"
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "execution_count": null,
385 | "metadata": {
386 | "collapsed": false
387 | },
388 | "outputs": [],
389 | "source": [
390 | "# Networks graph Example : https://github.com/ipython/ipywidgets/blob/master/examples/Exploring%20Graphs.ipynb\n",
391 | "%matplotlib inline\n",
392 | "from ipywidgets import interact\n",
393 | "import matplotlib.pyplot as plt\n",
394 | "import networkx as nx\n",
395 | "# wrap a few graph generation functions so they have the same signature\n",
396 | "\n",
397 | "def random_lobster(n, m, k, p):\n",
398 | " return nx.random_lobster(n, p, p / m)\n",
399 | "\n",
400 | "def powerlaw_cluster(n, m, k, p):\n",
401 | " return nx.powerlaw_cluster_graph(n, m, p)\n",
402 | "\n",
403 | "def erdos_renyi(n, m, k, p):\n",
404 | " return nx.erdos_renyi_graph(n, p)\n",
405 | "\n",
406 | "def newman_watts_strogatz(n, m, k, p):\n",
407 | " return nx.newman_watts_strogatz_graph(n, k, p)\n",
408 | "\n",
409 | "@interact(n=(2,30), m=(1,10), k=(1,10), p=(0.0, 1.0, 0.001),\n",
410 | " generator={'lobster': random_lobster,\n",
411 | " 'power law': powerlaw_cluster,\n",
412 | " 'Newman-Watts-Strogatz': newman_watts_strogatz,\n",
413 | " u'Erdős-Rényi': erdos_renyi,\n",
414 | " })\n",
415 | "def plot_random_graph(n, m, k, p, generator):\n",
416 | " g = generator(n, m, k, p)\n",
417 | " nx.draw(g)\n",
418 | " plt.title(generator.__name__)\n",
419 | " plt.show()\n",
420 | " "
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": null,
426 | "metadata": {
427 | "collapsed": false
428 | },
429 | "outputs": [],
430 | "source": [
431 | "# checking nbconvert \n",
432 | "!jupyter nbconvert \"Beginner's FAQ.ipynb\" --to html"
433 | ]
434 | },
435 | {
436 | "cell_type": "code",
437 | "execution_count": null,
438 | "metadata": {
439 | "collapsed": false
440 | },
441 | "outputs": [],
442 | "source": [
443 | "%%HTML\n",
444 | ""
445 | ]
446 | },
447 | {
448 | "cell_type": "markdown",
449 | "metadata": {},
450 | "source": [
451 | "## Mathematical: statsmodels, lmfit, "
452 | ]
453 | },
454 | {
455 | "cell_type": "code",
456 | "execution_count": null,
457 | "metadata": {
458 | "collapsed": false
459 | },
460 | "outputs": [],
461 | "source": [
462 | "# checking statsmodels\n",
463 | "import numpy as np\n",
464 | "import matplotlib.pyplot as plt\n",
465 | "plt.style.use('ggplot')\n",
466 | "import statsmodels.api as sm\n",
467 | "data = sm.datasets.anes96.load_pandas()\n",
468 | "party_ID = np.arange(7)\n",
469 | "labels = [\"Strong Democrat\", \"Weak Democrat\", \"Independent-Democrat\",\n",
470 | " \"Independent-Independent\", \"Independent-Republican\",\n",
471 | " \"Weak Republican\", \"Strong Republican\"]\n",
472 | "plt.rcParams['figure.subplot.bottom'] = 0.23 # keep labels visible\n",
473 | "plt.rcParams['figure.figsize'] = (6.0, 4.0) # make plot larger in notebook\n",
474 | "age = [data.exog['age'][data.endog == id] for id in party_ID]\n",
475 | "fig = plt.figure()\n",
476 | "ax = fig.add_subplot(111)\n",
477 | "plot_opts={'cutoff_val':5, 'cutoff_type':'abs',\n",
478 | " 'label_fontsize':'small',\n",
479 | " 'label_rotation':30}\n",
480 | "sm.graphics.beanplot(age, ax=ax, labels=labels,\n",
481 | " plot_opts=plot_opts)\n",
482 | "ax.set_xlabel(\"Party identification of respondent\")\n",
483 | "ax.set_ylabel(\"Age\")"
484 | ]
485 | },
486 | {
487 | "cell_type": "code",
488 | "execution_count": null,
489 | "metadata": {
490 | "collapsed": false
491 | },
492 | "outputs": [],
493 | "source": [
494 | "# lmfit test (from http://nbviewer.ipython.org/github/lmfit/lmfit-py/blob/master/examples/lmfit-model.ipynb)\n",
495 | "import numpy as np\n",
496 | "import matplotlib.pyplot as plt\n",
497 | "def decay(t, N, tau):\n",
498 | " return N*np.exp(-t/tau)\n",
499 | "t = np.linspace(0, 5, num=1000)\n",
500 | "data = decay(t, 7, 3) + np.random.randn(*t.shape)\n",
501 | "\n",
502 | "from lmfit import Model\n",
503 | "\n",
504 | "model = Model(decay, independent_vars=['t'])\n",
505 | "result = model.fit(data, t=t, N=10, tau=1)\n",
506 | "plt.plot(t, data) # data\n",
507 | "plt.plot(t, decay(t=t, **result.values), color='orange', linewidth=5) # best-fit model"
508 | ]
509 | },
510 | {
511 | "cell_type": "markdown",
512 | "metadata": {},
513 | "source": [
514 | "## DataFrames: Pandas, Dask"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": null,
520 | "metadata": {
521 | "collapsed": false
522 | },
523 | "outputs": [],
524 | "source": [
525 | "#Pandas \n",
526 | "import pandas as pd\n",
527 | "import numpy as np\n",
528 | "\n",
529 | "idx = pd.date_range('2000', '2005', freq='d', closed='left')\n",
530 | "datas = pd.DataFrame({'Color': [ 'green' if x> 1 else 'red' for x in np.random.randn(len(idx))], \n",
531 | " 'Measure': np.random.randn(len(idx)), 'Year': idx.year},\n",
532 | " index=idx.date)\n",
533 | "datas.head()"
534 | ]
535 | },
536 | {
537 | "cell_type": "markdown",
538 | "metadata": {},
539 | "source": [
540 | "### Split / Apply / Combine \n",
541 | " Split your data into multiple independent groups.\n",
542 | " Apply some function to each group.\n",
543 | " Combine your groups back into a single data object.\n"
544 | ]
545 | },
546 | {
547 | "cell_type": "code",
548 | "execution_count": null,
549 | "metadata": {
550 | "collapsed": false
551 | },
552 | "outputs": [],
553 | "source": [
554 | "datas.query('Measure > 0').groupby(['Color','Year']).size().unstack()"
555 | ]
556 | },
557 | {
558 | "cell_type": "markdown",
559 | "metadata": {},
560 | "source": [
561 | "## Web Scraping: Beautifulsoup"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": null,
567 | "metadata": {
568 | "collapsed": false
569 | },
570 | "outputs": [],
571 | "source": [
572 | "# checking Web Scraping: beautifulsoup and requests \n",
573 | "import requests\n",
574 | "from bs4 import BeautifulSoup\n",
575 | "\n",
576 | "URL = 'http://en.wikipedia.org/wiki/Franklin,_Tennessee'\n",
577 | "\n",
578 | "req = requests.get(URL, headers={'User-Agent' : \"Mining the Social Web\"})\n",
579 | "soup = BeautifulSoup(req.text, \"lxml\")\n",
580 | "\n",
581 | "geoTag = soup.find(True, 'geo')\n",
582 | "\n",
583 | "if geoTag and len(geoTag) > 1:\n",
584 | " lat = geoTag.find(True, 'latitude').string\n",
585 | " lon = geoTag.find(True, 'longitude').string\n",
586 | " print ('Location is at', lat, lon)\n",
587 | "elif geoTag and len(geoTag) == 1:\n",
588 | " (lat, lon) = geoTag.string.split(';')\n",
589 | " (lat, lon) = (lat.strip(), lon.strip())\n",
590 | " print ('Location is at', lat, lon)\n",
591 | "else:\n",
592 | " print ('No location found')"
593 | ]
594 | },
595 | {
596 | "cell_type": "markdown",
597 | "metadata": {},
598 | "source": [
599 | "## Operations Research: Pulp"
600 | ]
601 | },
602 | {
603 | "cell_type": "code",
604 | "execution_count": null,
605 | "metadata": {
606 | "collapsed": false,
607 | "scrolled": true
608 | },
609 | "outputs": [],
610 | "source": [
611 | "# Pulp example : minimizing the weight to carry 99 pennies\n",
612 | "# (from Philip I Thomas)\n",
613 | "# see https://www.youtube.com/watch?v=UmMn-N5w-lI#t=995\n",
614 | "# Import PuLP modeler functions\n",
615 | "from pulp import *\n",
616 | "# The prob variable is created to contain the problem data \n",
617 | "prob = LpProblem(\"99 pennies Problem\",LpMinimize)\n",
618 | "\n",
619 | "# Variables represent how many of each coin we want to carry\n",
620 | "pennies = LpVariable(\"Number of pennies\",0,None,LpInteger)\n",
621 | "nickels = LpVariable(\"Number of nickels\",0,None,LpInteger)\n",
622 | "dimes = LpVariable(\"Number of dimes\",0,None,LpInteger)\n",
623 | "quarters = LpVariable(\"Number of quarters\",0,None,LpInteger)\n",
624 | "\n",
625 | "# The objective function is added to 'prob' first\n",
626 | "\n",
627 | "# we want to minimize (LpMinimize) this \n",
628 | "prob += 2.5 * pennies + 5 * nickels + 2.268 * dimes + 5.670 * quarters, \"Total coins Weight\"\n",
629 | "\n",
630 | "# We want exactly 99 cents\n",
631 | "prob += 1 * pennies + 5 * nickels + 10 * dimes + 25 * quarters == 99, \"\"\n",
632 | "\n",
633 | "# The problem data is written to an .lp file\n",
634 | "prob.writeLP(\"99cents.lp\")\n",
635 | "prob.solve()\n",
636 | "\n",
637 | "# print (\"status\",LpStatus[prob.status] )\n",
638 | "print (\"Minimal Weight to carry exactly 99 pennies is %s grams\" % value(prob.objective))\n",
639 |     "# Each of the variables is printed with its resolved optimum value\n",
640 | "for v in prob.variables():\n",
641 | " print (v.name, \"=\", v.varValue)"
642 | ]
643 | },
644 | {
645 | "cell_type": "markdown",
646 | "metadata": {},
647 | "source": [
648 | "## Deep Learning: Theano"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {
655 | "collapsed": false
656 | },
657 | "outputs": [],
658 | "source": [
659 | "# Checking Theano\n",
660 | "import theano.tensor as T\n",
661 | "from theano import function\n",
662 | "x = T.dmatrix('x')\n",
663 | "y = T.dmatrix('y')\n",
664 | "z = x + y\n",
665 | "f = function([x, y], z)\n",
666 | "f([[1, 2], [3, 4]], [[10, 20], [30, 40]])"
667 | ]
668 | },
669 | {
670 | "cell_type": "markdown",
671 | "metadata": {},
672 | "source": [
673 | "## Symbolic Calculation: sympy"
674 | ]
675 | },
676 | {
677 | "cell_type": "code",
678 | "execution_count": null,
679 | "metadata": {
680 | "collapsed": false
681 | },
682 | "outputs": [],
683 | "source": [
684 | "# checking sympy \n",
685 | "import sympy\n",
686 | "a, b =sympy.symbols('a b')\n",
687 | "e=(a+b)**5\n",
688 | "e.expand()"
689 | ]
690 | },
691 | {
692 | "cell_type": "markdown",
693 | "metadata": {},
694 | "source": [
695 | "## SQL tools: sqlite, Ipython-sql, sqlite_bro, baresql, db.py"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": null,
701 | "metadata": {
702 | "collapsed": false
703 | },
704 | "outputs": [],
705 | "source": [
706 | "# checking Ipython-sql, sqlparse, SQLalchemy\n",
707 | "%load_ext sql"
708 | ]
709 | },
710 | {
711 | "cell_type": "code",
712 | "execution_count": null,
713 | "metadata": {
714 | "collapsed": false
715 | },
716 | "outputs": [],
717 | "source": [
718 | "%%sql sqlite:///.baresql.db\n",
719 | "DROP TABLE IF EXISTS writer;\n",
720 | "CREATE TABLE writer (first_name, last_name, year_of_death);\n",
721 | "INSERT INTO writer VALUES ('William', 'Shakespeare', 1616);\n",
722 | "INSERT INTO writer VALUES ('Bertold', 'Brecht', 1956);\n",
723 | "SELECT * , sqlite_version() as sqlite_version from Writer order by Year_of_death"
724 | ]
725 | },
726 | {
727 | "cell_type": "code",
728 | "execution_count": null,
729 | "metadata": {
730 | "collapsed": false
731 | },
732 | "outputs": [],
733 | "source": [
734 | "# checking baresql\n",
735 | "from __future__ import print_function, unicode_literals, division # line needed only if Python2.7\n",
736 | "from baresql import baresql\n",
737 | "bsql = baresql.baresql(connection=\"sqlite:///.baresql.db\")\n",
738 | "bsqldf = lambda q: bsql.df(q, dict(globals(),**locals()))\n",
739 | "\n",
740 | "users = ['Alexander', 'Billy', 'Charles', 'Danielle', 'Esmeralda', 'Franz', 'Greg']\n",
741 | "# We use the python 'users' list like a SQL table\n",
742 | "sql = \"select 'Welcome ' || c0 || ' !' as say_hello, length(c0) as name_length from users$$ where c0 like '%a%' \"\n",
743 | "bsqldf(sql)"
744 | ]
745 | },
746 | {
747 | "cell_type": "code",
748 | "execution_count": null,
749 | "metadata": {
750 | "collapsed": false
751 | },
752 | "outputs": [],
753 | "source": [
754 |     "# Transferring data to sqlite, doing transformations in SQL, going back to Pandas and Matplotlib\n",
755 | "bsqldf('''\n",
756 | "select Color, Year, count(*) as size \n",
757 | "from datas$$ \n",
758 | "where Measure > 0 \n",
759 | "group by Color, Year'''\n",
760 | " ).set_index(['Year', 'Color']).unstack().plot(kind='bar')"
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "execution_count": null,
766 | "metadata": {
767 | "collapsed": false
768 | },
769 | "outputs": [],
770 | "source": [
771 | "# checking db.py\n",
772 | "from db import DB\n",
773 | "db=DB(dbtype=\"sqlite\", filename=\".baresql.db\")\n",
774 | "db.query(\"select sqlite_version() as sqlite_version ;\") "
775 | ]
776 | },
777 | {
778 | "cell_type": "code",
779 | "execution_count": null,
780 | "metadata": {
781 | "collapsed": false
782 | },
783 | "outputs": [],
784 | "source": [
785 | "db.tables"
786 | ]
787 | },
788 | {
789 | "cell_type": "code",
790 | "execution_count": null,
791 | "metadata": {
792 | "collapsed": false
793 | },
794 | "outputs": [],
795 | "source": [
796 |     "# checking sqlite_bro: this should launch a separate non-browser window with sqlite_bro's welcome\n",
797 | "!cmd start cmd /C sqlite_bro"
798 | ]
799 | },
800 | {
801 | "cell_type": "code",
802 | "execution_count": null,
803 | "metadata": {
804 | "collapsed": false
805 | },
806 | "outputs": [],
807 | "source": [
808 | "# pyodbc \n",
809 | "import pyodbc\n",
810 | "\n",
811 | "# look for pyodbc providers\n",
812 | "sources = pyodbc.dataSources()\n",
813 | "dsns = list(sources.keys())\n",
814 | "sl = [' %s [%s]' % (dsn, sources[dsn]) for dsn in dsns]\n",
815 | "print(\"pyodbc Providers: (beware 32/64 bit driver and python version must match)\\n\", '\\n'.join(sl))"
816 | ]
817 | },
818 | {
819 | "cell_type": "code",
820 | "execution_count": null,
821 | "metadata": {
822 | "collapsed": false
823 | },
824 | "outputs": [],
825 | "source": [
826 | "# pythonnet\n",
827 | "import clr\n",
828 | "clr.AddReference(\"System.Data\")\n",
829 | "import System.Data.OleDb as ADONET\n",
830 | "import System.Data.Odbc as ODBCNET\n",
831 | "import System.Data.Common as DATACOM\n",
832 | "\n",
833 | "table = DATACOM.DbProviderFactories.GetFactoryClasses()\n",
834 | "print(\"\\n .NET Providers: (beware 32/64 bit driver and python version must match)\")\n",
835 | "for row in table.Rows:\n",
836 | " print(\" %s\" % row[table.Columns[0]])\n",
837 | " print(\" \",[row[column] for column in table.Columns if column != table.Columns[0]])"
838 | ]
839 | },
840 | {
841 | "cell_type": "markdown",
842 | "metadata": {},
843 | "source": [
844 | "## Qt libraries Demo\n",
845 | "\n",
846 | " \n",
847 | "#### See [Dedicated Qt Libraries Demo](Qt_libraries_demo.ipynb)"
848 | ]
849 | },
850 | {
851 | "cell_type": "markdown",
852 | "metadata": {},
853 | "source": [
854 | "## Wrap-up"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": null,
860 | "metadata": {
861 | "collapsed": false
862 | },
863 | "outputs": [],
864 | "source": [
865 | "# optional scipy full test (takes up to 10 minutes)\n",
866 | "#!cmd /C start cmd /k python.exe -c \"import scipy;scipy.test()\""
867 | ]
868 | },
869 | {
870 | "cell_type": "code",
871 | "execution_count": null,
872 | "metadata": {
873 | "collapsed": true
874 | },
875 | "outputs": [],
876 | "source": []
877 | }
878 | ],
879 | "metadata": {
880 | "kernelspec": {
881 | "display_name": "Python 3",
882 | "language": "python",
883 | "name": "python3"
884 | },
885 | "language_info": {
886 | "codemirror_mode": {
887 | "name": "ipython",
888 | "version": 3
889 | },
890 | "file_extension": ".py",
891 | "mimetype": "text/x-python",
892 | "name": "python",
893 | "nbconvert_exporter": "python",
894 | "pygments_lexer": "ipython3",
895 | "version": "3.6.0"
896 | },
897 | "widgets": {
898 | "state": {
899 | "056d32c70f644417b86a152d3a2385bd": {
900 | "views": [
901 | {
902 | "cell_index": 14
903 | }
904 | ]
905 | },
906 | "2307e84bf81346d49818eef8862360ca": {
907 | "views": [
908 | {
909 | "cell_index": 22
910 | }
911 | ]
912 | },
913 | "4e7a6f5db8e74905a08d4636afa3b82f": {
914 | "views": [
915 | {
916 | "cell_index": 15
917 | }
918 | ]
919 | },
920 | "e762d7875083491eb2933958cc3331a9": {
921 | "views": [
922 | {
923 | "cell_index": 21
924 | }
925 | ]
926 | }
927 | },
928 | "version": "1.2.0"
929 | }
930 | },
931 | "nbformat": 4,
932 | "nbformat_minor": 0
933 | }
934 |
--------------------------------------------------------------------------------
/App/DefaultData/notebooks/docs/dplyr_pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Tom Augspurger Dplyr/Pandas comparison (copy of 2016-01-01)\n",
8 | "\n",
9 | "### See result there\n",
10 | "http://nbviewer.ipython.org/urls/gist.githubusercontent.com/TomAugspurger/6e052140eaa5fdb6e8c0/raw/627b77addb4bcfc39ab6be6d85cb461e956fb3a3/dplyr_pandas.ipynb\n",
11 | "\n",
12 | "### to reproduce on your WinPython you'll need to get flights.csv in this directory"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "This notebook compares [pandas](http://pandas.pydata.org)\n",
20 | "and [dplyr](http://cran.r-project.org/web/packages/dplyr/index.html).\n",
21 | "The comparison is just on syntax (verbage), not performance. Whether you're an R user looking to switch to pandas (or the other way around), I hope this guide will help ease the transition.\n",
22 | "\n",
23 | "We'll work through the [introductory dplyr vignette](http://cran.r-project.org/web/packages/dplyr/vignettes/introduction.html) to analyze some flight data.\n",
24 | "\n",
25 | "I'm working on a better layout to show the two packages side by side.\n",
26 | "But for now I'm just putting the ``dplyr`` code in a comment above each python call.\n",
27 | "\n"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "### using R steps to get flights.csv\n",
35 | "\n",
36 | "un-comment the next cell unless you have installed R and want to get Flights example from the source\n",
37 | "\n",
38 | "to install R on your Winpython:\n",
39 | "[how to install R](installing_R.ipynb)\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {
46 | "collapsed": true
47 | },
48 | "outputs": [],
49 | "source": [
50 | "#%load_ext rpy2.ipython\n",
51 | "#%R install.packages(\"nycflights13\", repos='http://cran.us.r-project.org')\n",
52 | "#%R library(nycflights13)\n",
53 | "#%R write.csv(flights, \"flights.csv\")"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "### using an internet download to get flights.csv"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {
67 | "collapsed": false
68 | },
69 | "outputs": [],
70 | "source": [
71 | "# Downloading and unzipping a file, without the R method:\n",
72 | "# source= http://stackoverflow.com/a/34863053/3140336\n",
73 | "import io\n",
74 | "from zipfile import ZipFile\n",
75 | "import requests\n",
76 | "\n",
77 | "def get_zip(file_url):\n",
78 | " url = requests.get(file_url)\n",
79 | " zipfile = ZipFile(io.BytesIO(url.content))\n",
80 | " zip_names = zipfile.namelist()\n",
81 | " if len(zip_names) == 1:\n",
82 | " file_name = zip_names.pop()\n",
83 | " extracted_file = zipfile.open(file_name)\n",
84 | " return extracted_file\n",
85 | "\n",
86 | "url=r'https://github.com/winpython/winpython_afterdoc/raw/master/examples/nycflights13_datas/flights.zip'\n",
87 | "with io.open(\"flights.csv\", 'wb') as f:\n",
88 | " f.write(get_zip(url).read())\n"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {
95 | "collapsed": false
96 | },
97 | "outputs": [],
98 | "source": [
99 | "# Some prep work to get the data from R and into pandas\n",
100 | "%matplotlib inline\n",
101 | "import matplotlib.pyplot as plt\n",
102 | "#%load_ext rpy2.ipython\n",
103 | "\n",
104 | "import pandas as pd\n",
105 | "import seaborn as sns\n",
106 | "\n",
107 | "pd.set_option(\"display.max_rows\", 5)"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "# Data: nycflights13"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {
121 | "collapsed": false
122 | },
123 | "outputs": [],
124 | "source": [
125 | "flights = pd.read_csv(\"flights.csv\", index_col=0)"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "collapsed": false
133 | },
134 | "outputs": [],
135 | "source": [
136 | "# dim(flights) <--- The R code\n",
137 | "flights.shape # <--- The python code"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {
144 | "collapsed": false
145 | },
146 | "outputs": [],
147 | "source": [
148 | "# head(flights)\n",
149 | "flights.head()"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "# Single table verbs"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "``dplyr`` has a small set of nicely defined verbs. I've listed their closest pandas verbs.\n",
164 | "\n",
165 | "\n",
166 | "<table>\n",
167 | "<tr>\n",
168 | "<th>dplyr</th>\n",
169 | "<th>pandas</th>\n",
170 | "</tr>\n",
171 | "<tr>\n",
172 | "<td>filter() (and slice())</td>\n",
173 | "<td>query() (and loc[], iloc[])</td>\n",
174 | "</tr>\n",
175 | "<tr>\n",
176 | "<td>arrange()</td>\n",
177 | "<td>sort_values and sort_index()</td>\n",
178 | "</tr>\n",
179 | "<tr>\n",
180 | "<td>select() (and rename())</td>\n",
181 | "<td>__getitem__ (and rename())</td>\n",
182 | "</tr>\n",
183 | "<tr>\n",
184 | "<td>distinct()</td>\n",
185 | "<td>drop_duplicates()</td>\n",
186 | "</tr>\n",
187 | "<tr>\n",
188 | "<td>mutate() (and transmute())</td>\n",
189 | "<td>assign</td>\n",
190 | "</tr>\n",
191 | "<tr>\n",
192 | "<td>summarise()</td>\n",
193 | "<td>None</td>\n",
194 | "</tr>\n",
195 | "<tr>\n",
196 | "<td>sample_n() and sample_frac()</td>\n",
197 | "<td>sample</td>\n",
198 | "</tr>\n",
199 | "<tr>\n",
200 | "<td>%>%</td>\n",
201 | "<td>pipe</td>\n",
202 | "</tr>\n",
203 | "\n",
204 | "</table>\n",
205 | "\n",
206 | "\n",
207 | "Some of the \"missing\" verbs in pandas are because there are other, different ways of achieving the same goal. For example `summarise` is spread across `mean`, `std`, etc. It's closest analog is actually the `.agg` method on a `GroupBy` object, as it reduces a DataFrame to a single row (per group). This isn't quite what `.describe` does.\n",
208 | "\n",
209 | "I've also included the `pipe` operator from R (`%>%`), the `pipe` method from pandas, even though it isn't quite a verb."
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {},
215 | "source": [
216 | "# Filter rows with filter(), query()"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {
223 | "collapsed": false
224 | },
225 | "outputs": [],
226 | "source": [
227 | "# filter(flights, month == 1, day == 1)\n",
228 | "flights.query(\"month == 1 & day == 1\")"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "We see the first big *language* difference between R and python.\n",
236 | "Many python programmers will shun the R code as too magical.\n",
237 | "How is the programmer supposed to know that `month` and `day` are supposed to represent columns in the DataFrame?\n",
238 | "On the other hand, to emulate this *very* convenient feature of R, python has to write the expression as a string, and evaluate the string in the context of the DataFrame."
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 | "The more verbose version:"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {
252 | "collapsed": false
253 | },
254 | "outputs": [],
255 | "source": [
256 | "# flights[flights$month == 1 & flights$day == 1, ]\n",
257 | "flights[(flights.month == 1) & (flights.day == 1)]"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": null,
263 | "metadata": {
264 | "collapsed": false
265 | },
266 | "outputs": [],
267 | "source": [
268 | "# slice(flights, 1:10)\n",
269 | "flights.iloc[:9]"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "# Arrange rows with arrange(), sort()"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": null,
282 | "metadata": {
283 | "collapsed": false
284 | },
285 | "outputs": [],
286 | "source": [
287 | "# arrange(flights, year, month, day) \n",
288 | "flights.sort_values(['year', 'month', 'day'])"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {
295 | "collapsed": false
296 | },
297 | "outputs": [],
298 | "source": [
299 | "# arrange(flights, desc(arr_delay))\n",
300 | "flights.sort_values('arr_delay', ascending=False)"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {},
306 | "source": [
307 | "It's worth mentioning the other common sorting method for pandas DataFrames, `sort_index`. Pandas puts much more emphasis on indices (or row labels) than R.\n",
308 | "This is a design decision that has positives and negatives, which we won't go into here. Suffice to say that when you need to sort a `DataFrame` by the index, use `DataFrame.sort_index`."
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "metadata": {},
314 | "source": [
315 | "# Select columns with select(), []"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "metadata": {
322 | "collapsed": false
323 | },
324 | "outputs": [],
325 | "source": [
326 | "# select(flights, year, month, day) \n",
327 | "flights[['year', 'month', 'day']]"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": null,
333 | "metadata": {
334 | "collapsed": false
335 | },
336 | "outputs": [],
337 | "source": [
338 | "# select(flights, year:day) \n",
339 | "flights.loc[:, 'year':'day']"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "metadata": {
346 | "collapsed": true
347 | },
348 | "outputs": [],
349 | "source": [
350 | "# select(flights, -(year:day)) \n",
351 | "\n",
352 | "# No direct equivalent here. I would typically use\n",
353 | "# flights.drop(cols_to_drop, axis=1)\n",
354 | "# or flights[flights.columns.difference(pd.Index(cols_to_drop))]\n",
355 | "# point to dplyr!"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": null,
361 | "metadata": {
362 | "collapsed": false
363 | },
364 | "outputs": [],
365 | "source": [
366 | "# select(flights, tail_num = tailnum)\n",
367 | "flights.rename(columns={'tailnum': 'tail_num'})['tail_num']"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "metadata": {},
373 | "source": [
374 | "But like Hadley mentions, not that useful since it only returns the one column. ``dplyr`` and ``pandas`` compare well here."
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": null,
380 | "metadata": {
381 | "collapsed": false
382 | },
383 | "outputs": [],
384 | "source": [
385 | "# rename(flights, tail_num = tailnum)\n",
386 | "flights.rename(columns={'tailnum': 'tail_num'})"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {},
392 | "source": [
393 | "Pandas is more verbose, but the argument to `columns` can be any mapping. So it's often used with a function to perform a common task, say `df.rename(columns=lambda x: x.replace('-', '_'))` to replace any dashes with underscores. Also, ``rename`` (the pandas version) can be applied to the Index.\n",
394 | ]
395 | },
396 | {
397 | "cell_type": "markdown",
398 | "metadata": {},
399 | "source": [
400 | "One more note on the differences here.\n",
401 | "Pandas could easily include a `.select` method.\n",
402 | "[`xray`](http://xray.readthedocs.org/en/stable/), a library that builds on top of NumPy and pandas to offer labeled N-dimensional arrays (along with many other things) does [just that](http://xray.readthedocs.org/en/stable/indexing.html#indexing-with-labeled-dimensions).\n",
403 | "Pandas chooses the `.loc` and `.iloc` accessors because *any valid selection is also a valid assignment*. This makes it easier to modify the data.\n",
404 | "\n",
405 | "```python\n",
406 | "flights.loc[:, 'year':'day'] = data\n",
407 | "```\n",
408 | "\n",
409 | "where `data` is an object that is, or can be broadcast to, the correct shape."
410 | ]
411 | },
412 | {
413 | "cell_type": "markdown",
414 | "metadata": {},
415 | "source": [
416 | "# Extract distinct (unique) rows "
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": null,
422 | "metadata": {
423 | "collapsed": false
424 | },
425 | "outputs": [],
426 | "source": [
427 | "# distinct(select(flights, tailnum))\n",
428 | "flights.tailnum.unique()"
429 | ]
430 | },
431 | {
432 | "cell_type": "markdown",
433 | "metadata": {},
434 | "source": [
435 | "FYI this returns a numpy array instead of a Series."
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": null,
441 | "metadata": {
442 | "collapsed": false
443 | },
444 | "outputs": [],
445 | "source": [
446 | "# distinct(select(flights, origin, dest))\n",
447 | "flights[['origin', 'dest']].drop_duplicates()"
448 | ]
449 | },
450 | {
451 | "cell_type": "markdown",
452 | "metadata": {},
453 | "source": [
454 | "OK, so ``dplyr`` wins there from a consistency point of view. ``unique`` is only defined on Series, not DataFrames."
455 | ]
456 | },
457 | {
458 | "cell_type": "markdown",
459 | "metadata": {},
460 | "source": [
461 | "# Add new columns with mutate() "
462 | ]
463 | },
464 | {
465 | "cell_type": "markdown",
466 | "metadata": {},
467 | "source": [
468 | "We at pandas shamelessly stole this for [v0.16.0](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#whatsnew-0160-enhancements-assign)."
469 | ]
470 | },
471 | {
472 | "cell_type": "code",
473 | "execution_count": null,
474 | "metadata": {
475 | "collapsed": false
476 | },
477 | "outputs": [],
478 | "source": [
479 | "# mutate(flights,\n",
480 | "# gain = arr_delay - dep_delay,\n",
481 | "# speed = distance / air_time * 60)\n",
482 | "\n",
483 | "flights.assign(gain=flights.arr_delay - flights.dep_delay,\n",
484 | " speed=flights.distance / flights.air_time * 60)"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": null,
490 | "metadata": {
491 | "collapsed": false
492 | },
493 | "outputs": [],
494 | "source": [
495 | "# mutate(flights,\n",
496 | "# gain = arr_delay - dep_delay,\n",
497 | "# gain_per_hour = gain / (air_time / 60)\n",
498 | "# )\n",
499 | "\n",
500 | "(flights.assign(gain=flights.arr_delay - flights.dep_delay)\n",
501 | " .assign(gain_per_hour = lambda df: df.gain / (df.air_time / 60)))\n"
502 | ]
503 | },
504 | {
505 | "cell_type": "markdown",
506 | "metadata": {},
507 | "source": [
508 | "The first example is pretty much identical (aside from the names, `mutate` vs. `assign`).\n",
509 | "\n",
510 | "The second example just comes down to language differences. In `R`, it's possible to implement a function like `mutate` where you can refer to `gain` in the line calculating `gain_per_hour`, even though `gain` hasn't actually been calculated yet.\n",
511 | "\n",
512 | "In Python, you can have arbitrary keyword arguments to functions (which we needed for `.assign`), but the order of the arguments is arbitrary since `dict`s are unsorted and `**kwargs` is a `dict`. So you can't have something like `df.assign(x=df.a / df.b, y=x **2)`, because you don't know whether `x` or `y` will come first (you'd also get an error saying `x` is undefined).\n",
513 | "\n",
514 | "To work around that with pandas, you'll need to split up the assigns, and pass in a *callable* to the second assign. The callable looks at itself to find a column named `gain`. Since the line above returns a DataFrame with the `gain` column added, the pipeline goes through just fine."
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": null,
520 | "metadata": {
521 | "collapsed": false
522 | },
523 | "outputs": [],
524 | "source": [
525 | "# transmute(flights,\n",
526 | "# gain = arr_delay - dep_delay,\n",
527 | "# gain_per_hour = gain / (air_time / 60)\n",
528 | "# )\n",
529 | "(flights.assign(gain=flights.arr_delay - flights.dep_delay)\n",
530 | " .assign(gain_per_hour = lambda df: df.gain / (df.air_time / 60))\n",
531 | " [['gain', 'gain_per_hour']])\n"
532 | ]
533 | },
534 | {
535 | "cell_type": "markdown",
536 | "metadata": {},
537 | "source": [
538 | "# Summarise values with summarise()"
539 | ]
540 | },
541 | {
542 | "cell_type": "code",
543 | "execution_count": null,
544 | "metadata": {
545 | "collapsed": false
546 | },
547 | "outputs": [],
548 | "source": [
549 | "# summarise(flights,\n",
550 | "# delay = mean(dep_delay, na.rm = TRUE))\n",
551 | "flights.dep_delay.mean()"
552 | ]
553 | },
554 | {
555 | "cell_type": "markdown",
556 | "metadata": {},
557 | "source": [
558 | "This is only roughly equivalent.\n",
559 | "`summarise` takes a callable (e.g. `mean`, `sum`) and evaluates that on the DataFrame. In pandas these are spread across `pd.DataFrame.mean`, `pd.DataFrame.sum`. This will come up again when we look at `groupby`."
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {},
565 | "source": [
566 | "# Randomly sample rows with sample_n() and sample_frac()"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": null,
572 | "metadata": {
573 | "collapsed": false
574 | },
575 | "outputs": [],
576 | "source": [
577 | "# sample_n(flights, 10)\n",
578 | "flights.sample(n=10)"
579 | ]
580 | },
581 | {
582 | "cell_type": "code",
583 | "execution_count": null,
584 | "metadata": {
585 | "collapsed": false
586 | },
587 | "outputs": [],
588 | "source": [
589 | "# sample_frac(flights, 0.01)\n",
590 | "flights.sample(frac=.01)"
591 | ]
592 | },
593 | {
594 | "cell_type": "markdown",
595 | "metadata": {},
596 | "source": [
597 | "# Grouped operations "
598 | ]
599 | },
600 | {
601 | "cell_type": "code",
602 | "execution_count": null,
603 | "metadata": {
604 | "collapsed": false
605 | },
606 | "outputs": [],
607 | "source": [
608 | "# planes <- group_by(flights, tailnum)\n",
609 | "# delay <- summarise(planes,\n",
610 | "# count = n(),\n",
611 | "# dist = mean(distance, na.rm = TRUE),\n",
612 | "# delay = mean(arr_delay, na.rm = TRUE))\n",
613 | "# delay <- filter(delay, count > 20, dist < 2000)\n",
614 | "\n",
615 | "planes = flights.groupby(\"tailnum\")\n",
616 | "delay = (planes.agg({\"year\": \"count\",\n",
617 | " \"distance\": \"mean\",\n",
618 | " \"arr_delay\": \"mean\"})\n",
619 | " .rename(columns={\"distance\": \"dist\",\n",
620 | " \"arr_delay\": \"delay\",\n",
621 | " \"year\": \"count\"})\n",
622 | " .query(\"count > 20 & dist < 2000\"))\n",
623 | "delay"
624 | ]
625 | },
626 | {
627 | "cell_type": "markdown",
628 | "metadata": {},
629 | "source": [
630 | "For me, dplyr's ``n()`` looked a bit strange at first, but it's already growing on me.\n",
631 | "\n",
632 | "I think pandas is more difficult for this particular example.\n",
633 | "There isn't as natural a way to mix column-agnostic aggregations (like ``count``) with column-specific aggregations like the other two. You end up writing code like `.agg({'year': 'count'})` which reads, \"I want the count of `year`\", even though you don't care about `year` specifically. You could just as easily have said `.agg({'distance': 'count'})`.\n",
634 | "Additionally assigning names can't be done as cleanly in pandas; you have to just follow it up with a ``rename`` like before."
635 | ]
636 | },
637 | {
638 | "cell_type": "markdown",
639 | "metadata": {},
640 | "source": [
641 | "We may as well reproduce the graph. It looks like `ggplot`'s `geom_smooth` is some kind of lowess smoother. We can either use [seaborn](http://stanford.edu/~mwaskom/software/seaborn/):\n",
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": null,
647 | "metadata": {
648 | "collapsed": false
649 | },
650 | "outputs": [],
651 | "source": [
652 | "fig, ax = plt.subplots(figsize=(12, 6))\n",
653 | "\n",
654 | "sns.regplot(\"dist\", \"delay\", data=delay, lowess=True, ax=ax,\n",
655 | " scatter_kws={'color': 'k', 'alpha': .5, 's': delay['count'] / 10}, ci=90,\n",
656 | " line_kws={'linewidth': 3});"
657 | ]
658 | },
659 | {
660 | "cell_type": "markdown",
661 | "metadata": {},
662 | "source": [
663 | "Or using statsmodels directly for more control over the lowess, with an extremely lazy\n",
664 | "\"confidence interval\"."
665 | ]
666 | },
667 | {
668 | "cell_type": "code",
669 | "execution_count": null,
670 | "metadata": {
671 | "collapsed": true
672 | },
673 | "outputs": [],
674 | "source": [
675 | "import statsmodels.api as sm"
676 | ]
677 | },
678 | {
679 | "cell_type": "code",
680 | "execution_count": null,
681 | "metadata": {
682 | "collapsed": false
683 | },
684 | "outputs": [],
685 | "source": [
686 | "smooth = sm.nonparametric.lowess(delay.delay, delay.dist, frac=1/8)\n",
687 | "ax = delay.plot(kind='scatter', x='dist', y = 'delay', figsize=(12, 6),\n",
688 | " color='k', alpha=.5, s=delay['count'] / 10)\n",
689 | "ax.plot(smooth[:, 0], smooth[:, 1], linewidth=3);\n",
690 | "std = smooth[:, 1].std()\n",
691 | "ax.fill_between(smooth[:, 0], smooth[:, 1] - std, smooth[:, 1] + std, alpha=.25);"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": null,
697 | "metadata": {
698 | "collapsed": false
699 | },
700 | "outputs": [],
701 | "source": [
702 | "# destinations <- group_by(flights, dest)\n",
703 | "# summarise(destinations,\n",
704 | "# planes = n_distinct(tailnum),\n",
705 | "# flights = n()\n",
706 | "# )\n",
707 | "\n",
708 | "destinations = flights.groupby('dest')\n",
709 | "destinations.agg({\n",
710 | " 'tailnum': lambda x: len(x.unique()),\n",
711 | " 'year': 'count'\n",
712 | " }).rename(columns={'tailnum': 'planes',\n",
713 | " 'year': 'flights'})"
714 | ]
715 | },
716 | {
717 | "cell_type": "markdown",
718 | "metadata": {},
719 | "source": [
720 | "There's a little-known feature of `groupby.agg`: it accepts a dict of dicts mapping\n",
721 | "columns to `{name: aggfunc}` pairs. Here's the result:"
722 | ]
723 | },
724 | {
725 | "cell_type": "code",
726 | "execution_count": null,
727 | "metadata": {
728 | "collapsed": false
729 | },
730 | "outputs": [],
731 | "source": [
732 | "destinations = flights.groupby('dest')\n",
733 | "r = destinations.agg({'tailnum': {'planes': lambda x: len(x.unique())},\n",
734 | " 'year': {'flights': 'count'}})\n",
735 | "r"
736 | ]
737 | },
738 | {
739 | "cell_type": "markdown",
740 | "metadata": {},
741 | "source": [
742 | "The result is a `MultiIndex` in the columns which can be a bit awkward to work with (you can drop a level with `r.columns.droplevel()`). Also the syntax going into the `.agg` may not be the clearest.\n",
743 | ]
744 | },
745 | {
746 | "cell_type": "markdown",
747 | "metadata": {},
748 | "source": [
749 | "Similar to how ``dplyr`` provides optimized C++ versions of most of the `summarise` functions, pandas uses [cython](http://cython.org) optimized versions for most of the `agg` methods."
750 | ]
751 | },
752 | {
753 | "cell_type": "code",
754 | "execution_count": null,
755 | "metadata": {
756 | "collapsed": false
757 | },
758 | "outputs": [],
759 | "source": [
760 | "# daily <- group_by(flights, year, month, day)\n",
761 | "# (per_day <- summarise(daily, flights = n()))\n",
762 | "\n",
763 | "daily = flights.groupby(['year', 'month', 'day'])\n",
764 | "per_day = daily['distance'].count()\n",
765 | "per_day"
766 | ]
767 | },
768 | {
769 | "cell_type": "code",
770 | "execution_count": null,
771 | "metadata": {
772 | "collapsed": false
773 | },
774 | "outputs": [],
775 | "source": [
776 | "# (per_month <- summarise(per_day, flights = sum(flights)))\n",
777 | "per_month = per_day.groupby(level=['year', 'month']).sum()\n",
778 | "per_month"
779 | ]
780 | },
781 | {
782 | "cell_type": "code",
783 | "execution_count": null,
784 | "metadata": {
785 | "collapsed": false
786 | },
787 | "outputs": [],
788 | "source": [
789 | "# (per_year <- summarise(per_month, flights = sum(flights)))\n",
790 | "per_year = per_month.sum()\n",
791 | "per_year"
792 | ]
793 | },
794 | {
795 | "cell_type": "markdown",
796 | "metadata": {},
797 | "source": [
798 | "I'm not sure how ``dplyr`` is handling the other columns, like `year`, in the last example. With pandas, it's clear that we're grouping by them since they're included in the groupby. For the last example, we didn't group by anything, so they aren't included in the result."
799 | ]
800 | },
801 | {
802 | "cell_type": "markdown",
803 | "metadata": {},
804 | "source": [
805 | "# Chaining"
806 | ]
807 | },
808 | {
809 | "cell_type": "markdown",
810 | "metadata": {},
811 | "source": [
812 | "Any follower of Hadley's [twitter account](https://twitter.com/hadleywickham/) will know how much R users *love* the ``%>%`` (pipe) operator. And for good reason!"
813 | ]
814 | },
815 | {
816 | "cell_type": "code",
817 | "execution_count": null,
818 | "metadata": {
819 | "collapsed": false
820 | },
821 | "outputs": [],
822 | "source": [
823 | "# flights %>%\n",
824 | "# group_by(year, month, day) %>%\n",
825 | "# select(arr_delay, dep_delay) %>%\n",
826 | "# summarise(\n",
827 | "# arr = mean(arr_delay, na.rm = TRUE),\n",
828 | "# dep = mean(dep_delay, na.rm = TRUE)\n",
829 | "# ) %>%\n",
830 | "# filter(arr > 30 | dep > 30)\n",
831 | "(\n",
832 | "flights.groupby(['year', 'month', 'day'])\n",
833 | " [['arr_delay', 'dep_delay']]\n",
834 | " .mean()\n",
835 | " .query('arr_delay > 30 | dep_delay > 30')\n",
836 | ")"
837 | ]
838 | },
839 | {
840 | "cell_type": "markdown",
841 | "metadata": {},
842 | "source": [
843 | "A bit of soapboxing here if you'll indulge me.\n",
844 | "\n",
845 | "The example above is a bit contrived since it only uses methods on `DataFrame`. But what if you have some function to work into your pipeline that pandas hasn't (or won't) implement? In that case you're required to break up your pipeline by assigning your intermediate (probably uninteresting) DataFrame to a temporary variable you don't actually care about.\n",
846 | "\n",
847 | "`R` doesn't have this problem since the `%>%` operator works with any function that takes (and maybe returns) DataFrames.\n",
848 | "The python language doesn't have any notion of right to left function application (other than special cases like `__radd__` and `__rmul__`).\n",
849 | "It only allows the usual left to right `function(arguments)`, where you can think of the `()` as the \"call this function\" operator.\n",
850 | "\n",
851 | "Pandas wanted something like `%>%` and we did it in a fairly pythonic way. The `pd.DataFrame.pipe` method takes a function and optionally some arguments, and calls that function with `self` (the DataFrame) as the first argument.\n",
852 | "\n",
853 | "So\n",
854 | "\n",
855 | "```R\n",
856 | "flights %>% my_function(my_argument=10)\n",
857 | "```\n",
858 | "\n",
859 | "becomes\n",
860 | "\n",
861 | "```python\n",
862 | "flights.pipe(my_function, my_argument=10)\n",
863 | "```\n",
864 | "\n",
865 | "We initially had grander visions for `.pipe`, but the wider python community didn't seem that interested."
866 | ]
867 | },
868 | {
869 | "cell_type": "markdown",
870 | "metadata": {},
871 | "source": [
872 | "# Other Data Sources"
873 | ]
874 | },
875 | {
876 | "cell_type": "markdown",
877 | "metadata": {},
878 | "source": [
879 | "Pandas has tons [IO tools](http://pandas.pydata.org/pandas-docs/version/0.15.0/io.html) to help you get data in and out, including SQL databases via [SQLAlchemy](http://www.sqlalchemy.org)."
880 | ]
881 | },
882 | {
883 | "cell_type": "markdown",
884 | "metadata": {},
885 | "source": [
886 | "# Summary"
887 | ]
888 | },
889 | {
890 | "cell_type": "markdown",
891 | "metadata": {},
892 | "source": [
893 | "I think pandas held up pretty well, considering this was a vignette written for dplyr. I found the degree of similarity more interesting than the differences. The most difficult task was renaming of columns within an operation; they had to be followed up with a call to ``rename`` *after* the operation, which isn't that burdensome honestly.\n",
894 | "\n",
895 | "More and more it looks like we're moving towards future where being a language or package partisan just doesn't make sense. Not when you can load up a [Jupyter](http://jupyter.org) (formerly IPython) notebook to call up a library written in R, and hand those results off to python or Julia or whatever for followup, before going back to R to make a cool [shiny](http://shiny.rstudio.com) web app.\n",
896 | "\n",
897 | "There will always be a place for your \"utility belt\" package like dplyr or pandas, but it wouldn't hurt to be familiar with both.\n",
898 | "\n",
899 | "If you want to contribute to pandas, we're always looking for help at https://github.com/pydata/pandas/.\n",
900 | "You can get ahold of me directly on [twitter](https://twitter.com/tomaugspurger)."
901 | ]
902 | }
903 | ],
904 | "metadata": {
905 | "kernelspec": {
906 | "display_name": "Python 3",
907 | "language": "python",
908 | "name": "python3"
909 | },
910 | "language_info": {
911 | "codemirror_mode": {
912 | "name": "ipython",
913 | "version": 3
914 | },
915 | "file_extension": ".py",
916 | "mimetype": "text/x-python",
917 | "name": "python",
918 | "nbconvert_exporter": "python",
919 | "pygments_lexer": "ipython3",
920 | "version": "3.4.4"
921 | }
922 | },
923 | "nbformat": 4,
924 | "nbformat_minor": 0
925 | }
926 |
--------------------------------------------------------------------------------