├── .gitignore
├── LICENSE
├── README.md
├── appendixC.ipynb
├── ch02.ipynb
├── ch03.ipynb
├── ch04.ipynb
├── ch05.ipynb
├── ch06.ipynb
├── ch07.ipynb
├── ch08.ipynb
├── ch09.ipynb
├── ch11.ipynb
├── ch12.ipynb
├── conda
├── xl310.yml
└── xl38.yml
├── csv
├── AAPL.csv
├── AMZN.csv
├── GOOGL.csv
└── MSFT.csv
├── debugging.py
├── environment.yml
├── excel.py
├── images
├── cover.png
├── python.bmp
└── python.png
├── packagetracker
├── database.py
├── packagetracker.db
├── packagetracker.py
└── packagetracker.xlsm
├── parallel_openpyxl.py
├── parallel_pandas.py
├── parallel_xlrd.py
├── pep8_sample.py
├── requirements.txt
├── sales_data
├── existing
│ ├── April.xls
│ ├── August.xls
│ ├── December.xls
│ ├── February.xls
│ ├── January.xls
│ ├── July.xls
│ ├── June.xls
│ ├── March.xls
│ ├── May.xls
│ ├── November.xls
│ ├── October.xls
│ └── September.xls
└── new
│ ├── April.xlsx
│ ├── August.xlsx
│ ├── December.xlsx
│ ├── February.xlsx
│ ├── January.xlsx
│ ├── July.xlsx
│ ├── June.xlsx
│ ├── March.xlsx
│ ├── May.xlsx
│ ├── November.xlsx
│ ├── October.xlsx
│ └── September.xlsx
├── sales_report_openpyxl.py
├── sales_report_pandas.py
├── sales_report_xlsxwriter.py
├── sales_report_xlwings.py
├── temperature.py
├── udfs
├── describe
│ ├── describe.py
│ └── describe.xlsm
├── first_udf
│ ├── first_udf.py
│ └── first_udf.xlsm
├── google_trends
│ ├── google_trends.py
│ └── google_trends.xlsm
├── google_trends_cache
│ ├── google_trends_cache.py
│ └── google_trends_cache.xlsm
├── importsub
│ ├── importsub.py
│ └── importsub.xlsm
├── raw_values
│ ├── raw_values.py
│ └── raw_values.xlsm
└── revenues
│ ├── revenues.py
│ └── revenues.xlsm
└── xl
├── array_calculations.xlsx
├── big.xlsx
├── course_participants.xlsx
├── currency_converter.xlsx
├── macro.xlsm
├── sales_report_template.xlsx
├── stores.xls
├── stores.xlsb
├── stores.xlsx
├── vba.xlsm
└── vbaProject.bin
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 | .DS_Store
3 | ~$*.xls*
4 | *.pyc
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Zoomer Analytics GmbH
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python for Excel (O'Reilly, 2021)
2 |
3 |
4 |
5 | This is the companion repository for the O'Reilly book [Python for Excel](https://learning.oreilly.com/library/view/python-for-excel/9781492080992/).
6 |
7 | All notebooks can be run in the cloud except `ch09.ipynb` (requires a local installation of Excel):
8 | [](https://mybinder.org/v2/gh/fzumstein/python-for-excel/1st-edition?urlpath=tree)
9 |
--------------------------------------------------------------------------------
/appendixC.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Appendix C\n",
8 | "## Classes and Objects"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {
15 | "pycharm": {
16 | "name": "#%%\n"
17 | }
18 | },
19 | "outputs": [],
20 | "source": [
21 | "class Car:\n",
22 | " def __init__(self, color, speed=0):\n",
23 | " self.color = color\n",
24 | " self.speed = speed\n",
25 | "\n",
26 | " def accelerate(self, mph):\n",
27 | " self.speed += mph"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "pycharm": {
35 | "name": "#%%\n"
36 | }
37 | },
38 | "outputs": [],
39 | "source": [
40 | "# Let's instantiate two car objects\n",
41 | "car1 = Car(\"red\")\n",
42 | "car2 = Car(color=\"blue\")"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {
49 | "pycharm": {
50 | "name": "#%%\n"
51 | }
52 | },
53 | "outputs": [],
54 | "source": [
55 | "# By default, an object prints its memory location\n",
56 | "car1"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {
63 | "pycharm": {
64 | "name": "#%%\n"
65 | }
66 | },
67 | "outputs": [],
68 | "source": [
69 | "# Attributes give you access to the data of an object\n",
70 | "car1.color"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {
77 | "pycharm": {
78 | "name": "#%%\n"
79 | }
80 | },
81 | "outputs": [],
82 | "source": [
83 | "car1.speed"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "pycharm": {
91 | "name": "#%%\n"
92 | }
93 | },
94 | "outputs": [],
95 | "source": [
96 | "# Calling the accelerate method on car1\n",
97 | "car1.accelerate(20)"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "pycharm": {
105 | "name": "#%%\n"
106 | }
107 | },
108 | "outputs": [],
109 | "source": [
110 | "# The speed attribute of car1 changed\n",
111 | "car1.speed"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {
118 | "pycharm": {
119 | "name": "#%%\n"
120 | }
121 | },
122 | "outputs": [],
123 | "source": [
124 | "# The speed attribute of car2 remained the same\n",
125 | "car2.speed"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "pycharm": {
133 | "name": "#%%\n"
134 | }
135 | },
136 | "outputs": [],
137 | "source": [
138 | "car1.color = \"green\""
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {
145 | "pycharm": {
146 | "name": "#%%\n"
147 | }
148 | },
149 | "outputs": [],
150 | "source": [
151 | "car1.color"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {
158 | "pycharm": {
159 | "name": "#%%\n"
160 | }
161 | },
162 | "outputs": [],
163 | "source": [
164 | "car2.color # unchanged"
165 | ]
166 | },
167 | {
168 | "cell_type": "markdown",
169 | "metadata": {},
170 | "source": [
171 | "## Working with time-zone-aware datetime objects"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "metadata": {
178 | "pycharm": {
179 | "name": "#%%\n"
180 | }
181 | },
182 | "outputs": [],
183 | "source": [
184 | "import datetime as dt\n",
185 | "from dateutil import tz"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "pycharm": {
193 | "name": "#%%\n"
194 | }
195 | },
196 | "outputs": [],
197 | "source": [
198 | "# Time-zone-naive datetime object\n",
199 | "timestamp = dt.datetime(2020, 1, 31, 14, 30)\n",
200 | "timestamp.isoformat()"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {
207 | "pycharm": {
208 | "name": "#%%\n"
209 | }
210 | },
211 | "outputs": [],
212 | "source": [
213 | "# Time-zone-aware datetime object\n",
214 | "timestamp_eastern = dt.datetime(2020, 1, 31, 14, 30,\n",
215 | " tzinfo=tz.gettz(\"US/Eastern\"))\n",
216 | "# Printing in isoformat makes it easy to\n",
217 | "# see the offset from UTC\n",
218 | "timestamp_eastern.isoformat()"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {
225 | "pycharm": {
226 | "name": "#%%\n"
227 | }
228 | },
229 | "outputs": [],
230 | "source": [
231 | "# Assign a time zone to a naive datetime object\n",
232 | "timestamp_eastern = timestamp.replace(tzinfo=tz.gettz(\"US/Eastern\"))\n",
233 | "timestamp_eastern.isoformat()"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "pycharm": {
241 | "name": "#%%\n"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "# Convert from one time zone to another.\n",
247 | "# Since the UTC time zone is so common,\n",
248 | "# there is a shortcut: tz.UTC\n",
249 | "timestamp_utc = timestamp_eastern.astimezone(tz.UTC)\n",
250 | "timestamp_utc.isoformat()"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "metadata": {
257 | "pycharm": {
258 | "name": "#%%\n"
259 | }
260 | },
261 | "outputs": [],
262 | "source": [
263 | "# From time-zone-aware to naive\n",
264 | "timestamp_eastern.replace(tzinfo=None)"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "execution_count": null,
270 | "metadata": {
271 | "pycharm": {
272 | "name": "#%%\n"
273 | }
274 | },
275 | "outputs": [],
276 | "source": [
277 | "# Current time without time zone\n",
278 | "dt.datetime.now()"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "metadata": {
285 | "pycharm": {
286 | "name": "#%%\n"
287 | }
288 | },
289 | "outputs": [],
290 | "source": [
291 | "# Current time in UTC time zone\n",
292 | "dt.datetime.now(tz.UTC)"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 | "## Mutable vs. Immutable Objects"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {
306 | "pycharm": {
307 | "name": "#%%\n"
308 | }
309 | },
310 | "outputs": [],
311 | "source": [
312 | "a = [1, 2, 3]\n",
313 | "b = a\n",
314 | "a[1] = 22\n",
315 | "print(a)\n",
316 | "print(b)"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {
323 | "pycharm": {
324 | "name": "#%%\n"
325 | }
326 | },
327 | "outputs": [],
328 | "source": [
329 | "a = [1, 2, 3]\n",
330 | "b = a.copy()"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": null,
336 | "metadata": {
337 | "pycharm": {
338 | "name": "#%%\n"
339 | }
340 | },
341 | "outputs": [],
342 | "source": [
343 | "a"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": null,
349 | "metadata": {
350 | "pycharm": {
351 | "name": "#%%\n"
352 | }
353 | },
354 | "outputs": [],
355 | "source": [
356 | "b"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": null,
362 | "metadata": {
363 | "pycharm": {
364 | "name": "#%%\n"
365 | }
366 | },
367 | "outputs": [],
368 | "source": [
369 | "a[1] = 22 # Changing \"a\"..."
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {
376 | "pycharm": {
377 | "name": "#%%\n"
378 | }
379 | },
380 | "outputs": [],
381 | "source": [
382 | "a"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": null,
388 | "metadata": {
389 | "pycharm": {
390 | "name": "#%%\n"
391 | }
392 | },
393 | "outputs": [],
394 | "source": [
395 | "b # ...doesn't affect \"b\""
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": null,
401 | "metadata": {
402 | "pycharm": {
403 | "name": "#%%\n"
404 | }
405 | },
406 | "outputs": [],
407 | "source": [
408 | "import copy\n",
409 | "b = copy.deepcopy(a)"
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": null,
415 | "metadata": {
416 | "pycharm": {
417 | "name": "#%%\n"
418 | }
419 | },
420 | "outputs": [],
421 | "source": [
422 | "def increment(x):\n",
423 | " x = x + 1\n",
424 | " return x"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": null,
430 | "metadata": {
431 | "pycharm": {
432 | "name": "#%%\n"
433 | }
434 | },
435 | "outputs": [],
436 | "source": [
437 | "a = 1\n",
438 | "print(increment(a))\n",
439 | "print(a)"
440 | ]
441 | },
442 | {
443 | "cell_type": "code",
444 | "execution_count": null,
445 | "metadata": {
446 | "pycharm": {
447 | "name": "#%%\n"
448 | }
449 | },
450 | "outputs": [],
451 | "source": [
452 | "def increment(x):\n",
453 | " x[0] = x[0] + 1\n",
454 | " return x"
455 | ]
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": null,
460 | "metadata": {
461 | "pycharm": {
462 | "name": "#%%\n"
463 | }
464 | },
465 | "outputs": [],
466 | "source": [
467 | "a = [1]\n",
468 | "print(increment(a))\n",
469 | "print(a)"
470 | ]
471 | },
472 | {
473 | "cell_type": "code",
474 | "execution_count": null,
475 | "metadata": {
476 | "pycharm": {
477 | "name": "#%%\n"
478 | }
479 | },
480 | "outputs": [],
481 | "source": [
482 | "a = [1]\n",
483 | "print(increment(a.copy()))\n",
484 | "print(a)"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": null,
490 | "metadata": {
491 | "pycharm": {
492 | "name": "#%%\n"
493 | }
494 | },
495 | "outputs": [],
496 | "source": [
497 | "# Don't do this:\n",
498 | "def add_one(x=[]):\n",
499 | " x.append(1)\n",
500 | " return x"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": null,
506 | "metadata": {
507 | "pycharm": {
508 | "name": "#%%\n"
509 | }
510 | },
511 | "outputs": [],
512 | "source": [
513 | "add_one()"
514 | ]
515 | },
516 | {
517 | "cell_type": "code",
518 | "execution_count": null,
519 | "metadata": {
520 | "pycharm": {
521 | "name": "#%%\n"
522 | }
523 | },
524 | "outputs": [],
525 | "source": [
526 | "add_one()"
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": null,
532 | "metadata": {
533 | "pycharm": {
534 | "name": "#%%\n"
535 | }
536 | },
537 | "outputs": [],
538 | "source": [
539 | "def add_one(x=None):\n",
540 | " if x is None:\n",
541 | " x = []\n",
542 | " x.append(1)\n",
543 | " return x"
544 | ]
545 | },
546 | {
547 | "cell_type": "code",
548 | "execution_count": null,
549 | "metadata": {
550 | "pycharm": {
551 | "name": "#%%\n"
552 | }
553 | },
554 | "outputs": [],
555 | "source": [
556 | "add_one()"
557 | ]
558 | },
559 | {
560 | "cell_type": "code",
561 | "execution_count": null,
562 | "metadata": {
563 | "pycharm": {
564 | "name": "#%%\n"
565 | }
566 | },
567 | "outputs": [],
568 | "source": [
569 | "add_one()"
570 | ]
571 | }
572 | ],
573 | "metadata": {
574 | "kernelspec": {
575 | "display_name": "Python 3",
576 | "language": "python",
577 | "name": "python3"
578 | },
579 | "language_info": {
580 | "codemirror_mode": {
581 | "name": "ipython",
582 | "version": 3
583 | },
584 | "file_extension": ".py",
585 | "mimetype": "text/x-python",
586 | "name": "python",
587 | "nbconvert_exporter": "python",
588 | "pygments_lexer": "ipython3",
589 | "version": "3.7.4"
590 | }
591 | },
592 | "nbformat": 4,
593 | "nbformat_minor": 4
594 | }
595 |
--------------------------------------------------------------------------------
/ch02.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "3 + 4"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# This is a first-level heading\n",
17 | "\n",
18 | "## This is a second-level heading\n",
19 | "\n",
20 | "You can make your text *italic* or **bold** or `monospaced`.\n",
21 | "\n",
22 | "* This is a bullet point\n",
23 | "* This is another bullet point"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "## Run Order Matters"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "a = 1"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "a"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "a = 2"
58 | ]
59 | }
60 | ],
61 | "metadata": {
62 | "kernelspec": {
63 | "display_name": "Python 3",
64 | "language": "python",
65 | "name": "python3"
66 | },
67 | "language_info": {
68 | "codemirror_mode": {
69 | "name": "ipython",
70 | "version": 3
71 | },
72 | "file_extension": ".py",
73 | "mimetype": "text/x-python",
74 | "name": "python",
75 | "nbconvert_exporter": "python",
76 | "pygments_lexer": "ipython3",
77 | "version": "3.7.4"
78 | }
79 | },
80 | "nbformat": 4,
81 | "nbformat_minor": 4
82 | }
83 |
--------------------------------------------------------------------------------
/ch04.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Foundations: NumPy\n",
8 | "## NumPy Array"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "matrix = [[1, 2, 3],\n",
18 | " [4, 5, 6],\n",
19 | " [7, 8, 9]]"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "[[i + 1 for i in row] for row in matrix]"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "# First, let's import NumPy\n",
38 | "import numpy as np"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "# Constructing an array with a simple list results in a 1d array\n",
48 | "array1 = np.array([10, 100, 1000.])"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "# Constructing an array with a nested list results in a 2d array\n",
58 | "array2 = np.array([[1., 2., 3.],\n",
59 | " [4., 5., 6.]])"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "array1.dtype"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "float(array1[0])"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "## Vectorization and Broadcasting"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "metadata": {},
91 | "outputs": [],
92 | "source": [
93 | "array2 + 1"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {},
100 | "outputs": [],
101 | "source": [
102 | "array2 * array2"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": null,
108 | "metadata": {},
109 | "outputs": [],
110 | "source": [
111 | "array2 * array1"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "array2 @ array2.T # array2.T is a shortcut for array2.transpose()"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "## Universal Functions (ufunc)"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "import math"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
 145 | "math.sqrt(array2) # This will raise an error"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": null,
151 | "metadata": {},
152 | "outputs": [],
153 | "source": [
154 | "np.array([[math.sqrt(i) for i in row] for row in array2])"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {},
161 | "outputs": [],
162 | "source": [
163 | "np.sqrt(array2)"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "array2.sum(axis=0) # Returns a 1d array"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "array2.sum()"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "## Getting and Setting Array Elements"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "array1[2] # Returns a scalar"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "array2[0, 0] # Returns a scalar"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "array2[:, 1:] # Returns a 2d array"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": null,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "array2[:, 1] # Returns a 1d array"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "array2[1, :2] # Returns a 1d array"
234 | ]
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {},
239 | "source": [
240 | "## Useful Array Constructors"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": null,
246 | "metadata": {},
247 | "outputs": [],
248 | "source": [
249 | "np.arange(2 * 5).reshape(2, 5) # 2 rows, 5 columns"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {},
256 | "outputs": [],
257 | "source": [
258 | "np.random.randn(2, 3) # 2 rows, 3 columns"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "## View vs. Copy"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "array2"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": null,
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "subset = array2[:, :2]\n",
284 | "subset"
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": null,
290 | "metadata": {},
291 | "outputs": [],
292 | "source": [
293 | "subset[0, 0] = 1000"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": null,
299 | "metadata": {},
300 | "outputs": [],
301 | "source": [
302 | "subset"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": null,
308 | "metadata": {},
309 | "outputs": [],
310 | "source": [
311 | "array2"
312 | ]
313 | }
314 | ],
315 | "metadata": {
316 | "kernelspec": {
317 | "display_name": "Python 3",
318 | "language": "python",
319 | "name": "python3"
320 | },
321 | "language_info": {
322 | "codemirror_mode": {
323 | "name": "ipython",
324 | "version": 3
325 | },
326 | "file_extension": ".py",
327 | "mimetype": "text/x-python",
328 | "name": "python",
329 | "nbconvert_exporter": "python",
330 | "pygments_lexer": "ipython3",
331 | "version": "3.7.4"
332 | }
333 | },
334 | "nbformat": 4,
335 | "nbformat_minor": 4
336 | }
337 |
--------------------------------------------------------------------------------
/ch05.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# DataFrame and Series"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "pd.read_excel(\"xl/course_participants.xlsx\")"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "data = [[\"Mark\", 55, \"Italy\", 4.5, \"Europe\"],\n",
35 | " [\"John\", 33, \"USA\", 6.7, \"America\"],\n",
36 | " [\"Tim\", 41, \"USA\", 3.9, \"America\"],\n",
37 | " [\"Jenny\", 12, \"Germany\", 9.0, \"Europe\"]]\n",
38 | "df = pd.DataFrame(data=data,\n",
39 | " columns=[\"name\", \"age\", \"country\",\n",
40 | " \"score\", \"continent\"],\n",
41 | " index=[1001, 1000, 1002, 1003])\n",
42 | "df"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "df.info()"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "## Index"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "df.index"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "df.index.name = \"user_id\"\n",
77 | "df"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "# \"reset_index\" turns the index into a column, replacing the\n",
87 | "# index with the default index. This corresponds to the DataFrame\n",
88 | "# from the beginning that we loaded from Excel.\n",
89 | "df.reset_index()"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "# \"reset_index\" turns \"user_id\" into a regular column and\n",
99 | "# \"set_index\" turns the column \"name\" into the index\n",
100 | "df.reset_index().set_index(\"name\")"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 | "df.reindex([999, 1000, 1001, 1004])"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "df.sort_index()"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "df.sort_values([\"continent\", \"age\"])"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "## Columns"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {},
141 | "outputs": [],
142 | "source": [
143 | "df.columns"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "df.columns.name = \"properties\"\n",
153 | "df"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": null,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "df.rename(columns={\"name\": \"First Name\", \"age\": \"Age\"})"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": null,
168 | "metadata": {},
169 | "outputs": [],
170 | "source": [
171 | "df.drop(columns=[\"name\", \"country\"],\n",
172 | " index=[1000, 1003])"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "df.T # Shortcut for df.transpose()"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": [
190 | "df.loc[:, [\"continent\", \"country\", \"name\", \"age\", \"score\"]]"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "# Data Manipulation\n",
198 | "## Selecting Data"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "# Using scalars for both row and column selection returns a scalar\n",
208 | "df.loc[1001, \"name\"]"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {},
215 | "outputs": [],
216 | "source": [
217 | "# Using a scalar on either the row or column selection returns a Series\n",
218 | "df.loc[[1001, 1002], \"age\"]"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {},
225 | "outputs": [],
226 | "source": [
227 | "# Selecting multiple rows and columns returns a DataFrame\n",
228 | "df.loc[:1002, [\"name\", \"country\"]]"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
 237 | "df.iloc[0, 0] # Returns a scalar"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": null,
243 | "metadata": {},
244 | "outputs": [],
245 | "source": [
246 | "df.iloc[[0, 2], 1] # Returns a Series"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "df.iloc[:3, [0, 2]] # Returns a DataFrame"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "metadata": {},
262 | "outputs": [],
263 | "source": [
264 | "tf = (df[\"age\"] > 40) & (df[\"country\"] == \"USA\")\n",
265 | "tf # This is a Series with only True/False"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "df.loc[tf, :]"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": null,
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "df.loc[df.index > 1001, :]"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": null,
289 | "metadata": {},
290 | "outputs": [],
291 | "source": [
292 | "df.loc[df[\"country\"].isin([\"Italy\", \"Germany\"]), :]"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {},
299 | "outputs": [],
300 | "source": [
301 | "# This could be the yearly rainfall in millimeters\n",
302 | "rainfall = pd.DataFrame(data={\"City 1\": [300.1, 100.2],\n",
303 | " \"City 2\": [400.3, 300.4],\n",
304 | " \"City 3\": [1000.5, 1100.6]})\n",
305 | "rainfall"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": null,
311 | "metadata": {},
312 | "outputs": [],
313 | "source": [
314 | "rainfall < 400"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": null,
320 | "metadata": {},
321 | "outputs": [],
322 | "source": [
323 | "rainfall[rainfall < 400]"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": null,
329 | "metadata": {},
330 | "outputs": [],
331 | "source": [
332 | "# A MultiIndex needs to be sorted\n",
333 | "df_multi = df.reset_index().set_index([\"continent\", \"country\"])\n",
334 | "df_multi = df_multi.sort_index()\n",
335 | "df_multi"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": null,
341 | "metadata": {},
342 | "outputs": [],
343 | "source": [
344 | "df_multi.loc[\"Europe\", :]"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": null,
350 | "metadata": {},
351 | "outputs": [],
352 | "source": [
353 | "df_multi.loc[(\"Europe\", \"Italy\"), :]"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": null,
359 | "metadata": {},
360 | "outputs": [],
361 | "source": [
362 | "df_multi.reset_index(level=0)"
363 | ]
364 | },
365 | {
366 | "cell_type": "markdown",
367 | "metadata": {},
368 | "source": [
369 | "## Setting Data"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "# Copy the DataFrame first to leave the original untouched\n",
379 | "df2 = df.copy()"
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "execution_count": null,
385 | "metadata": {},
386 | "outputs": [],
387 | "source": [
388 | "df2.loc[1000, \"name\"] = \"JOHN\"\n",
389 | "df2"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {},
396 | "outputs": [],
397 | "source": [
398 | "df2.loc[[1000, 1001], \"score\"] = [3, 4]\n",
399 | "df2"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": null,
405 | "metadata": {},
406 | "outputs": [],
407 | "source": [
408 | "tf = (df2[\"age\"] < 20) | (df2[\"country\"] == \"USA\")\n",
409 | "df2.loc[tf, \"name\"] = \"xxx\"\n",
410 | "df2"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": null,
416 | "metadata": {},
417 | "outputs": [],
418 | "source": [
419 | "# Copy the DataFrame first to leave the original untouched\n",
420 | "rainfall2 = rainfall.copy()\n",
421 | "rainfall2"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": null,
427 | "metadata": {},
428 | "outputs": [],
429 | "source": [
430 | "# Set the values to 0 wherever they are below 400\n",
431 | "rainfall2[rainfall2 < 400] = 0\n",
432 | "rainfall2"
433 | ]
434 | },
435 | {
436 | "cell_type": "code",
437 | "execution_count": null,
438 | "metadata": {},
439 | "outputs": [],
440 | "source": [
441 | "df2.replace(\"USA\", \"U.S.\")"
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": null,
447 | "metadata": {},
448 | "outputs": [],
449 | "source": [
450 | "df2.loc[:, \"discount\"] = 0\n",
451 | "df2.loc[:, \"price\"] = [49.9, 49.9, 99.9, 99.9]\n",
452 | "df2"
453 | ]
454 | },
455 | {
456 | "cell_type": "code",
457 | "execution_count": null,
458 | "metadata": {},
459 | "outputs": [],
460 | "source": [
461 | "df2 = df.copy() # Let's start with a fresh copy\n",
462 | "df2.loc[:, \"birth year\"] = 2021 - df2[\"age\"]\n",
463 | "df2"
464 | ]
465 | },
466 | {
467 | "cell_type": "markdown",
468 | "metadata": {},
469 | "source": [
470 | "## Missing Data"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": null,
476 | "metadata": {},
477 | "outputs": [],
478 | "source": [
479 | "df2 = df.copy() # Let's start with a fresh copy\n",
480 | "df2.loc[1000, \"score\"] = None\n",
481 | "df2.loc[1003, :] = None\n",
482 | "df2"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": null,
488 | "metadata": {},
489 | "outputs": [],
490 | "source": [
491 | "df2.dropna()"
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": null,
497 | "metadata": {},
498 | "outputs": [],
499 | "source": [
500 | "df2.dropna(how=\"all\")"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": null,
506 | "metadata": {},
507 | "outputs": [],
508 | "source": [
509 | "df2.isna()"
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "metadata": {},
516 | "outputs": [],
517 | "source": [
518 | "df2.fillna({\"score\": df2[\"score\"].mean()})"
519 | ]
520 | },
521 | {
522 | "cell_type": "markdown",
523 | "metadata": {},
524 | "source": [
525 | "## Duplicate Data"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": null,
531 | "metadata": {},
532 | "outputs": [],
533 | "source": [
534 | "df.drop_duplicates([\"country\", \"continent\"])"
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": null,
540 | "metadata": {},
541 | "outputs": [],
542 | "source": [
543 | "df[\"country\"].is_unique"
544 | ]
545 | },
546 | {
547 | "cell_type": "code",
548 | "execution_count": null,
549 | "metadata": {},
550 | "outputs": [],
551 | "source": [
552 | "df[\"country\"].unique()"
553 | ]
554 | },
555 | {
556 | "cell_type": "code",
557 | "execution_count": null,
558 | "metadata": {},
559 | "outputs": [],
560 | "source": [
561 | "# By default, it marks only duplicates as True, i.e.\n",
562 | "# without the first occurrence\n",
563 | "df[\"country\"].duplicated()"
564 | ]
565 | },
566 | {
567 | "cell_type": "code",
568 | "execution_count": null,
569 | "metadata": {},
570 | "outputs": [],
571 | "source": [
572 | "# To get all rows where \"country\" is duplicated, use\n",
573 | "# keep=False\n",
574 | "df.loc[df[\"country\"].duplicated(keep=False), :]"
575 | ]
576 | },
577 | {
578 | "cell_type": "markdown",
579 | "metadata": {},
580 | "source": [
581 | "## Arithmetic Operations"
582 | ]
583 | },
584 | {
585 | "cell_type": "code",
586 | "execution_count": null,
587 | "metadata": {},
588 | "outputs": [],
589 | "source": [
590 | "rainfall"
591 | ]
592 | },
593 | {
594 | "cell_type": "code",
595 | "execution_count": null,
596 | "metadata": {},
597 | "outputs": [],
598 | "source": [
599 | "rainfall + 100"
600 | ]
601 | },
602 | {
603 | "cell_type": "code",
604 | "execution_count": null,
605 | "metadata": {},
606 | "outputs": [],
607 | "source": [
608 | "more_rainfall = pd.DataFrame(data=[[100, 200], [300, 400]],\n",
609 | " index=[1, 2],\n",
610 | " columns=[\"City 1\", \"City 4\"])\n",
611 | "more_rainfall"
612 | ]
613 | },
614 | {
615 | "cell_type": "code",
616 | "execution_count": null,
617 | "metadata": {},
618 | "outputs": [],
619 | "source": [
620 | "rainfall + more_rainfall"
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "execution_count": null,
626 | "metadata": {},
627 | "outputs": [],
628 | "source": [
629 | "rainfall.add(more_rainfall, fill_value=0)"
630 | ]
631 | },
632 | {
633 | "cell_type": "code",
634 | "execution_count": null,
635 | "metadata": {},
636 | "outputs": [],
637 | "source": [
638 | "# A Series taken from a row\n",
639 | "rainfall.loc[1, :]"
640 | ]
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": null,
645 | "metadata": {},
646 | "outputs": [],
647 | "source": [
648 | "rainfall + rainfall.loc[1, :]"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {},
655 | "outputs": [],
656 | "source": [
657 | "# A Series taken from a column\n",
658 | "rainfall.loc[:, \"City 2\"]"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": null,
664 | "metadata": {},
665 | "outputs": [],
666 | "source": [
667 | "rainfall.add(rainfall.loc[:, \"City 2\"], axis=0)"
668 | ]
669 | },
670 | {
671 | "cell_type": "code",
672 | "execution_count": null,
673 | "metadata": {},
674 | "outputs": [],
675 | "source": [
676 | "# Let's create a new DataFrame\n",
677 | "users = pd.DataFrame(data=[\" mArk \", \"JOHN \", \"Tim\", \" jenny\"],\n",
678 | " columns=[\"name\"])\n",
679 | "users"
680 | ]
681 | },
682 | {
683 | "cell_type": "code",
684 | "execution_count": null,
685 | "metadata": {},
686 | "outputs": [],
687 | "source": [
688 | "users_cleaned = users.loc[:, \"name\"].str.strip().str.capitalize()\n",
689 | "users_cleaned"
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": null,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": [
698 | "users_cleaned.str.startswith(\"J\")"
699 | ]
700 | },
701 | {
702 | "cell_type": "markdown",
703 | "metadata": {},
704 | "source": [
705 | "## Applying a Function"
706 | ]
707 | },
708 | {
709 | "cell_type": "code",
710 | "execution_count": null,
711 | "metadata": {},
712 | "outputs": [],
713 | "source": [
714 | "rainfall"
715 | ]
716 | },
717 | {
718 | "cell_type": "code",
719 | "execution_count": null,
720 | "metadata": {},
721 | "outputs": [],
722 | "source": [
723 | "def format_string(x):\n",
724 | " return f\"{x:,.2f}\""
725 | ]
726 | },
727 | {
728 | "cell_type": "code",
729 | "execution_count": null,
730 | "metadata": {},
731 | "outputs": [],
732 | "source": [
733 | "# Note that we pass in the function without calling it,\n",
734 | "# i.e., format_string and not format_string()!\n",
735 | "rainfall.applymap(format_string)"
736 | ]
737 | },
738 | {
739 | "cell_type": "code",
740 | "execution_count": null,
741 | "metadata": {},
742 | "outputs": [],
743 | "source": [
744 | "rainfall.applymap(lambda x: f\"{x:,.2f}\")"
745 | ]
746 | },
747 | {
748 | "cell_type": "markdown",
749 | "metadata": {},
750 | "source": [
751 | "# Combining DataFrames\n",
752 | "## Concatenating"
753 | ]
754 | },
755 | {
756 | "cell_type": "code",
757 | "execution_count": null,
758 | "metadata": {},
759 | "outputs": [],
760 | "source": [
761 | "data = [[15, \"France\", 4.1, \"Becky\"],\n",
762 | " [44, \"Canada\", 6.1, \"Leanne\"]]\n",
763 | "more_users = pd.DataFrame(data=data,\n",
764 | " columns=[\"age\", \"country\", \"score\", \"name\"],\n",
765 | " index=[1000, 1011])\n",
766 | "more_users"
767 | ]
768 | },
769 | {
770 | "cell_type": "code",
771 | "execution_count": null,
772 | "metadata": {},
773 | "outputs": [],
774 | "source": [
775 | "pd.concat([df, more_users], axis=0)"
776 | ]
777 | },
778 | {
779 | "cell_type": "code",
780 | "execution_count": null,
781 | "metadata": {},
782 | "outputs": [],
783 | "source": [
784 | "data = [[3, 4],\n",
785 | " [5, 6]]\n",
786 | "more_categories = pd.DataFrame(data=data,\n",
787 | " columns=[\"quizzes\", \"logins\"],\n",
788 | " index=[1000, 2000])\n",
789 | "more_categories"
790 | ]
791 | },
792 | {
793 | "cell_type": "code",
794 | "execution_count": null,
795 | "metadata": {},
796 | "outputs": [],
797 | "source": [
798 | "pd.concat([df, more_categories], axis=1)"
799 | ]
800 | },
801 | {
802 | "cell_type": "markdown",
803 | "metadata": {},
804 | "source": [
805 | "## Joining and Merging"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": null,
811 | "metadata": {},
812 | "outputs": [],
813 | "source": [
814 | "df1 = pd.DataFrame(data=[[1, 2], [3, 4], [5, 6]],\n",
815 | " columns=[\"A\", \"B\"])\n",
816 | "df1"
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": null,
822 | "metadata": {},
823 | "outputs": [],
824 | "source": [
825 | "df2 = pd.DataFrame(data=[[10, 20], [30, 40]],\n",
826 | " columns=[\"C\", \"D\"], index=[1, 3])\n",
827 | "df2"
828 | ]
829 | },
830 | {
831 | "cell_type": "code",
832 | "execution_count": null,
833 | "metadata": {},
834 | "outputs": [],
835 | "source": [
836 | "df1.join(df2, how=\"inner\")"
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": null,
842 | "metadata": {},
843 | "outputs": [],
844 | "source": [
845 | "df1.join(df2, how=\"left\")"
846 | ]
847 | },
848 | {
849 | "cell_type": "code",
850 | "execution_count": null,
851 | "metadata": {},
852 | "outputs": [],
853 | "source": [
854 | "df1.join(df2, how=\"right\")"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": null,
860 | "metadata": {},
861 | "outputs": [],
862 | "source": [
863 | "df1.join(df2, how=\"outer\")"
864 | ]
865 | },
866 | {
867 | "cell_type": "code",
868 | "execution_count": null,
869 | "metadata": {},
870 | "outputs": [],
871 | "source": [
872 | "# Add a column called \"category\" to both DataFrames\n",
873 | "df1[\"category\"] = [\"a\", \"b\", \"c\"]\n",
874 | "df2[\"category\"] = [\"c\", \"b\"]"
875 | ]
876 | },
877 | {
878 | "cell_type": "code",
879 | "execution_count": null,
880 | "metadata": {},
881 | "outputs": [],
882 | "source": [
883 | "df1"
884 | ]
885 | },
886 | {
887 | "cell_type": "code",
888 | "execution_count": null,
889 | "metadata": {},
890 | "outputs": [],
891 | "source": [
892 | "df2"
893 | ]
894 | },
895 | {
896 | "cell_type": "code",
897 | "execution_count": null,
898 | "metadata": {},
899 | "outputs": [],
900 | "source": [
901 | "df1.merge(df2, how=\"inner\", on=[\"category\"])"
902 | ]
903 | },
904 | {
905 | "cell_type": "code",
906 | "execution_count": null,
907 | "metadata": {},
908 | "outputs": [],
909 | "source": [
910 | "df1.merge(df2, how=\"left\", on=[\"category\"])"
911 | ]
912 | },
913 | {
914 | "cell_type": "markdown",
915 | "metadata": {},
916 | "source": [
917 | "# Data Aggregation and Descriptive Statistics\n",
918 | "## Descriptive Statistics"
919 | ]
920 | },
921 | {
922 | "cell_type": "code",
923 | "execution_count": null,
924 | "metadata": {},
925 | "outputs": [],
926 | "source": [
927 | "rainfall"
928 | ]
929 | },
930 | {
931 | "cell_type": "code",
932 | "execution_count": null,
933 | "metadata": {},
934 | "outputs": [],
935 | "source": [
936 | "rainfall.mean()"
937 | ]
938 | },
939 | {
940 | "cell_type": "code",
941 | "execution_count": null,
942 | "metadata": {},
943 | "outputs": [],
944 | "source": [
945 | "rainfall.mean(axis=1)"
946 | ]
947 | },
948 | {
949 | "cell_type": "markdown",
950 | "metadata": {},
951 | "source": [
952 | "## Grouping"
953 | ]
954 | },
955 | {
956 | "cell_type": "code",
957 | "execution_count": null,
958 | "metadata": {},
959 | "outputs": [],
960 | "source": [
961 | "df.groupby([\"continent\"]).mean()"
962 | ]
963 | },
964 | {
965 | "cell_type": "code",
966 | "execution_count": null,
967 | "metadata": {},
968 | "outputs": [],
969 | "source": [
970 | "df.groupby([\"continent\", \"country\"]).mean()"
971 | ]
972 | },
973 | {
974 | "cell_type": "code",
975 | "execution_count": null,
976 | "metadata": {},
977 | "outputs": [],
978 | "source": [
979 | "selection = df.loc[:, [\"age\", \"score\", \"continent\"]]\n",
980 | "selection.groupby([\"continent\"]).agg(lambda x: x.max() - x.min())"
981 | ]
982 | },
983 | {
984 | "cell_type": "markdown",
985 | "metadata": {},
986 | "source": [
987 | "## Pivoting and Melting"
988 | ]
989 | },
990 | {
991 | "cell_type": "code",
992 | "execution_count": null,
993 | "metadata": {},
994 | "outputs": [],
995 | "source": [
996 | "data = [[\"Oranges\", \"North\", 12.30],\n",
997 | " [\"Apples\", \"South\", 10.55],\n",
998 | " [\"Oranges\", \"South\", 22.00],\n",
999 | " [\"Bananas\", \"South\", 5.90],\n",
1000 | " [\"Bananas\", \"North\", 31.30],\n",
1001 | " [\"Oranges\", \"North\", 13.10]]\n",
1002 | "\n",
1003 | "sales = pd.DataFrame(data=data,\n",
1004 | " columns=[\"Fruit\", \"Region\", \"Revenue\"])\n",
1005 | "sales"
1006 | ]
1007 | },
1008 | {
1009 | "cell_type": "code",
1010 | "execution_count": null,
1011 | "metadata": {},
1012 | "outputs": [],
1013 | "source": [
1014 | "pivot = pd.pivot_table(sales,\n",
1015 | " index=\"Fruit\", columns=\"Region\",\n",
1016 | " values=\"Revenue\", aggfunc=\"sum\",\n",
1017 | " margins=True, margins_name=\"Total\")\n",
1018 | "pivot"
1019 | ]
1020 | },
1021 | {
1022 | "cell_type": "code",
1023 | "execution_count": null,
1024 | "metadata": {},
1025 | "outputs": [],
1026 | "source": [
1027 | "pd.melt(pivot.iloc[:-1,:-1].reset_index(),\n",
1028 | " id_vars=\"Fruit\",\n",
1029 | " value_vars=[\"North\", \"South\"], value_name=\"Revenue\")"
1030 | ]
1031 | },
1032 | {
1033 | "cell_type": "markdown",
1034 | "metadata": {},
1035 | "source": [
1036 | "# Plotting\n",
1037 | "## Matplotlib"
1038 | ]
1039 | },
1040 | {
1041 | "cell_type": "code",
1042 | "execution_count": null,
1043 | "metadata": {},
1044 | "outputs": [],
1045 | "source": [
1046 | "import numpy as np\n",
1047 | "%matplotlib inline\n",
1048 | "# Or %matplotlib notebook"
1049 | ]
1050 | },
1051 | {
1052 | "cell_type": "code",
1053 | "execution_count": null,
1054 | "metadata": {},
1055 | "outputs": [],
1056 | "source": [
1057 | "data = pd.DataFrame(data=np.random.rand(4, 4) * 100000,\n",
1058 | " index=[\"Q1\", \"Q2\", \"Q3\", \"Q4\"],\n",
1059 | " columns=[\"East\", \"West\", \"North\", \"South\"])\n",
1060 | "data.index.name = \"Quarters\"\n",
1061 | "data.columns.name = \"Region\"\n",
1062 | "data"
1063 | ]
1064 | },
1065 | {
1066 | "cell_type": "code",
1067 | "execution_count": null,
1068 | "metadata": {},
1069 | "outputs": [],
1070 | "source": [
1071 | "data.plot() # Shortcut for data.plot.line()"
1072 | ]
1073 | },
1074 | {
1075 | "cell_type": "markdown",
1076 | "metadata": {},
1077 | "source": [
1078 | "## Plotly"
1079 | ]
1080 | },
1081 | {
1082 | "cell_type": "code",
1083 | "execution_count": null,
1084 | "metadata": {},
1085 | "outputs": [],
1086 | "source": [
1087 | "# Set the plotting backend to Plotly\n",
1088 | "pd.options.plotting.backend = \"plotly\""
1089 | ]
1090 | },
1091 | {
1092 | "cell_type": "code",
1093 | "execution_count": null,
1094 | "metadata": {},
1095 | "outputs": [],
1096 | "source": [
1097 | "data.plot()"
1098 | ]
1099 | },
1100 | {
1101 | "cell_type": "code",
1102 | "execution_count": null,
1103 | "metadata": {},
1104 | "outputs": [],
1105 | "source": [
1106 | "# Display the same data as bar plot\n",
1107 | "data.plot.bar(barmode=\"group\")"
1108 | ]
1109 | },
1110 | {
1111 | "cell_type": "markdown",
1112 | "metadata": {},
1113 | "source": [
1114 | "# Data Import and Export\n",
1115 | "## Exporting to a CSV file"
1116 | ]
1117 | },
1118 | {
1119 | "cell_type": "code",
1120 | "execution_count": null,
1121 | "metadata": {},
1122 | "outputs": [],
1123 | "source": [
1124 | "df.to_csv(\"course_participants.csv\")"
1125 | ]
1126 | },
1127 | {
1128 | "cell_type": "markdown",
1129 | "metadata": {},
1130 | "source": [
1131 | "## Importing a CSV file"
1132 | ]
1133 | },
1134 | {
1135 | "cell_type": "code",
1136 | "execution_count": null,
1137 | "metadata": {},
1138 | "outputs": [],
1139 | "source": [
1140 | "msft = pd.read_csv(\"csv/MSFT.csv\")"
1141 | ]
1142 | },
1143 | {
1144 | "cell_type": "code",
1145 | "execution_count": null,
1146 | "metadata": {},
1147 | "outputs": [],
1148 | "source": [
1149 | "msft.info()"
1150 | ]
1151 | },
1152 | {
1153 | "cell_type": "code",
1154 | "execution_count": null,
1155 | "metadata": {},
1156 | "outputs": [],
1157 | "source": [
1158 | "# I am selecting a few columns because of space issues\n",
1159 | "# You can also just run: msft.head()\n",
1160 | "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].head()"
1161 | ]
1162 | },
1163 | {
1164 | "cell_type": "code",
1165 | "execution_count": null,
1166 | "metadata": {},
1167 | "outputs": [],
1168 | "source": [
1169 | "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].tail(2)"
1170 | ]
1171 | },
1172 | {
1173 | "cell_type": "code",
1174 | "execution_count": null,
1175 | "metadata": {},
1176 | "outputs": [],
1177 | "source": [
1178 | "msft.loc[:, [\"Adj Close\", \"Volume\"]].describe()"
1179 | ]
1180 | },
1181 | {
1182 | "cell_type": "code",
1183 | "execution_count": null,
1184 | "metadata": {},
1185 | "outputs": [],
1186 | "source": [
1187 | "# The line break in the URL is only to make it fit on the page\n",
1188 | "url = (\"https://raw.githubusercontent.com/fzumstein/\"\n",
1189 | " \"python-for-excel/1st-edition/csv/MSFT.csv\")\n",
1190 | "msft = pd.read_csv(url)"
1191 | ]
1192 | },
1193 | {
1194 | "cell_type": "code",
1195 | "execution_count": null,
1196 | "metadata": {},
1197 | "outputs": [],
1198 | "source": [
1199 | "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].head(2)"
1200 | ]
1201 | }
1202 | ],
1203 | "metadata": {
1204 | "kernelspec": {
1205 | "display_name": "Python 3",
1206 | "language": "python",
1207 | "name": "python3"
1208 | },
1209 | "language_info": {
1210 | "codemirror_mode": {
1211 | "name": "ipython",
1212 | "version": 3
1213 | },
1214 | "file_extension": ".py",
1215 | "mimetype": "text/x-python",
1216 | "name": "python",
1217 | "nbconvert_exporter": "python",
1218 | "pygments_lexer": "ipython3",
1219 | "version": "3.7.4"
1220 | }
1221 | },
1222 | "nbformat": 4,
1223 | "nbformat_minor": 4
1224 | }
1225 |
--------------------------------------------------------------------------------
/ch06.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Time Series\n",
8 | "## DatetimeIndex"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "# Let's start by importing the packages we use in this chapter\n",
18 | "# and by setting the plotting backend to Plotly\n",
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "pd.options.plotting.backend = \"plotly\""
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "# This creates a DatetimeIndex based on a start timestamp,\n",
31 | "# number of periods and frequency (\"D\" = daily).\n",
32 | "daily_index = pd.date_range(\"2020-02-28\", periods=4, freq=\"D\")\n",
33 | "daily_index"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "# This creates a DatetimeIndex based on start/end timestamp.\n",
43 | "# The frequency is set to \"weekly on Sundays\" (\"W-SUN\").\n",
44 | "weekly_index = pd.date_range(\"2020-01-01\", \"2020-01-31\", freq=\"W-SUN\")\n",
45 | "weekly_index"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": null,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "# Construct a DataFrame based on the weekly_index. This could be\n",
55 | "# the visitor count of a museum that only opens on Sundays.\n",
56 | "pd.DataFrame(data=[21, 15, 33, 34],\n",
57 | " columns=[\"visitors\"], index=weekly_index)"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "msft = pd.read_csv(\"csv/MSFT.csv\")"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": null,
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "msft.info()"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "msft.loc[:, \"Date\"] = pd.to_datetime(msft[\"Date\"])"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "metadata": {},
91 | "outputs": [],
92 | "source": [
93 | "msft.dtypes"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {},
100 | "outputs": [],
101 | "source": [
102 | "msft = pd.read_csv(\"csv/MSFT.csv\",\n",
103 | " index_col=\"Date\", parse_dates=[\"Date\"])"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {},
110 | "outputs": [],
111 | "source": [
112 | "msft.info()"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "msft.loc[:, \"Volume\"] = msft[\"Volume\"].astype(\"float\")\n",
122 | "msft[\"Volume\"].dtype"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "msft = msft.sort_index()"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "msft.index.date"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "msft.loc[\"2019\", \"Adj Close\"]"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "msft.loc[\"2019-06\":\"2020-05\", \"Adj Close\"].plot()"
159 | ]
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "metadata": {},
164 | "source": [
165 | "## Working with Time Zones"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "# Add the time information to the date\n",
175 | "msft_close = msft.loc[:, [\"Adj Close\"]].copy()\n",
176 | "msft_close.index = msft_close.index + pd.DateOffset(hours=16)\n",
177 | "msft_close.head(2)"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "# Make the timestamps time-zone-aware\n",
187 | "msft_close = msft_close.tz_localize(\"America/New_York\")\n",
188 | "msft_close.head(2)"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "msft_close = msft_close.tz_convert(\"UTC\")\n",
198 | "msft_close.loc[\"2020-01-02\", \"Adj Close\"] # 21:00 without DST"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "msft_close.loc[\"2020-05-01\", \"Adj Close\"] # 20:00 with DST"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {},
213 | "source": [
214 | "## Shifting and Percentage Changes"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "msft_close.head()"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {},
230 | "outputs": [],
231 | "source": [
232 | "msft_close.shift(1).head()"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": null,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "returns = np.log(msft_close / msft_close.shift(1))\n",
242 | "returns = returns.rename(columns={\"Adj Close\": \"returns\"})\n",
243 | "returns.head()"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "# Plot a histogram with the daily log returns\n",
253 | "returns.plot.hist()"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": null,
259 | "metadata": {},
260 | "outputs": [],
261 | "source": [
262 | "simple_rets = msft_close.pct_change()\n",
263 | "simple_rets = simple_rets.rename(columns={\"Adj Close\": \"simple rets\"})\n",
264 | "simple_rets.head()"
265 | ]
266 | },
267 | {
268 | "cell_type": "markdown",
269 | "metadata": {},
270 | "source": [
271 | "## Rebasing and Correlation"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "parts = [] # List to collect individual DataFrames\n",
281 | "for ticker in [\"AAPL\", \"AMZN\", \"GOOGL\", \"MSFT\"]:\n",
282 | " # \"usecols\" allows us to only read in the Date and Adj Close\n",
283 | " # For a refresher about f-strings, see Chapter 3\n",
284 | " adj_close = pd.read_csv(f\"csv/{ticker}.csv\",\n",
285 | " index_col=\"Date\", parse_dates=[\"Date\"],\n",
286 | " usecols=[\"Date\", \"Adj Close\"])\n",
287 | " # Rename the column into the ticker symbol\n",
288 | " # (If you type this example by hand, make sure to keep the\n",
289 | " # following lines correctly indented!)\n",
290 | " adj_close = adj_close.rename(columns={\"Adj Close\": ticker})\n",
291 | " # Append the stock's DataFrame to the parts list\n",
292 | " parts.append(adj_close)"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {},
299 | "outputs": [],
300 | "source": [
301 | "# Combine the 4 DataFrames into a single DataFrame\n",
302 | "adj_close = pd.concat(parts, axis=1)\n",
303 | "adj_close"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": null,
309 | "metadata": {},
310 | "outputs": [],
311 | "source": [
312 | "adj_close = adj_close.dropna()\n",
313 | "adj_close.info()"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": null,
319 | "metadata": {},
320 | "outputs": [],
321 | "source": [
322 | "# Use a sample from June 2019 - May 2020\n",
323 | "adj_close_sample = adj_close.loc[\"2019-06\":\"2020-05\", :]\n",
324 | "rebased_prices = adj_close_sample / adj_close_sample.iloc[0, :] * 100\n",
325 | "rebased_prices.head(2)"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": null,
331 | "metadata": {},
332 | "outputs": [],
333 | "source": [
334 | "rebased_prices.plot()"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": null,
340 | "metadata": {},
341 | "outputs": [],
342 | "source": [
343 | "# Correlation of daily log returns\n",
344 | "returns = np.log(adj_close / adj_close.shift(1))\n",
345 | "returns.corr()"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": null,
351 | "metadata": {},
352 | "outputs": [],
353 | "source": [
354 | "import plotly.express as px"
355 | ]
356 | },
357 | {
358 | "cell_type": "code",
359 | "execution_count": null,
360 | "metadata": {},
361 | "outputs": [],
362 | "source": [
363 | "fig = px.imshow(returns.corr(),\n",
364 | " x=adj_close.columns,\n",
365 | " y=adj_close.columns,\n",
366 | " color_continuous_scale=list(\n",
367 | " reversed(px.colors.sequential.RdBu)),\n",
368 | " zmin=-1, zmax=1)\n",
369 | "fig.show()"
370 | ]
371 | },
372 | {
373 | "cell_type": "markdown",
374 | "metadata": {},
375 | "source": [
376 | "## Resampling"
377 | ]
378 | },
379 | {
380 | "cell_type": "code",
381 | "execution_count": null,
382 | "metadata": {},
383 | "outputs": [],
384 | "source": [
385 | "end_of_month = adj_close.resample(\"M\").last()\n",
386 | "end_of_month.head()"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": null,
392 | "metadata": {},
393 | "outputs": [],
394 | "source": [
395 | "end_of_month.resample(\"D\").asfreq().head() # No transformation"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": null,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "end_of_month.resample(\"W-FRI\").ffill().head() # Forward fill"
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {},
410 | "source": [
411 | "## Rolling Windows"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": null,
417 | "metadata": {},
418 | "outputs": [],
419 | "source": [
420 | "# Plot the moving average for MSFT with data from 2019\n",
421 | "msft19 = msft.loc[\"2019\", [\"Adj Close\"]].copy()\n",
422 | "\n",
423 | "# Add the 25 day moving average as a new column to the DataFrame\n",
424 | "msft19.loc[:, \"25day average\"] = msft19[\"Adj Close\"].rolling(25).mean()\n",
425 | "msft19.plot()"
426 | ]
427 | }
428 | ],
429 | "metadata": {
430 | "kernelspec": {
431 | "display_name": "Python 3",
432 | "language": "python",
433 | "name": "python3"
434 | },
435 | "language_info": {
436 | "codemirror_mode": {
437 | "name": "ipython",
438 | "version": 3
439 | },
440 | "file_extension": ".py",
441 | "mimetype": "text/x-python",
442 | "name": "python",
443 | "nbconvert_exporter": "python",
444 | "pygments_lexer": "ipython3",
445 | "version": "3.7.4"
446 | }
447 | },
448 | "nbformat": 4,
449 | "nbformat_minor": 4
450 | }
--------------------------------------------------------------------------------
/ch07.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Excel File Manipulation with pandas"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Using pandas with Excel Files"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Case Study: Excel Reporting"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "df = pd.read_excel(\"sales_data/new/January.xlsx\")\n",
40 | "df.info()"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Reading Excel Files with pandas"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "df = pd.read_excel(\"xl/stores.xlsx\",\n",
57 | " sheet_name=\"2019\", skiprows=1, usecols=\"B:F\")\n",
58 | "df"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "df.info()"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "def fix_missing(x):\n",
77 | " return False if x in [\"\", \"MISSING\"] else x"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "df = pd.read_excel(\"xl/stores.xlsx\",\n",
87 | " sheet_name=\"2019\", skiprows=1, usecols=\"B:F\",\n",
88 | " converters={\"Flagship\": fix_missing})\n",
89 | "df"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "# The Flagship column now has Dtype \"bool\"\n",
99 | "df.info()"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "sheets = pd.read_excel(\"xl/stores.xlsx\", sheet_name=[\"2019\", \"2020\"],\n",
109 | " skiprows=1, usecols=[\"Store\", \"Employees\"])\n",
110 | "sheets[\"2019\"].head(2)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "df = pd.read_excel(\"xl/stores.xlsx\", sheet_name=0,\n",
120 | " skiprows=2, skipfooter=3,\n",
121 | " usecols=\"B:C,F\", header=None,\n",
122 | " names=[\"Branch\", \"Employee_Count\", \"Is_Flagship\"])\n",
123 | "df"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "df = pd.read_excel(\"xl/stores.xlsx\", sheet_name=\"2019\",\n",
133 | " skiprows=1, usecols=\"B,C,F\", skipfooter=2,\n",
134 | " na_values=\"MISSING\", keep_default_na=False)\n",
135 | "df"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "f = open(\"output.txt\", \"w\")\n",
145 | "f.write(\"Some text\")\n",
146 | "f.close()"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "### Context Managers and the with Statement"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": null,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "with open(\"output.txt\", \"w\") as f:\n",
163 | " f.write(\"Some text\")"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "with pd.ExcelFile(\"xl/stores.xls\") as f:\n",
173 | " df1 = pd.read_excel(f, \"2019\", skiprows=1, usecols=\"B:F\", nrows=2)\n",
174 | " df2 = pd.read_excel(f, \"2020\", skiprows=1, usecols=\"B:F\", nrows=2)\n",
175 | "\n",
176 | "df1"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "metadata": {},
183 | "outputs": [],
184 | "source": [
185 | "stores = pd.ExcelFile(\"xl/stores.xlsx\")\n",
186 | "stores.sheet_names"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {},
193 | "outputs": [],
194 | "source": [
195 | "url = (\"https://raw.githubusercontent.com/fzumstein/\"\n",
196 | " \"python-for-excel/1st-edition/xl/stores.xlsx\")\n",
197 | "pd.read_excel(url, skiprows=1, usecols=\"B:E\", nrows=2)"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "## Writing Excel Files with pandas"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {},
211 | "outputs": [],
212 | "source": [
213 | "import numpy as np\n",
214 | "import datetime as dt"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "data = [[dt.datetime(2020,1,1, 10, 13), 2.222, 1, True],\n",
224 | " [dt.datetime(2020,1,2), np.nan, 2, False],\n",
225 | " [dt.datetime(2020,1,2), np.inf, 3, True]]\n",
226 | "df = pd.DataFrame(data=data,\n",
227 | " columns=[\"Dates\", \"Floats\", \"Integers\", \"Booleans\"])\n",
228 | "df.index.name=\"index\"\n",
229 | "df"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {},
236 | "outputs": [],
237 | "source": [
238 | "df.to_excel(\"written_with_pandas.xlsx\", sheet_name=\"Output\",\n",
239 | " startrow=1, startcol=1, index=True, header=True,\n",
240 | " na_rep=\"\", inf_rep=\"\")"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": null,
246 | "metadata": {},
247 | "outputs": [],
248 | "source": [
249 | "with pd.ExcelWriter(\"written_with_pandas2.xlsx\") as writer:\n",
250 | " df.to_excel(writer, sheet_name=\"Sheet1\", startrow=1, startcol=1)\n",
251 | " df.to_excel(writer, sheet_name=\"Sheet1\", startrow=10, startcol=1)\n",
252 | " df.to_excel(writer, sheet_name=\"Sheet2\")"
253 | ]
254 | }
255 | ],
256 | "metadata": {
257 | "kernelspec": {
258 | "display_name": "Python 3",
259 | "language": "python",
260 | "name": "python3"
261 | },
262 | "language_info": {
263 | "codemirror_mode": {
264 | "name": "ipython",
265 | "version": 3
266 | },
267 | "file_extension": ".py",
268 | "mimetype": "text/x-python",
269 | "name": "python",
270 | "nbconvert_exporter": "python",
271 | "pygments_lexer": "ipython3",
272 | "version": "3.7.4"
273 | }
274 | },
275 | "nbformat": 4,
276 | "nbformat_minor": 4
277 | }
278 |
--------------------------------------------------------------------------------
/ch08.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Reader and Writer Packages"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## OpenPyXL\n",
15 | "### Reading with OpenPyXL"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "import pandas as pd\n",
25 | "import openpyxl\n",
26 | "import excel\n",
27 | "import datetime as dt"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "# Open the workbook to read cell values.\n",
37 | "# The file is automatically closed again after loading the data.\n",
38 | "book = openpyxl.load_workbook(\"xl/stores.xlsx\", data_only=True)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "# Get a worksheet object by name or index (0-based)\n",
48 | "sheet = book[\"2019\"]\n",
49 | "sheet = book.worksheets[0]"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "# Get a list with all sheet names\n",
59 | "book.sheetnames"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "# Loop through the sheet objects.\n",
69 | "# Instead of \"name\", openpyxl uses \"title\".\n",
70 | "for i in book.worksheets:\n",
71 | " print(i.title)"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "# Getting the dimensions,\n",
81 | "# i.e., the used range of the sheet\n",
82 | "sheet.max_row, sheet.max_column"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "# Read the value of a single cell\n",
92 | "# using \"A1\" notation and using cell indices (1-based)\n",
93 | "sheet[\"B6\"].value\n",
94 | "sheet.cell(row=6, column=2).value"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "# Read in a range of cell values by using our excel module\n",
104 | "data = excel.read(book[\"2019\"], (2, 2), (8, 6))\n",
105 | "data[:2] # Print the first two rows"
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "### Writing with OpenPyXL"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "import openpyxl\n",
122 | "from openpyxl.drawing.image import Image\n",
123 | "from openpyxl.chart import BarChart, Reference\n",
124 | "from openpyxl.styles import Font, colors\n",
125 | "from openpyxl.styles.borders import Border, Side\n",
126 | "from openpyxl.styles.alignment import Alignment\n",
127 | "from openpyxl.styles.fills import PatternFill\n",
128 | "import excel"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {},
135 | "outputs": [],
136 | "source": [
137 | "# Instantiate a workbook\n",
138 | "book = openpyxl.Workbook()\n",
139 | "\n",
140 | "# Get the first sheet and give it a name\n",
141 | "sheet = book.active\n",
142 | "sheet.title = \"Sheet1\"\n",
143 | "\n",
144 | "# Writing individual cells using A1 notation\n",
145 | "# and cell indices (1-based)\n",
146 | "sheet[\"A1\"].value = \"Hello 1\"\n",
147 | "sheet.cell(row=2, column=1, value=\"Hello 2\")\n",
148 | "\n",
149 | "# Formatting: fill color, alignment, border and font\n",
150 | "font_format = Font(color=\"FF0000\", bold=True)\n",
151 | "thin = Side(border_style=\"thin\", color=\"FF0000\")\n",
152 | "sheet[\"A3\"].value = \"Hello 3\"\n",
153 | "sheet[\"A3\"].font = font_format\n",
154 | "sheet[\"A3\"].border = Border(top=thin, left=thin,\n",
155 | " right=thin, bottom=thin)\n",
156 | "sheet[\"A3\"].alignment = Alignment(horizontal=\"center\")\n",
157 | "sheet[\"A3\"].fill = PatternFill(fgColor=\"FFFF00\", fill_type=\"solid\")\n",
158 | "\n",
159 | "# Number formatting (using Excel's formatting strings)\n",
160 | "sheet[\"A4\"].value = 3.3333\n",
161 | "sheet[\"A4\"].number_format = \"0.00\"\n",
162 | "\n",
163 | "# Date formatting (using Excel's formatting strings)\n",
164 | "sheet[\"A5\"].value = dt.date(2016, 10, 13)\n",
165 | "sheet[\"A5\"].number_format = \"mm/dd/yy\"\n",
166 | "\n",
167 | "# Formula: you must use the English name of the formula\n",
168 | "# with commas as delimiters\n",
169 | "sheet[\"A6\"].value = \"=SUM(A4, 2)\"\n",
170 | "\n",
171 | "# Image\n",
172 | "sheet.add_image(Image(\"images/python.png\"), \"C1\")\n",
173 | "\n",
174 | "# Two-dimensional list (we're using our excel module)\n",
175 | "data = [[None, \"North\", \"South\"],\n",
176 | " [\"Last Year\", 2, 5],\n",
177 | " [\"This Year\", 3, 6]]\n",
178 | "excel.write(sheet, data, \"A10\")\n",
179 | "\n",
180 | "# Chart\n",
181 | "chart = BarChart()\n",
182 | "chart.type = \"col\"\n",
183 | "chart.title = \"Sales Per Region\"\n",
184 | "chart.x_axis.title = \"Regions\"\n",
185 | "chart.y_axis.title = \"Sales\"\n",
186 | "chart_data = Reference(sheet, min_row=11, min_col=1,\n",
187 | " max_row=12, max_col=3)\n",
188 | "chart_categories = Reference(sheet, min_row=10, min_col=2,\n",
189 | " max_row=10, max_col=3)\n",
190 | "# from_rows interprets the data in the same way\n",
191 | "# as if you would add a chart manually in Excel\n",
192 | "chart.add_data(chart_data, titles_from_data=True, from_rows=True)\n",
193 | "chart.set_categories(chart_categories)\n",
194 | "sheet.add_chart(chart, \"A15\")\n",
195 | "\n",
196 | "# Saving the workbook creates the file on disk\n",
197 | "book.save(\"openpyxl.xlsx\")"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "book = openpyxl.Workbook()\n",
207 | "sheet = book.active\n",
208 | "sheet[\"A1\"].value = \"This is a template\"\n",
209 | "book.template = True\n",
210 | "book.save(\"template.xltx\")"
211 | ]
212 | },
213 | {
214 | "cell_type": "markdown",
215 | "metadata": {},
216 | "source": [
217 | "### Editing with OpenPyXL"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": null,
223 | "metadata": {},
224 | "outputs": [],
225 | "source": [
226 | "# Read the stores.xlsx file, change a cell\n",
227 | "# and store it under a new location/name.\n",
228 | "book = openpyxl.load_workbook(\"xl/stores.xlsx\")\n",
229 | "book[\"2019\"][\"A1\"].value = \"modified\"\n",
230 | "book.save(\"stores_edited.xlsx\")"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": null,
236 | "metadata": {},
237 | "outputs": [],
238 | "source": [
239 | "book = openpyxl.load_workbook(\"xl/macro.xlsm\", keep_vba=True)\n",
240 | "book[\"Sheet1\"][\"A1\"].value = \"Click the button!\"\n",
241 | "book.save(\"macro_openpyxl.xlsm\")"
242 | ]
243 | },
244 | {
245 | "cell_type": "markdown",
246 | "metadata": {},
247 | "source": [
248 | "## XlsxWriter"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {},
255 | "outputs": [],
256 | "source": [
257 | "import datetime as dt\n",
258 | "import xlsxwriter\n",
259 | "import excel"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {},
266 | "outputs": [],
267 | "source": [
268 | "# Instantiate a workbook\n",
269 | "book = xlsxwriter.Workbook(\"xlsxwriter.xlsx\")\n",
270 | "\n",
271 | "# Add a sheet and give it a name\n",
272 | "sheet = book.add_worksheet(\"Sheet1\")\n",
273 | "\n",
274 | "# Writing individual cells using A1 notation\n",
275 | "# and cell indices (0-based)\n",
276 | "sheet.write(\"A1\", \"Hello 1\")\n",
277 | "sheet.write(1, 0, \"Hello 2\")\n",
278 | "\n",
279 | "# Formatting: fill color, alignment, border and font\n",
280 | "formatting = book.add_format({\"font_color\": \"#FF0000\",\n",
281 | " \"bg_color\": \"#FFFF00\",\n",
282 | " \"bold\": True, \"align\": \"center\",\n",
283 | " \"border\": 1, \"border_color\": \"#FF0000\"})\n",
284 | "sheet.write(\"A3\", \"Hello 3\", formatting)\n",
285 | "\n",
286 | "# Number formatting (using Excel's formatting strings)\n",
287 | "number_format = book.add_format({\"num_format\": \"0.00\"})\n",
288 | "sheet.write(\"A4\", 3.3333, number_format)\n",
289 | "\n",
290 | "# Date formatting (using Excel's formatting strings)\n",
291 | "date_format = book.add_format({\"num_format\": \"mm/dd/yy\"})\n",
292 | "sheet.write(\"A5\", dt.date(2016, 10, 13), date_format)\n",
293 | "\n",
294 | "# Formula: you must use the English name of the formula\n",
295 | "# with commas as delimiters\n",
296 | "sheet.write(\"A6\", \"=SUM(A4, 2)\")\n",
297 | "\n",
298 | "# Image\n",
299 | "sheet.insert_image(0, 2, \"images/python.png\")\n",
300 | "\n",
301 | "# Two-dimensional list (we're using our excel module)\n",
302 | "data = [[None, \"North\", \"South\"],\n",
303 | " [\"Last Year\", 2, 5],\n",
304 | " [\"This Year\", 3, 6]]\n",
305 | "excel.write(sheet, data, \"A10\")\n",
306 | "\n",
307 | "# Chart: see the file \"sales_report_xlsxwriter.py\" in the\n",
308 | "# companion repo to see how you can work with indices\n",
309 | "# instead of cell addresses\n",
310 | "chart = book.add_chart({\"type\": \"column\"})\n",
311 | "chart.set_title({\"name\": \"Sales per Region\"})\n",
312 | "chart.add_series({\"name\": \"=Sheet1!A11\",\n",
313 | " \"categories\": \"=Sheet1!B10:C10\",\n",
314 | " \"values\": \"=Sheet1!B11:C11\"})\n",
315 | "chart.add_series({\"name\": \"=Sheet1!A12\",\n",
316 | " \"categories\": \"=Sheet1!B10:C10\",\n",
317 | " \"values\": \"=Sheet1!B12:C12\"})\n",
318 | "chart.set_x_axis({\"name\": \"Regions\"})\n",
319 | "chart.set_y_axis({\"name\": \"Sales\"})\n",
320 | "sheet.insert_chart(\"A15\", chart)\n",
321 | "\n",
322 | "# Closing the workbook creates the file on disk\n",
323 | "book.close()"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": null,
329 | "metadata": {},
330 | "outputs": [],
331 | "source": [
332 | "book = xlsxwriter.Workbook(\"macro_xlsxwriter.xlsm\")\n",
333 | "sheet = book.add_worksheet(\"Sheet1\")\n",
334 | "sheet.write(\"A1\", \"Click the button!\")\n",
335 | "book.add_vba_project(\"xl/vbaProject.bin\")\n",
336 | "sheet.insert_button(\"A3\", {\"macro\": \"Hello\", \"caption\": \"Button 1\",\n",
337 | " \"width\": 130, \"height\": 35})\n",
338 | "book.close()"
339 | ]
340 | },
341 | {
342 | "cell_type": "markdown",
343 | "metadata": {},
344 | "source": [
345 | "## pyxlsb"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": null,
351 | "metadata": {},
352 | "outputs": [],
353 | "source": [
354 | "import pyxlsb\n",
355 | "import excel"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": null,
361 | "metadata": {},
362 | "outputs": [],
363 | "source": [
364 | "# Loop through sheets. With pyxlsb, the workbook\n",
365 | "# and sheet objects can be used as context managers.\n",
366 | "# book.sheets returns a list of sheet names, not objects!\n",
367 | "# To get a sheet object, use get_sheet() instead.\n",
368 | "with pyxlsb.open_workbook(\"xl/stores.xlsb\") as book:\n",
369 | " for sheet_name in book.sheets:\n",
370 | " with book.get_sheet(sheet_name) as sheet:\n",
371 | " dim = sheet.dimension\n",
372 | " print(f\"Sheet '{sheet_name}' has \"\n",
373 | " f\"{dim.h} rows and {dim.w} cols\")"
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "execution_count": null,
379 | "metadata": {},
380 | "outputs": [],
381 | "source": [
382 | "# Read in the values of a range of cells by using our excel module.\n",
383 | "# Instead of \"2019\", you could also use its index (1-based).\n",
384 | "with pyxlsb.open_workbook(\"xl/stores.xlsb\") as book:\n",
385 | " with book.get_sheet(\"2019\") as sheet:\n",
386 | " data = excel.read(sheet, \"B2\")\n",
387 | "data[:2] # Print the first two rows"
388 | ]
389 | },
390 | {
391 | "cell_type": "code",
392 | "execution_count": null,
393 | "metadata": {},
394 | "outputs": [],
395 | "source": [
396 | "from pyxlsb import convert_date\n",
397 | "convert_date(data[1][3])"
398 | ]
399 | },
400 | {
401 | "cell_type": "code",
402 | "execution_count": null,
403 | "metadata": {},
404 | "outputs": [],
405 | "source": [
406 | "df = pd.read_excel(\"xl/stores.xlsb\", engine=\"pyxlsb\")"
407 | ]
408 | },
409 | {
410 | "cell_type": "markdown",
411 | "metadata": {},
412 | "source": [
413 | "## xlrd, xlwt and xlutils"
414 | ]
415 | },
416 | {
417 | "cell_type": "markdown",
418 | "metadata": {},
419 | "source": [
420 | "### Reading with xlrd"
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": null,
426 | "metadata": {},
427 | "outputs": [],
428 | "source": [
429 | "import xlrd\n",
430 | "import xlwt\n",
431 | "from xlwt.Utils import cell_to_rowcol2\n",
432 | "import xlutils\n",
433 | "import excel"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": null,
439 | "metadata": {},
440 | "outputs": [],
441 | "source": [
442 | "# Open the workbook to read cell values. The file is\n",
443 | "# automatically closed again after loading the data.\n",
444 | "book = xlrd.open_workbook(\"xl/stores.xls\")"
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": null,
450 | "metadata": {},
451 | "outputs": [],
452 | "source": [
453 | "# Get a list with all sheet names\n",
454 | "book.sheet_names()"
455 | ]
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": null,
460 | "metadata": {},
461 | "outputs": [],
462 | "source": [
463 | "# Loop through the sheet objects\n",
464 | "for sheet in book.sheets():\n",
465 | " print(sheet.name)"
466 | ]
467 | },
468 | {
469 | "cell_type": "code",
470 | "execution_count": null,
471 | "metadata": {},
472 | "outputs": [],
473 | "source": [
474 | "# Get a sheet object by name or index (0-based)\n",
475 | "sheet = book.sheet_by_index(0)\n",
476 | "sheet = book.sheet_by_name(\"2019\")"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": null,
482 | "metadata": {},
483 | "outputs": [],
484 | "source": [
485 | "# Dimensions\n",
486 | "sheet.nrows, sheet.ncols"
487 | ]
488 | },
489 | {
490 | "cell_type": "code",
491 | "execution_count": null,
492 | "metadata": {},
493 | "outputs": [],
494 | "source": [
495 | "# Read the value of a single cell\n",
496 | "# using \"A1\" notation and using cell indices (0-based).\n",
497 | "# The \"*\" unpacks the tuple that cell_to_rowcol2 returns\n",
498 | "# into individual arguments.\n",
499 | "sheet.cell(*cell_to_rowcol2(\"B3\")).value\n",
500 | "sheet.cell(2, 1).value"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": null,
506 | "metadata": {},
507 | "outputs": [],
508 | "source": [
509 | "# Read in a range of cell values by using our excel module\n",
510 | "data = excel.read(sheet, \"B2\")\n",
511 | "data[:2] # Print the first two rows"
512 | ]
513 | },
514 | {
515 | "cell_type": "markdown",
516 | "metadata": {},
517 | "source": [
518 | "### Writing with xlwt"
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": null,
524 | "metadata": {},
525 | "outputs": [],
526 | "source": [
527 | "import xlwt\n",
528 | "from xlwt.Utils import cell_to_rowcol2\n",
529 | "import datetime as dt\n",
530 | "import excel"
531 | ]
532 | },
533 | {
534 | "cell_type": "code",
535 | "execution_count": null,
536 | "metadata": {},
537 | "outputs": [],
538 | "source": [
539 | "# Instantiate a workbook\n",
540 | "book = xlwt.Workbook()\n",
541 | "\n",
542 | "# Add a sheet and give it a name\n",
543 | "sheet = book.add_sheet(\"Sheet1\")\n",
544 | "\n",
545 | "# Writing individual cells using A1 notation\n",
546 | "# and cell indices (0-based)\n",
547 | "sheet.write(*cell_to_rowcol2(\"A1\"), \"Hello 1\")\n",
548 | "sheet.write(r=1, c=0, label=\"Hello 2\")\n",
549 | "\n",
550 | "# Formatting: fill color, alignment, border and font\n",
551 | "formatting = xlwt.easyxf(\"font: bold on, color red;\"\n",
552 | " \"align: horiz center;\"\n",
553 | " \"borders: top_color red, bottom_color red,\"\n",
554 | " \"right_color red, left_color red,\"\n",
555 | " \"left thin, right thin,\"\n",
556 | " \"top thin, bottom thin;\"\n",
557 | " \"pattern: pattern solid, fore_color yellow;\")\n",
558 | "sheet.write(r=2, c=0, label=\"Hello 3\", style=formatting)\n",
559 | "\n",
560 | "# Number formatting (using Excel's formatting strings)\n",
561 | "number_format = xlwt.easyxf(num_format_str=\"0.00\")\n",
562 | "sheet.write(3, 0, 3.3333, number_format)\n",
563 | "\n",
564 | "# Date formatting (using Excel's formatting strings)\n",
565 | "date_format = xlwt.easyxf(num_format_str=\"mm/dd/yyyy\")\n",
566 | "sheet.write(4, 0, dt.datetime(2012, 2, 3), date_format)\n",
567 | "\n",
568 | "# Formula: you must use the English name of the formula\n",
569 | "# with commas as delimiters\n",
570 | "sheet.write(5, 0, xlwt.Formula(\"SUM(A4, 2)\"))\n",
571 | "\n",
572 | "# Two-dimensional list (we're using our excel module)\n",
573 | "data = [[None, \"North\", \"South\"],\n",
574 | " [\"Last Year\", 2, 5],\n",
575 | " [\"This Year\", 3, 6]]\n",
576 | "excel.write(sheet, data, \"A10\")\n",
577 | "\n",
578 | "# Picture (only allows to add bmp format)\n",
579 | "sheet.insert_bitmap(\"images/python.bmp\", 0, 2)\n",
580 | "\n",
581 | "# This writes the file to disk\n",
582 | "book.save(\"xlwt.xls\")"
583 | ]
584 | },
585 | {
586 | "cell_type": "markdown",
587 | "metadata": {},
588 | "source": [
589 | "### Editing with xlutils"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": null,
595 | "metadata": {},
596 | "outputs": [],
597 | "source": [
598 | "import xlutils.copy"
599 | ]
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "metadata": {},
605 | "outputs": [],
606 | "source": [
607 | "book = xlrd.open_workbook(\"xl/stores.xls\", formatting_info=True)\n",
608 | "book = xlutils.copy.copy(book)\n",
609 | "book.get_sheet(0).write(0, 0, \"changed!\")\n",
610 | "book.save(\"stores_edited.xls\")"
611 | ]
612 | },
613 | {
614 | "cell_type": "markdown",
615 | "metadata": {},
616 | "source": [
617 | "# Advanced Topics\n",
618 | "## Working with Big Files"
619 | ]
620 | },
621 | {
622 | "cell_type": "markdown",
623 | "metadata": {},
624 | "source": [
625 | "### Writing with OpenPyXL"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": null,
631 | "metadata": {},
632 | "outputs": [],
633 | "source": [
634 | "book = openpyxl.Workbook(write_only=True)\n",
635 | "# With write_only=True, book.active doesn't work\n",
636 | "sheet = book.create_sheet()\n",
637 | "# This will produce a sheet with 1000 x 200 cells\n",
638 | "for row in range(1000):\n",
639 | " sheet.append(list(range(200)))\n",
640 | "book.save(\"openpyxl_optimized.xlsx\")"
641 | ]
642 | },
643 | {
644 | "cell_type": "markdown",
645 | "metadata": {},
646 | "source": [
647 | "### Writing with XlsxWriter"
648 | ]
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": null,
653 | "metadata": {},
654 | "outputs": [],
655 | "source": [
656 | "book = xlsxwriter.Workbook(\"xlsxwriter_optimized.xlsx\",\n",
657 | " options={\"constant_memory\": True})\n",
658 | "sheet = book.add_worksheet()\n",
659 | "# This will produce a sheet with 1000 x 200 cells\n",
660 | "for row in range(1000):\n",
661 | " sheet.write_row(row , 0, list(range(200)))\n",
662 | "book.close()"
663 | ]
664 | },
665 | {
666 | "cell_type": "markdown",
667 | "metadata": {},
668 | "source": [
669 | "### Reading with xlrd"
670 | ]
671 | },
672 | {
673 | "cell_type": "code",
674 | "execution_count": null,
675 | "metadata": {},
676 | "outputs": [],
677 | "source": [
678 | "with xlrd.open_workbook(\"xl/stores.xls\", on_demand=True) as book:\n",
679 | " sheet = book.sheet_by_index(0) # Only loads the first sheet"
680 | ]
681 | },
682 | {
683 | "cell_type": "code",
684 | "execution_count": null,
685 | "metadata": {},
686 | "outputs": [],
687 | "source": [
688 | "with xlrd.open_workbook(\"xl/stores.xls\", on_demand=True) as book:\n",
689 | " with pd.ExcelFile(book, engine=\"xlrd\") as f:\n",
690 | " df = pd.read_excel(f, sheet_name=0)"
691 | ]
692 | },
693 | {
694 | "cell_type": "markdown",
695 | "metadata": {},
696 | "source": [
697 | "### Reading with OpenPyXL"
698 | ]
699 | },
700 | {
701 | "cell_type": "code",
702 | "execution_count": null,
703 | "metadata": {},
704 | "outputs": [],
705 | "source": [
706 | "book = openpyxl.load_workbook(\"xl/big.xlsx\",\n",
707 | " data_only=True, read_only=True,\n",
708 | " keep_links=False)\n",
709 | "# Perform the desired read operations here\n",
710 | "book.close() # Required with read_only=True"
711 | ]
712 | },
713 | {
714 | "cell_type": "markdown",
715 | "metadata": {},
716 | "source": [
717 | "### Reading in Parallel"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": null,
723 | "metadata": {},
724 | "outputs": [],
725 | "source": [
726 | "%%time\n",
727 | "data = pd.read_excel(\"xl/big.xlsx\",\n",
728 | " sheet_name=None, engine=\"openpyxl\")"
729 | ]
730 | },
731 | {
732 | "cell_type": "code",
733 | "execution_count": null,
734 | "metadata": {},
735 | "outputs": [],
736 | "source": [
737 | "%%time\n",
738 | "import parallel_pandas\n",
739 | "data = parallel_pandas.read_excel(\"xl/big.xlsx\", sheet_name=None)"
740 | ]
741 | },
742 | {
743 | "cell_type": "markdown",
744 | "metadata": {},
745 | "source": [
746 | "## Formatting DataFrames in Excel"
747 | ]
748 | },
749 | {
750 | "cell_type": "code",
751 | "execution_count": null,
752 | "metadata": {},
753 | "outputs": [],
754 | "source": [
755 | "with pd.ExcelFile(\"xl/stores.xlsx\", engine=\"openpyxl\") as xlfile:\n",
756 | " # Read a DataFrame\n",
757 | " df = pd.read_excel(xlfile, sheet_name=\"2020\")\n",
758 | "\n",
759 | " # Get the OpenPyXL workbook object\n",
760 | " book = xlfile.book\n",
761 | "\n",
762 | " # From here on, it's OpenPyXL code\n",
763 | " sheet = book[\"2019\"]\n",
764 | " value = sheet[\"B3\"].value # Read a single value"
765 | ]
766 | },
767 | {
768 | "cell_type": "code",
769 | "execution_count": null,
770 | "metadata": {},
771 | "outputs": [],
772 | "source": [
773 | "with pd.ExcelWriter(\"pandas_and_openpyxl.xlsx\",\n",
774 | " engine=\"openpyxl\") as writer:\n",
775 | " df = pd.DataFrame({\"col1\": [1, 2, 3, 4], \"col2\": [5, 6, 7, 8]})\n",
776 | " # Write a DataFrame\n",
777 | " df.to_excel(writer, \"Sheet1\", startrow=4, startcol=2)\n",
778 | "\n",
779 | " # Get the OpenPyXL workbook and sheet objects\n",
780 | " book = writer.book\n",
781 | " sheet = writer.sheets[\"Sheet1\"]\n",
782 | "\n",
783 | " # From here on, it's OpenPyXL code\n",
784 | " sheet[\"A1\"].value = \"This is a Title\" # Write a single cell value"
785 | ]
786 | },
787 | {
788 | "cell_type": "code",
789 | "execution_count": null,
790 | "metadata": {},
791 | "outputs": [],
792 | "source": [
793 | "df = pd.DataFrame({\"col1\": [1, -2], \"col2\": [-3, 4]},\n",
794 | " index=[\"row1\", \"row2\"])\n",
795 | "df.index.name = \"ix\"\n",
796 | "df"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": null,
802 | "metadata": {},
803 | "outputs": [],
804 | "source": [
805 | "from openpyxl.styles import PatternFill"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": null,
811 | "metadata": {},
812 | "outputs": [],
813 | "source": [
814 | "with pd.ExcelWriter(\"formatting_openpyxl.xlsx\",\n",
815 | " engine=\"openpyxl\") as writer:\n",
816 | " # Write out the df with the default formatting to A1\n",
817 | " df.to_excel(writer, startrow=0, startcol=0)\n",
818 | "\n",
819 | " # Write out the df with custom index/header formatting to A6\n",
820 | " startrow, startcol = 0, 5\n",
821 | " # 1. Write out the data part of the DataFrame\n",
822 | " df.to_excel(writer, header=False, index=False,\n",
823 | " startrow=startrow + 1, startcol=startcol + 1)\n",
824 | " # Get the sheet object and create a style object\n",
825 | " sheet = writer.sheets[\"Sheet1\"]\n",
826 | " style = PatternFill(fgColor=\"D9D9D9\", fill_type=\"solid\")\n",
827 | "\n",
828 | " # 2. Write out the styled column headers\n",
829 | " for i, col in enumerate(df.columns):\n",
830 | " sheet.cell(row=startrow + 1, column=i + startcol + 2,\n",
831 | " value=col).fill = style\n",
832 | "\n",
833 | " # 3. Write out the styled index\n",
834 | " index = [df.index.name if df.index.name else None] + list(df.index)\n",
835 | " for i, row in enumerate(index):\n",
836 | " sheet.cell(row=i + startrow + 1, column=startcol + 1,\n",
837 | " value=row).fill = style"
838 | ]
839 | },
840 | {
841 | "cell_type": "code",
842 | "execution_count": null,
843 | "metadata": {},
844 | "outputs": [],
845 | "source": [
846 | "# Formatting index/headers with XlsxWriter\n",
847 | "with pd.ExcelWriter(\"formatting_xlsxwriter.xlsx\",\n",
848 | " engine=\"xlsxwriter\") as writer:\n",
849 | " # Write out the df with the default formatting to A1\n",
850 | " df.to_excel(writer, startrow=0, startcol=0)\n",
851 | "\n",
852 | " # Write out the df with custom index/header formatting to A6\n",
853 | " startrow, startcol = 0, 5\n",
854 | " # 1. Write out the data part of the DataFrame\n",
855 | " df.to_excel(writer, header=False, index=False,\n",
856 | " startrow=startrow + 1, startcol=startcol + 1)\n",
857 | " # Get the book and sheet object and create a style object\n",
858 | " book = writer.book\n",
859 | " sheet = writer.sheets[\"Sheet1\"]\n",
860 | " style = book.add_format({\"bg_color\": \"#D9D9D9\"})\n",
861 | "\n",
862 | " # 2. Write out the styled column headers\n",
863 | " for i, col in enumerate(df.columns):\n",
864 | " sheet.write(startrow, startcol + i + 1, col, style)\n",
865 | "\n",
866 | " # 3. Write out the styled index\n",
867 | " index = [df.index.name if df.index.name else None] + list(df.index)\n",
868 | " for i, row in enumerate(index):\n",
869 | " sheet.write(startrow + i, startcol, row, style)"
870 | ]
871 | },
872 | {
873 | "cell_type": "code",
874 | "execution_count": null,
875 | "metadata": {},
876 | "outputs": [],
877 | "source": [
878 | "from openpyxl.styles import Alignment"
879 | ]
880 | },
881 | {
882 | "cell_type": "code",
883 | "execution_count": null,
884 | "metadata": {},
885 | "outputs": [],
886 | "source": [
887 | "with pd.ExcelWriter(\"data_format_openpyxl.xlsx\",\n",
888 | " engine=\"openpyxl\") as writer:\n",
889 | " # Write out the DataFrame\n",
890 | " df.to_excel(writer)\n",
891 | " \n",
892 | " # Get the book and sheet objects\n",
893 | " book = writer.book\n",
894 | " sheet = writer.sheets[\"Sheet1\"]\n",
895 | " \n",
896 | " # Formatting individual cells\n",
897 | " nrows, ncols = df.shape\n",
898 | " for row in range(nrows):\n",
899 | " for col in range(ncols):\n",
900 | " # +1 to account for the header/index\n",
901 | " # +1 since OpenPyXL is 1-based\n",
902 | " cell = sheet.cell(row=row + 2,\n",
903 | " column=col + 2)\n",
904 | " cell.number_format = \"0.000\"\n",
905 | " cell.alignment = Alignment(horizontal=\"center\")"
906 | ]
907 | },
908 | {
909 | "cell_type": "code",
910 | "execution_count": null,
911 | "metadata": {},
912 | "outputs": [],
913 | "source": [
914 | "with pd.ExcelWriter(\"data_format_xlsxwriter.xlsx\",\n",
915 | " engine=\"xlsxwriter\") as writer:\n",
916 | " # Write out the DataFrame\n",
917 | " df.to_excel(writer)\n",
918 | "\n",
919 | " # Get the book and sheet objects\n",
920 | " book = writer.book\n",
921 | " sheet = writer.sheets[\"Sheet1\"]\n",
922 | " \n",
923 | " # Formatting the columns (individual cells can't be formatted)\n",
924 | " number_format = book.add_format({\"num_format\": \"0.000\",\n",
925 | " \"align\": \"center\"})\n",
926 | " sheet.set_column(first_col=1, last_col=2,\n",
927 | " cell_format=number_format)"
928 | ]
929 | },
930 | {
931 | "cell_type": "code",
932 | "execution_count": null,
933 | "metadata": {},
934 | "outputs": [],
935 | "source": [
936 | "df.style.applymap(lambda x: \"number-format: 0.000;\"\n",
937 | " \"text-align: center\")\\\n",
938 | " .to_excel(\"styled.xlsx\")"
939 | ]
940 | },
941 | {
942 | "cell_type": "code",
943 | "execution_count": null,
944 | "metadata": {},
945 | "outputs": [],
946 | "source": [
947 | "df = pd.DataFrame({\"Date\": [dt.date(2020, 1, 1)],\n",
948 | " \"Datetime\": [dt.datetime(2020, 1, 1, 10)]})\n",
949 | "with pd.ExcelWriter(\"date.xlsx\",\n",
950 | " date_format=\"yyyy-mm-dd\",\n",
951 | " datetime_format=\"yyyy-mm-dd hh:mm:ss\") as writer:\n",
952 | " df.to_excel(writer)"
953 | ]
954 | }
955 | ],
956 | "metadata": {
957 | "kernelspec": {
958 | "display_name": "Python 3",
959 | "language": "python",
960 | "name": "python3"
961 | },
962 | "language_info": {
963 | "codemirror_mode": {
964 | "name": "ipython",
965 | "version": 3
966 | },
967 | "file_extension": ".py",
968 | "mimetype": "text/x-python",
969 | "name": "python",
970 | "nbconvert_exporter": "python",
971 | "pygments_lexer": "ipython3",
972 | "version": "3.7.4"
973 | }
974 | },
975 | "nbformat": 4,
976 | "nbformat_minor": 4
977 | }
978 |
--------------------------------------------------------------------------------
/ch09.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Excel Automation"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Getting Started with xlwings"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Using Excel as Data Viewer"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "# First, let's import the packages that we'll use in this chapter\n",
31 | "import datetime as dt\n",
32 | "import xlwings as xw\n",
33 | "import pandas as pd\n",
34 | "import numpy as np"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "# Let's create a DataFrame based on pseudorandom numbers and\n",
44 | "# with enough rows that only the head and tail are shown\n",
45 | "df = pd.DataFrame(data=np.random.randn(100, 5),\n",
46 | " columns=[f\"Trial {i}\" for i in range(1, 6)])\n",
47 | "df"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "# View the DataFrame in Excel\n",
57 | "xw.view(df)"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "## The Excel Object Model"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "# Create a new empty workbook and print its name. This is the\n",
74 | "# book we will use to run most of the code samples in this chapter.\n",
75 | "book = xw.Book()\n",
76 | "book.name"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "# Accessing the sheets collection\n",
86 | "book.sheets"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "# Get a sheet object by index or name. You will need to adjust\n",
96 | "# \"Sheet1\" if your sheet is named differently.\n",
97 | "sheet1 = book.sheets[0]\n",
98 | "sheet1 = book.sheets[\"Sheet1\"]"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {},
105 | "outputs": [],
106 | "source": [
107 | "sheet1.range(\"A1\")"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "# Most common tasks: write values...\n",
117 | "sheet1.range(\"A1\").value = [[1, 2],\n",
118 | " [3, 4]]\n",
119 | "sheet1.range(\"A4\").value = \"Hello!\""
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "# ...and read values\n",
129 | "sheet1.range(\"A1:B2\").value"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "sheet1.range(\"A4\").value"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "# Indexing\n",
148 | "sheet1.range(\"A1:B2\")[0, 0]"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {},
155 | "outputs": [],
156 | "source": [
157 | "# Slicing\n",
158 | "sheet1.range(\"A1:B2\")[:, 1]"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "# Single cell: A1 notation\n",
168 | "sheet1[\"A1\"]"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": null,
174 | "metadata": {},
175 | "outputs": [],
176 | "source": [
177 | "# Multiple cells: A1 notation\n",
178 | "sheet1[\"A1:B2\"]"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": null,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "# Single cell: indexing\n",
188 | "sheet1[0, 0]"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "# Multiple cells: slicing\n",
198 | "sheet1[:2, :2]"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "# D10 via sheet indexing\n",
208 | "sheet1[9, 3]"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {},
215 | "outputs": [],
216 | "source": [
217 | "# D10 via range object\n",
218 | "sheet1.range((10, 4))"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {},
225 | "outputs": [],
226 | "source": [
227 | "# D10:F11 via sheet slicing\n",
228 | "sheet1[9:11, 3:6]"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "# D10:F11 via range object\n",
238 | "sheet1.range((10, 4), (11, 6))"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "metadata": {},
245 | "outputs": [],
246 | "source": [
247 | "sheet1[\"A1\"].sheet.book.app"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "# Get one app object from the open workbook\n",
257 | "# and create an additional invisible app instance\n",
258 | "visible_app = sheet1.book.app\n",
259 | "invisible_app = xw.App(visible=False)"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {},
266 | "outputs": [],
267 | "source": [
268 | "# List the book names that are open in each instance\n",
269 | "# by using a list comprehension\n",
270 | "[book.name for book in visible_app.books]"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "[book.name for book in invisible_app.books]"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {},
286 | "outputs": [],
287 | "source": [
288 | "# An app key represents the process ID (PID)\n",
289 | "xw.apps.keys()"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": null,
295 | "metadata": {},
296 | "outputs": [],
297 | "source": [
298 | "# It can also be accessed via the pid attribute\n",
299 | "xw.apps.active.pid"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "# Work with the book in the invisible Excel instance\n",
309 | "invisible_book = invisible_app.books[0]\n",
310 | "invisible_book.sheets[0][\"A1\"].value = \"Created by an invisible app.\""
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": null,
316 | "metadata": {},
317 | "outputs": [],
318 | "source": [
319 | "# Save the Excel workbook in the xl directory\n",
320 | "invisible_book.save(\"xl/invisible.xlsx\")"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": null,
326 | "metadata": {},
327 | "outputs": [],
328 | "source": [
329 | "# Quit the invisible Excel instance\n",
330 | "invisible_app.quit()"
331 | ]
332 | },
333 | {
334 | "cell_type": "markdown",
335 | "metadata": {},
336 | "source": [
337 | "## Running VBA Code"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "metadata": {},
344 | "outputs": [],
345 | "source": [
346 | "vba_book = xw.Book(\"xl/vba.xlsm\")"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": null,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": [
355 | "# Instantiate a macro object with the VBA function\n",
356 | "mysum = vba_book.macro(\"Module1.MySum\")\n",
357 | "# Call a VBA function\n",
358 | "mysum(5, 4)"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": null,
364 | "metadata": {},
365 | "outputs": [],
366 | "source": [
367 | "# It works the same with a VBA Sub procedure\n",
368 | "show_msgbox = vba_book.macro(\"Module1.ShowMsgBox\")\n",
369 | "show_msgbox(\"Hello xlwings!\")"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "# Close the book again (make sure to close the MessageBox first)\n",
379 | "vba_book.close()"
380 | ]
381 | },
382 | {
383 | "cell_type": "markdown",
384 | "metadata": {},
385 | "source": [
386 | "# Converters, Options and Collections"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {},
392 | "source": [
393 | "## Working with DataFrames"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": null,
399 | "metadata": {},
400 | "outputs": [],
401 | "source": [
402 | "data = [[\"Mark\", 55, \"Italy\", 4.5, \"Europe\"],\n",
403 | " [\"John\", 33, \"USA\", 6.7, \"America\"]]\n",
404 | "df = pd.DataFrame(data=data,\n",
405 | " columns=[\"name\", \"age\", \"country\",\n",
406 | " \"score\", \"continent\"],\n",
407 | " index=[1001, 1000])\n",
408 | "df.index.name = \"user_id\"\n",
409 | "df"
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": null,
415 | "metadata": {},
416 | "outputs": [],
417 | "source": [
418 | "sheet1[\"A6\"].value = df"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": null,
424 | "metadata": {},
425 | "outputs": [],
426 | "source": [
427 | "sheet1[\"B10\"].options(header=False, index=False).value = df"
428 | ]
429 | },
430 | {
431 | "cell_type": "code",
432 | "execution_count": null,
433 | "metadata": {},
434 | "outputs": [],
435 | "source": [
436 | "df2 = sheet1[\"A6\"].expand().options(pd.DataFrame).value\n",
437 | "df2"
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": null,
443 | "metadata": {},
444 | "outputs": [],
445 | "source": [
446 | "# If you want the index to be an integer index,\n",
447 | "# you can change its data type\n",
448 | "df2.index = df2.index.astype(int)\n",
449 | "df2"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": null,
455 | "metadata": {},
456 | "outputs": [],
457 | "source": [
458 | "# By setting index=False, it will put all the values from Excel into\n",
459 | "# the data part of the DataFrame and will use the default index\n",
460 | "sheet1[\"A6\"].expand().options(pd.DataFrame, index=False).value"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {},
466 | "source": [
467 | "## Converters and Options"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": null,
473 | "metadata": {},
474 | "outputs": [],
475 | "source": [
476 | "# Horizontal range (one-dimensional)\n",
477 | "sheet1[\"A1:B1\"].value"
478 | ]
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": null,
483 | "metadata": {},
484 | "outputs": [],
485 | "source": [
486 | "# Vertical range (one-dimensional)\n",
487 | "sheet1[\"A1:A2\"].value"
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "execution_count": null,
493 | "metadata": {},
494 | "outputs": [],
495 | "source": [
496 | "# Horizontal range (two-dimensional)\n",
497 | "sheet1[\"A1:B1\"].options(ndim=2).value"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": null,
503 | "metadata": {},
504 | "outputs": [],
505 | "source": [
506 | "# Vertical range (two-dimensional)\n",
507 | "sheet1[\"A1:A2\"].options(ndim=2).value"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "execution_count": null,
513 | "metadata": {},
514 | "outputs": [],
515 | "source": [
516 | "# Using the NumPy array converter behaves the same:\n",
517 | "# vertical range leads to a one-dimensional array\n",
518 | "sheet1[\"A1:A2\"].options(np.array).value"
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": null,
524 | "metadata": {},
525 | "outputs": [],
526 | "source": [
527 | "# Preserving the column orientation\n",
528 | "sheet1[\"A1:A2\"].options(np.array, ndim=2).value"
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": null,
534 | "metadata": {},
535 | "outputs": [],
536 | "source": [
537 | "# If you need to write out a list vertically,\n",
538 | "# the \"transpose\" option comes in handy\n",
539 | "sheet1[\"D1\"].options(transpose=True).value = [100, 200]"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": null,
545 | "metadata": {},
546 | "outputs": [],
547 | "source": [
548 | "# Write out some sample data\n",
549 | "sheet1[\"A13\"].value = [dt.datetime(2020, 1, 1), None, 1.0]"
550 | ]
551 | },
552 | {
553 | "cell_type": "code",
554 | "execution_count": null,
555 | "metadata": {},
556 | "outputs": [],
557 | "source": [
558 | "# Read it back using the default options\n",
559 | "sheet1[\"A13:C13\"].value"
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": null,
565 | "metadata": {},
566 | "outputs": [],
567 | "source": [
568 | "# Read it back using non-default options\n",
569 | "sheet1[\"A13:C13\"].options(empty=\"NA\",\n",
570 | " dates=dt.date,\n",
571 | " numbers=int).value"
572 | ]
573 | },
574 | {
575 | "cell_type": "markdown",
576 | "metadata": {},
577 | "source": [
578 | "## Charts, Pictures and Defined Names"
579 | ]
580 | },
581 | {
582 | "cell_type": "code",
583 | "execution_count": null,
584 | "metadata": {},
585 | "outputs": [],
586 | "source": [
587 | "sheet1[\"A15\"].value = [[None, \"North\", \"South\"],\n",
588 | " [\"Last Year\", 2, 5],\n",
589 | " [\"This Year\", 3, 6]]"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": null,
595 | "metadata": {},
596 | "outputs": [],
597 | "source": [
598 | "chart = sheet1.charts.add(top=sheet1[\"A19\"].top,\n",
599 | " left=sheet1[\"A19\"].left)\n",
600 | "chart.chart_type = \"column_clustered\"\n",
601 | "chart.set_source_data(sheet1[\"A15\"].expand())"
602 | ]
603 | },
604 | {
605 | "cell_type": "code",
606 | "execution_count": null,
607 | "metadata": {},
608 | "outputs": [],
609 | "source": [
610 | "# Read in the chart data as DataFrame\n",
611 | "df = sheet1[\"A15\"].expand().options(pd.DataFrame).value\n",
612 | "df"
613 | ]
614 | },
615 | {
616 | "cell_type": "code",
617 | "execution_count": null,
618 | "metadata": {},
619 | "outputs": [],
620 | "source": [
621 | "# Enable Matplotlib by using the notebook magic command\n",
622 | "# and switch to the \"seaborn\" style\n",
623 | "%matplotlib inline\n",
624 | "import matplotlib.pyplot as plt\n",
625 | "plt.style.use(\"seaborn\")"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": null,
631 | "metadata": {},
632 | "outputs": [],
633 | "source": [
634 | "# The pandas plot method returns an \"axis\" object from\n",
635 | "# where you can get the figure. \"T\" transposes the\n",
636 | "# DataFrame to bring the plot into the desired orientation\n",
637 | "ax = df.T.plot.bar()\n",
638 | "fig = ax.get_figure()"
639 | ]
640 | },
641 | {
642 | "cell_type": "code",
643 | "execution_count": null,
644 | "metadata": {},
645 | "outputs": [],
646 | "source": [
647 | "# Send the plot to Excel\n",
648 | "plot = sheet1.pictures.add(fig, name=\"SalesPlot\",\n",
649 | " top=sheet1[\"H19\"].top,\n",
650 | " left=sheet1[\"H19\"].left)\n",
651 | "# Let's scale the plot to 70%\n",
652 | "plot.width, plot.height = plot.width * 0.7, plot.height * 0.7"
653 | ]
654 | },
655 | {
656 | "cell_type": "code",
657 | "execution_count": null,
658 | "metadata": {},
659 | "outputs": [],
660 | "source": [
661 | "ax = (df + 1).T.plot.bar()\n",
662 | "plot = plot.update(ax.get_figure())"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": null,
668 | "metadata": {},
669 | "outputs": [],
670 | "source": [
671 | "# The book scope is the default scope\n",
672 | "sheet1[\"A1:B2\"].name = \"matrix1\""
673 | ]
674 | },
675 | {
676 | "cell_type": "code",
677 | "execution_count": null,
678 | "metadata": {},
679 | "outputs": [],
680 | "source": [
681 | "# For the sheet scope, prefix the name with the sheet's\n",
682 | "# name followed by an exclamation point\n",
683 | "sheet1[\"B10:E11\"].name = \"Sheet1!matrix2\""
684 | ]
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": null,
689 | "metadata": {},
690 | "outputs": [],
691 | "source": [
692 | "# Now you can access the range by name\n",
693 | "sheet1[\"matrix1\"]"
694 | ]
695 | },
696 | {
697 | "cell_type": "code",
698 | "execution_count": null,
699 | "metadata": {},
700 | "outputs": [],
701 | "source": [
702 | "# If you access the names collection via the \"sheet1\" object,\n",
703 | "# it contains only names with that sheet's scope\n",
704 | "sheet1.names"
705 | ]
706 | },
707 | {
708 | "cell_type": "code",
709 | "execution_count": null,
710 | "metadata": {},
711 | "outputs": [],
712 | "source": [
713 | "# If you access the names collection via the \"book\" object,\n",
714 | "# it contains all names, including book and sheet scope\n",
715 | "book.names"
716 | ]
717 | },
718 | {
719 | "cell_type": "code",
720 | "execution_count": null,
721 | "metadata": {},
722 | "outputs": [],
723 | "source": [
724 | "# Names have various methods and attributes.\n",
725 | "# You can, for example, get the respective range object.\n",
726 | "book.names[\"matrix1\"].refers_to_range"
727 | ]
728 | },
729 | {
730 | "cell_type": "code",
731 | "execution_count": null,
732 | "metadata": {},
733 | "outputs": [],
734 | "source": [
735 | "# If you want to assign a name to a constant\n",
736 | "# or a formula, use the \"add\" method.\n",
737 | "# You may need to replace the decimal point with a comma\n",
738 | "# if you are using an international version of Excel.\n",
739 | "book.names.add(\"EURUSD\", \"=1.1151\")"
740 | ]
741 | },
742 | {
743 | "cell_type": "markdown",
744 | "metadata": {},
745 | "source": [
746 | "# Advanced Topics"
747 | ]
748 | },
749 | {
750 | "cell_type": "markdown",
751 | "metadata": {},
752 | "source": [
753 | "## Performance"
754 | ]
755 | },
756 | {
757 | "cell_type": "code",
758 | "execution_count": null,
759 | "metadata": {},
760 | "outputs": [],
761 | "source": [
762 | "# Add a new sheet and write 150 values\n",
763 | "# to it to have something to work with\n",
764 | "sheet2 = book.sheets.add()\n",
765 | "sheet2[\"A1\"].value = np.arange(150).reshape(30, 5)"
766 | ]
767 | },
768 | {
769 | "cell_type": "code",
770 | "execution_count": null,
771 | "metadata": {},
772 | "outputs": [],
773 | "source": [
774 | "%%time\n",
775 | "# This makes 150 cross-application calls\n",
776 | "for cell in sheet2[\"A1:E30\"]:\n",
777 | " cell.value += 1"
778 | ]
779 | },
780 | {
781 | "cell_type": "code",
782 | "execution_count": null,
783 | "metadata": {},
784 | "outputs": [],
785 | "source": [
786 | "%%time\n",
787 | "# This makes just two cross-application calls\n",
788 | "values = sheet2[\"A1:E30\"].options(np.array).value\n",
789 | "sheet2[\"A1:E30\"].value = values + 1"
790 | ]
791 | },
792 | {
793 | "cell_type": "code",
794 | "execution_count": null,
795 | "metadata": {},
796 | "outputs": [],
797 | "source": [
798 | "# With raw values, you must provide the full\n",
799 | "# target range, sheet[\"A35\"] doesn't work anymore\n",
800 | "sheet1[\"A35:B36\"].options(\"raw\").value = [[1, 2], [3, 4]]"
801 | ]
802 | }
803 | ],
804 | "metadata": {
805 | "kernelspec": {
806 | "display_name": "Python 3",
807 | "language": "python",
808 | "name": "python3"
809 | },
810 | "language_info": {
811 | "codemirror_mode": {
812 | "name": "ipython",
813 | "version": 3
814 | },
815 | "file_extension": ".py",
816 | "mimetype": "text/x-python",
817 | "name": "python",
818 | "nbconvert_exporter": "python",
819 | "pygments_lexer": "ipython3",
820 | "version": "3.7.4"
821 | }
822 | },
823 | "nbformat": 4,
824 | "nbformat_minor": 4
825 | }
826 |
--------------------------------------------------------------------------------
/ch11.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Case Study Preliminaries"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Web APIs"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import json"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {
30 | "pycharm": {
31 | "name": "#%%\n"
32 | }
33 | },
34 | "outputs": [],
35 | "source": [
36 | "# A Python dictionary...\n",
37 | "user_dict = {\"name\": \"Jane Doe\",\n",
38 | " \"age\": 23,\n",
39 | " \"married\": False,\n",
40 | " \"children\": None,\n",
41 | " \"hobbies\": [\"hiking\", \"reading\"]}"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {
48 | "pycharm": {
49 | "name": "#%%\n"
50 | }
51 | },
52 | "outputs": [],
53 | "source": [
54 | "# ...converted to a JSON string\n",
55 | "# by json.dumps (\"dump string\"). The \"indent\" parameter is\n",
56 | "# optional and prettifies the printing.\n",
57 | "user_json = json.dumps(user_dict, indent=4)\n",
58 | "print(user_json)"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "pycharm": {
66 | "name": "#%%\n"
67 | }
68 | },
69 | "outputs": [],
70 | "source": [
71 | "# Convert the JSON string back to a native Python data structure\n",
72 | "json.loads(user_json)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "import requests"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {
88 | "pycharm": {
89 | "name": "#%%\n"
90 | }
91 | },
92 | "outputs": [],
93 | "source": [
94 | "response = requests.get(\"https://pypi.org/pypi/pandas/json\")\n",
95 | "response.status_code"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {
102 | "pycharm": {
103 | "name": "#%%\n"
104 | }
105 | },
106 | "outputs": [],
107 | "source": [
108 | "# response.json()"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {
115 | "pycharm": {
116 | "name": "#%%\n"
117 | }
118 | },
119 | "outputs": [],
120 | "source": [
121 | "releases = []\n",
122 | "for version, files in response.json()['releases'].items():\n",
123 | " releases.append(f\"{version}: {files[0]['upload_time']}\")\n",
124 | "releases[:3] # show the first 3 elements of the list"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "## Databases"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "import urllib.parse"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {
147 | "pycharm": {
148 | "name": "#%%\n"
149 | }
150 | },
151 | "outputs": [],
152 | "source": [
153 | "urllib.parse.quote_plus(\"pa$$word\")"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": null,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# Let's start with the imports\n",
163 | "import sqlite3\n",
164 | "from sqlalchemy import create_engine\n",
165 | "import pandas as pd"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {
172 | "pycharm": {
173 | "name": "#%%\n"
174 | }
175 | },
176 | "outputs": [],
177 | "source": [
178 | "# Our SQL query: \"select all columns from the packages table\"\n",
179 | "sql = \"SELECT * FROM packages\""
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "pycharm": {
187 | "name": "#%%\n"
188 | }
189 | },
190 | "outputs": [],
191 | "source": [
192 | "# Option 1: Database driver (sqlite3 is part of the standard library)\n",
193 | "# Using the connection as context manager automatically commits\n",
194 | "# the transaction or rolls it back in case of an error.\n",
195 | "with sqlite3.connect(\"packagetracker/packagetracker.db\") as con:\n",
196 | " cursor = con.cursor() # We need a cursor to run SQL queries\n",
197 | " result = cursor.execute(sql).fetchall() # Return all records\n",
198 | "result"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {
205 | "pycharm": {
206 | "name": "#%%\n"
207 | }
208 | },
209 | "outputs": [],
210 | "source": [
211 | "# Option 2: SQLAlchemy\n",
212 | "# \"create_engine\" expects the connection string of your database.\n",
213 | "# Here, we can execute a query as a method of the connection object.\n",
214 | "engine = create_engine(\"sqlite:///packagetracker/packagetracker.db\")\n",
215 | "with engine.connect() as con:\n",
216 | " result = con.execute(sql).fetchall()\n",
217 | "result"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": null,
223 | "metadata": {
224 | "pycharm": {
225 | "name": "#%%\n"
226 | }
227 | },
228 | "outputs": [],
229 | "source": [
230 | "# Option 3: pandas\n",
231 | "# Providing a table name to \"read_sql\" reads the full table.\n",
232 | "# Pandas requires an SQLAlchemy engine that we reuse from\n",
233 | "# the previous example.\n",
234 | "df = pd.read_sql(\"packages\", engine, index_col=\"package_id\")\n",
235 | "df"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {
242 | "pycharm": {
243 | "name": "#%%\n"
244 | }
245 | },
246 | "outputs": [],
247 | "source": [
248 | "# \"read_sql\" also accepts an SQL query\n",
249 | "pd.read_sql(sql, engine, index_col=\"package_id\")"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "pycharm": {
257 | "name": "#%%\n"
258 | }
259 | },
260 | "outputs": [],
261 | "source": [
262 | "# The DataFrame method \"to_sql\" writes DataFrames to tables\n",
263 | "# \"if_exists\" has to be either \"fail\", \"append\" or \"replace\"\n",
264 | "# and defines what happens if the table already exists\n",
265 | "df.to_sql(\"packages2\", con=engine, if_exists=\"append\")"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {
272 | "pycharm": {
273 | "name": "#%%\n"
274 | }
275 | },
276 | "outputs": [],
277 | "source": [
278 | "# The previous command created a new table \"packages2\" and\n",
279 | "# inserted the records from the DataFrame df as we can\n",
280 | "# verify by reading it back\n",
281 | "pd.read_sql(\"packages2\", engine, index_col=\"package_id\")"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {
288 | "pycharm": {
289 | "name": "#%%\n"
290 | }
291 | },
292 | "outputs": [],
293 | "source": [
294 | "# Let's get rid of the table again by running the\n",
295 | "# \"drop table\" command via SQLAlchemy\n",
296 | "with engine.connect() as con:\n",
297 | " con.execute(\"DROP TABLE packages2\")"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": null,
303 | "metadata": {},
304 | "outputs": [],
305 | "source": [
306 | "# Let's start by importing SQLAlchemy's text function\n",
307 | "from sqlalchemy.sql import text"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {
314 | "pycharm": {
315 | "name": "#%%\n"
316 | }
317 | },
318 | "outputs": [],
319 | "source": [
320 | "# \":package_id\" is the placeholder\n",
321 | "sql = \"\"\"\n",
322 | "SELECT v.uploaded_at, v.version_string\n",
323 | "FROM packages p\n",
324 | "INNER JOIN package_versions v ON p.package_id = v.package_id\n",
325 | "WHERE p.package_id = :package_id\n",
326 | "ORDER BY v.uploaded_at\n",
327 | "\"\"\""
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": null,
333 | "metadata": {
334 | "pycharm": {
335 | "name": "#%%\n"
336 | }
337 | },
338 | "outputs": [],
339 | "source": [
340 | "# Via SQLAlchemy\n",
341 | "with engine.connect() as con:\n",
342 | " result = con.execute(text(sql), package_id=1).fetchall()\n",
343 | "result[:3] # Print the first 3 records"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": null,
349 | "metadata": {
350 | "pycharm": {
351 | "name": "#%%\n"
352 | }
353 | },
354 | "outputs": [],
355 | "source": [
356 | "# Via pandas\n",
357 | "pd.read_sql(text(sql), engine, parse_dates=[\"uploaded_at\"],\n",
358 | " params={\"package_id\": 1},\n",
359 | " index_col=[\"uploaded_at\"]).head(3)"
360 | ]
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {},
365 | "source": [
366 | "## Exceptions"
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": null,
372 | "metadata": {
373 | "pycharm": {
374 | "name": "#%%\n"
375 | }
376 | },
377 | "outputs": [],
378 | "source": [
379 | "def print_reciprocal(number):\n",
380 | " result = 1 / number\n",
381 | " print(f\"The reciprocal is: {result}\")"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": null,
387 | "metadata": {
388 | "pycharm": {
389 | "name": "#%%\n"
390 | }
391 | },
392 | "outputs": [],
393 | "source": [
394 | "print_reciprocal(0) # This will raise an error"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": null,
400 | "metadata": {
401 | "pycharm": {
402 | "name": "#%%\n"
403 | }
404 | },
405 | "outputs": [],
406 | "source": [
407 | "def print_reciprocal(number):\n",
408 | " try:\n",
409 | " result = 1 / number\n",
410 | " except Exception as e:\n",
411 | " # \"as e\" makes the Exception object available as variable \"e\"\n",
412 | " # \"repr\" stands for \"printable representation\" of an object\n",
413 | " # and gives you back a string with the error message\n",
414 | " print(f\"There was an error: {repr(e)}\")\n",
415 | " result = \"N/A\"\n",
416 | " else:\n",
417 | " print(\"There was no error!\")\n",
418 | " finally:\n",
419 | " print(f\"The reciprocal is: {result}\")"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": null,
425 | "metadata": {
426 | "pycharm": {
427 | "name": "#%%\n"
428 | }
429 | },
430 | "outputs": [],
431 | "source": [
432 | "print_reciprocal(10)"
433 | ]
434 | },
435 | {
436 | "cell_type": "code",
437 | "execution_count": null,
438 | "metadata": {
439 | "pycharm": {
440 | "name": "#%%\n"
441 | }
442 | },
443 | "outputs": [],
444 | "source": [
445 | "print_reciprocal(\"a\")"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": null,
451 | "metadata": {
452 | "pycharm": {
453 | "name": "#%%\n"
454 | }
455 | },
456 | "outputs": [],
457 | "source": [
458 | "print_reciprocal(0)"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": null,
464 | "metadata": {
465 | "pycharm": {
466 | "name": "#%%\n"
467 | }
468 | },
469 | "outputs": [],
470 | "source": [
471 | "def print_reciprocal(number):\n",
472 | " try:\n",
473 | " result = 1 / number\n",
474 | " print(f\"The reciprocal is: {result}\")\n",
475 | " except (TypeError, ZeroDivisionError):\n",
476 | " print(\"Please type in any number except 0.\")"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": null,
482 | "metadata": {
483 | "pycharm": {
484 | "name": "#%%\n"
485 | }
486 | },
487 | "outputs": [],
488 | "source": [
489 | "print_reciprocal(\"a\")"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": null,
495 | "metadata": {
496 | "pycharm": {
497 | "name": "#%%\n"
498 | }
499 | },
500 | "outputs": [],
501 | "source": [
502 | "def print_reciprocal(number):\n",
503 | " try:\n",
504 | " result = 1 / number\n",
505 | " print(f\"The reciprocal is: {result}\")\n",
506 | " except TypeError:\n",
507 | " print(\"Please type in a number.\")\n",
508 | " except ZeroDivisionError:\n",
509 | " print(\"The reciprocal of 0 is not defined.\")"
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "metadata": {},
516 | "outputs": [],
517 | "source": [
518 | "print_reciprocal(\"a\")"
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": null,
524 | "metadata": {},
525 | "outputs": [],
526 | "source": [
527 | "print_reciprocal(0)"
528 | ]
529 | }
530 | ],
531 | "metadata": {
532 | "kernelspec": {
533 | "display_name": "Python 3",
534 | "language": "python",
535 | "name": "python3"
536 | },
537 | "language_info": {
538 | "codemirror_mode": {
539 | "name": "ipython",
540 | "version": 3
541 | },
542 | "file_extension": ".py",
543 | "mimetype": "text/x-python",
544 | "name": "python",
545 | "nbconvert_exporter": "python",
546 | "pygments_lexer": "ipython3",
547 | "version": "3.7.4"
548 | }
549 | },
550 | "nbformat": 4,
551 | "nbformat_minor": 4
552 | }
553 |
--------------------------------------------------------------------------------
/ch12.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# User-Defined Functions (UDFs)"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Function Decorators"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "# This is the definition of the function decorator\n",
24 | "def verbose(func):\n",
25 | " def wrapper():\n",
26 | " print(\"Before calling the function.\")\n",
27 | " func()\n",
28 | " print(\"After calling the function.\")\n",
29 | " return wrapper"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "# Using a function decorator\n",
39 | "@verbose\n",
40 | "def print_hello():\n",
41 | " print(\"hello!\")"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "# Effect of calling the decorated function\n",
51 | "print_hello()"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "## Fetching Data from Google Trends"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "from pytrends.request import TrendReq"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "# First, let's instantiate a TrendRequest object\n",
77 | "trend = TrendReq()"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "# Now we can print the suggestions as they would appear\n",
87 | "# online in the dropdown of Google Trends after typing in \"Python\"\n",
88 | "trend.suggestions(\"Python\")"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "## Caching"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "import time"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "cache = {}\n",
114 | "\n",
115 | "def slow_sum(a, b):\n",
116 | " key = (a, b)\n",
117 | " if key in cache:\n",
118 | " return cache[key]\n",
119 | " else:\n",
120 | " time.sleep(2) # sleep for 2 seconds\n",
121 | " result = a + b\n",
122 | " cache[key] = result\n",
123 | " return result"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "%%time\n",
133 | "slow_sum(1, 2)"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": [
142 | "%%time\n",
143 | "slow_sum(1, 2)"
144 | ]
145 | }
146 | ],
147 | "metadata": {
148 | "kernelspec": {
149 | "display_name": "Python 3",
150 | "language": "python",
151 | "name": "python3"
152 | },
153 | "language_info": {
154 | "codemirror_mode": {
155 | "name": "ipython",
156 | "version": 3
157 | },
158 | "file_extension": ".py",
159 | "mimetype": "text/x-python",
160 | "name": "python",
161 | "nbconvert_exporter": "python",
162 | "pygments_lexer": "ipython3",
163 | "version": "3.7.4"
164 | }
165 | },
166 | "nbformat": 4,
167 | "nbformat_minor": 4
168 | }
169 |
--------------------------------------------------------------------------------
/conda/xl310.yml:
--------------------------------------------------------------------------------
1 | name: xl310
2 | channels:
3 | - defaults
4 | dependencies:
5 | - python=3.10
6 | - pip=21.2.4
7 | - pip:
8 | - flake8==4.0.1
9 | - lxml==4.7.1
10 | - matplotlib==3.5.1
11 | - notebook==6.4.6
12 | - openpyxl==3.0.9
13 | - pandas==1.3.5
14 | - numpy==1.21.0
15 | - pillow==8.4.0
16 | - plotly==5.4.0
17 | - python-dateutil==2.8.2
18 | - requests==2.26.0
19 | - sqlalchemy==1.4.28
20 | - xlrd==2.0.1
21 | - xlsxwriter==3.0.2
22 | - xlutils==2.0.0
23 | - xlwings==0.25.3
24 | - xlwt==1.3.0
25 | - pytrends==4.7.3
26 | - pyxlsb==1.0.9
27 |
--------------------------------------------------------------------------------
/conda/xl38.yml:
--------------------------------------------------------------------------------
1 | name: xl38
2 | channels:
3 | - defaults
4 | dependencies:
5 | - flake8=3.8.4
6 | - lxml=4.6.1
7 | - matplotlib=3.3.2
8 | - notebook=6.1.4
9 | - openpyxl=3.0.5
10 | - pandas=1.1.3
11 | - numpy=1.19.2
12 | - pillow=8.0.1
13 | - pip=20.2.4
14 | - plotly=4.14.1
15 | - python=3.8.5
16 | - python-dateutil=2.8.1
17 | - requests=2.24.0
18 | - sqlalchemy=1.3.20
19 | - xlrd=1.2.0
20 | - xlsxwriter=1.3.7
21 | - xlutils=2.0.0
22 | - xlwings=0.20.8
23 | - xlwt=1.3.0
24 | - pip:
25 | - pytrends==4.7.3
26 | - pyxlsb==1.0.7
--------------------------------------------------------------------------------
/debugging.py:
--------------------------------------------------------------------------------
# Two operands to add while stepping through the code with a debugger
a, b = 3, 4

# The sum we expect to see printed: 7
c = a + b

print(c)
7 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | # This is a copy of conda/xl38.yml (but without xlwings) for Binder
2 | name: xl38
3 | channels:
4 | - defaults
5 | dependencies:
6 | - flake8=3.8.4
7 | - lxml=4.6.1
8 | - matplotlib=3.3.2
9 | - notebook=6.1.4
10 | - openpyxl=3.0.5
11 | - pandas=1.1.3
12 | - numpy=1.19.2
13 | - pillow=8.0.1
14 | - pip=20.2.4
15 | - plotly=4.14.1
16 | - python=3.8.5
17 | - python-dateutil=2.8.1
18 | - requests=2.24.0
19 | - sqlalchemy=1.3.20
20 | - xlrd=1.2.0
21 | - xlsxwriter=1.3.7
22 | - xlutils=2.0.0
23 | - xlwt=1.3.0
24 | - pip:
25 | - pytrends==4.7.3
26 | - pyxlsb==1.0.7
27 |
--------------------------------------------------------------------------------
/excel.py:
--------------------------------------------------------------------------------
1 | """This module offers a read and write function to get
2 | 2-dimensional lists in and out of Excel files.
3 | """
4 | import re
5 | import itertools
6 | import datetime as dt
7 |
8 | # Optional dependencies
9 | try:
10 | import openpyxl
11 | except ImportError:
12 | openpyxl = None
13 | try:
14 | import pyxlsb
15 | except ImportError:
16 | pyxlsb = None
17 | try:
18 | import xlrd
19 | from xlrd.biffh import error_text_from_code
20 | except ImportError:
21 | xlrd = None
22 | try:
23 | import xlwt
24 | except ImportError:
25 | xlwt = None
26 | try:
27 | import xlsxwriter
28 | except ImportError:
29 | xlsxwriter = None
30 |
31 |
def read(sheet, first_cell="A1", last_cell=None):
    """Read a 2-dimensional list from an Excel range.

    Parameters
    ----------
    sheet : object
        An xlrd, openpyxl or pyxlsb sheet object
    first_cell : str or tuple, optional
        Top-left corner of the Excel range you want to read.
        Can be a string like "A1" or a row/col tuple like (1, 1),
        default is "A1".
    last_cell : str or tuple, optional
        Bottom-right corner of the Excel range you want to read.
        Can be a string like "A1" or a row/col tuple like (1, 1),
        default is the bottom-right cell of the used range.

    Returns
    -------
    list
        A 2-dimensional list with the values of the Excel range

    Raises
    ------
    TypeError
        If sheet is not an xlrd, openpyxl or pyxlsb sheet object
    """
    # xlrd
    if xlrd and isinstance(sheet, xlrd.sheet.Sheet):
        # isinstance returns True if sheet is of type xlrd.sheet.Sheet
        if last_cell is None:
            # actual range with data, not used range
            last_cell = (sheet.nrows, sheet.ncols)
        # Transform "A1" notation into tuples of 1-based indices
        # (xl_cell_to_rowcol returns 0-based indices, hence the + 1)
        if not isinstance(first_cell, tuple):
            first_cell = xl_cell_to_rowcol(first_cell)
            first_cell = (first_cell[0] + 1, first_cell[1] + 1)
        if not isinstance(last_cell, tuple):
            last_cell = xl_cell_to_rowcol(last_cell)
            last_cell = (last_cell[0] + 1, last_cell[1] + 1)
        # Loop cell by cell over the requested range (0-based indices)
        values = []
        for r in range(first_cell[0] - 1, last_cell[0]):
            row = []
            for c in range(first_cell[1] - 1, last_cell[1]):
                # Handle the different cell types: dates are converted to
                # datetime objects, empty/blank cells to None, error cells
                # to their error string and booleans to True/False
                if sheet.cell(r, c).ctype == xlrd.XL_CELL_DATE:
                    value = xlrd.xldate.xldate_as_datetime(
                        sheet.cell(r, c).value, sheet.book.datemode)
                elif sheet.cell(r, c).ctype in [xlrd.XL_CELL_EMPTY,
                                                xlrd.XL_CELL_BLANK]:
                    value = None
                elif sheet.cell(r, c).ctype == xlrd.XL_CELL_ERROR:
                    value = error_text_from_code[sheet.cell(r, c).value]
                elif sheet.cell(r, c).ctype == xlrd.XL_CELL_BOOLEAN:
                    value = bool(sheet.cell(r, c).value)
                else:
                    value = sheet.cell(r, c).value
                row.append(value)
            values.append(row)
        return values

    # OpenPyXL
    elif openpyxl and isinstance(
            sheet,
            (openpyxl.worksheet.worksheet.Worksheet,
             openpyxl.worksheet._read_only.ReadOnlyWorksheet)):
        if last_cell is None:
            # used range
            last_cell = (sheet.max_row, sheet.max_column)
        # coordinate_to_tuple turns "A1" notation into 1-based (row, col)
        if not isinstance(first_cell, tuple):
            first_cell = openpyxl.utils.cell.coordinate_to_tuple(first_cell)
        if not isinstance(last_cell, tuple):
            last_cell = openpyxl.utils.cell.coordinate_to_tuple(last_cell)
        data = []
        for row in sheet.iter_rows(min_row=first_cell[0], min_col=first_cell[1],
                                   max_row=last_cell[0], max_col=last_cell[1],
                                   values_only=True):
            data.append(list(row))
        return data

    # pyxlsb
    elif pyxlsb and isinstance(sheet, pyxlsb.worksheet.Worksheet):
        # Map the error codes to Excel's error strings; applied below
        # via errors.get, which falls back to the raw cell value
        errors = {"0x0": "#NULL!", "0x7": "#DIV/0!", "0xf": "#VALUE!",
                  "0x17": "#REF!", "0x1d": "#NAME?", "0x24": "#NUM!",
                  "0x2a": "#N/A"}
        if not isinstance(first_cell, tuple):
            first_cell = xl_cell_to_rowcol(first_cell)
            first_cell = (first_cell[0] + 1, first_cell[1] + 1)
        if last_cell and not isinstance(last_cell, tuple):
            last_cell = xl_cell_to_rowcol(last_cell)
            last_cell = (last_cell[0] + 1, last_cell[1] + 1)
        data = []
        # sheet.rows() is a generator that requires islice to slice it;
        # a last_cell of None means "read until the end"
        for row in itertools.islice(sheet.rows(),
                                    first_cell[0] - 1,
                                    last_cell[0] if last_cell else None):
            data.append([errors.get(cell.v, cell.v) for cell in row]
                        [first_cell[1] - 1 : last_cell[1] if last_cell else None])
        return data
    else:
        raise TypeError(f"Couldn't handle sheet of type {type(sheet)}")
127 |
128 |
def write(sheet, values, first_cell="A1", date_format=None):
    """Write a 2-dimensional list to an Excel range.

    Parameters
    ----------
    sheet : object
        An openpyxl, xlsxwriter or xlwt sheet object. openpyxl's
        write_only=True mode is not supported.
    values : list
        A 2-dimensional list of values
    first_cell : str or tuple, optional
        Top-left corner of the Excel range where you want to write out
        the DataFrame. Can be a string like "A1" or a row/col tuple
        like (1, 1), default is "A1".
    date_format : str, optional
        Only accepted if sheet is an openpyxl or xlwt sheet. By default,
        formats dates in the following format: "mm/dd/yy". For xlsxwriter,
        set the format when you instantiate a Workbook by providing:
        options={"default_date_format": "mm/dd/yy"}

    Raises
    ------
    ValueError
        If date_format is provided together with an xlsxwriter sheet
    TypeError
        If sheet is not an openpyxl, xlsxwriter or xlwt sheet object
    """
    # OpenPyXL
    if openpyxl and isinstance(
            sheet, openpyxl.worksheet.worksheet.Worksheet):
        if date_format is None:
            date_format = "mm/dd/yy"
        # coordinate_to_tuple turns "A1" notation into 1-based (row, col)
        if not isinstance(first_cell, tuple):
            first_cell = openpyxl.utils.coordinate_to_tuple(first_cell)
        for i, row in enumerate(values):
            for j, value in enumerate(row):
                cell = sheet.cell(row=first_cell[0] + i,
                                  column=first_cell[1] + j)
                cell.value = value
                # Apply the date format to date/datetime cells only
                if date_format and isinstance(value, (dt.datetime, dt.date)):
                    cell.number_format = date_format

    # XlsxWriter
    elif xlsxwriter and isinstance(sheet, xlsxwriter.worksheet.Worksheet):
        if date_format is not None:
            raise ValueError("date_format must be set as Workbook option")
        # XlsxWriter works with 0-based indices, hence the - 1
        if isinstance(first_cell, tuple):
            first_cell = first_cell[0] - 1, first_cell[1] - 1
        else:
            first_cell = xl_cell_to_rowcol(first_cell)
        # write_row writes a whole row of values in one call
        for r, row_data in enumerate(values):
            sheet.write_row(first_cell[0] + r, first_cell[1], row_data)

    # xlwt
    elif xlwt and isinstance(sheet, xlwt.Worksheet):
        if date_format is None:
            date_format = "mm/dd/yy"
        # easyxf turns the number format string into an xlwt style object
        date_format = xlwt.easyxf(num_format_str=date_format)
        # xlwt works with 0-based indices, hence the - 1
        if isinstance(first_cell, tuple):
            first_cell = (first_cell[0] - 1, first_cell[1] - 1)
        else:
            first_cell = xl_cell_to_rowcol(first_cell)
        for i, row in enumerate(values):
            for j, cell in enumerate(row):
                # Dates need the extra style argument to be formatted
                if isinstance(cell, (dt.datetime, dt.date)):
                    sheet.write(i + first_cell[0], j + first_cell[1],
                                cell, date_format)
                else:
                    sheet.write(i + first_cell[0], j + first_cell[1],
                                cell)
    else:
        raise TypeError(f"Couldn't handle sheet of type {type(sheet)}")
194 |
195 |
def xl_cell_to_rowcol(cell_str):
    """
    Convert a cell reference in A1 notation to a zero indexed row and column.

    Args:
        cell_str: A1 style string.

    Returns:
        row, col: Zero indexed cell row and column indices.

    Raises:
        ValueError: If cell_str is not a valid A1-style cell reference.

    This function is from XlsxWriter
    Copyright (c) 2013-2020, John McNamara
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice, this
    list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    The views and conclusions contained in the software and documentation are those
    of the authors and should not be interpreted as representing official policies,
    either expressed or implied, of the FreeBSD Project.

    """
    # An empty string maps to the top-left cell (preserves the original
    # behavior of this helper)
    if not cell_str:
        return 0, 0

    match = re.compile(r"(\$?)([A-Z]{1,3})(\$?)(\d+)").match(cell_str)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None,
        # which is what match.group() would raise for invalid input
        raise ValueError(f"Invalid A1-style cell reference: {cell_str!r}")
    col_str = match.group(2)
    row_str = match.group(4)

    # Convert base26 column string to number.
    expn = 0
    col = 0
    for char in reversed(col_str):
        col += (ord(char) - ord("A") + 1) * (26 ** expn)
        expn += 1

    # Convert 1-index to zero-index
    row = int(row_str) - 1
    col -= 1

    return row, col
254 |
--------------------------------------------------------------------------------
/images/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/images/cover.png
--------------------------------------------------------------------------------
/images/python.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/images/python.bmp
--------------------------------------------------------------------------------
/images/python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/images/python.png
--------------------------------------------------------------------------------
/packagetracker/database.py:
--------------------------------------------------------------------------------
1 | """This module handles all database interactions"""
2 |
3 | from pathlib import Path
4 | from sqlite3 import Connection as SQLite3Connection
5 |
6 | import sqlalchemy
7 | from sqlalchemy import event
8 | from sqlalchemy.sql import text
9 | from sqlalchemy.engine import Engine
10 | import pandas as pd
11 |
12 |
13 | # Have SQLAlchemy enforce foreign keys with SQLite, see:
14 | # https://docs.sqlalchemy.org/en/latest/dialects/sqlite.html#foreign-key-support
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    # Runs for every new DBAPI connection SQLAlchemy opens; only act on
    # SQLite connections and switch on foreign-key enforcement for them
    if not isinstance(dbapi_connection, SQLite3Connection):
        return
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()
21 |
22 |
# We want the database file to sit next to this file, independently of
# the current working directory. Here, we are turning the path into an
# absolute path.
this_dir = Path(__file__).resolve().parent
db_path = this_dir / "packagetracker.db"

# Module-level database engine, shared by all functions in this module
engine = sqlalchemy.create_engine(f"sqlite:///{db_path}")
30 |
31 |
32 | def get_packages():
33 | """Get all packages as DataFrame"""
34 |
35 | return pd.read_sql_table("packages", con=engine, index_col="package_id")
36 |
37 |
def store_package(package_name):
    """Insert a new package_name into the packages table.
    Returns None on success and an error message string otherwise.
    """

    insert_stmt = text("INSERT INTO packages (package_name) VALUES (:package_name)")
    try:
        with engine.connect() as con:
            con.execute(insert_stmt, package_name=package_name)
    except sqlalchemy.exc.IntegrityError:
        # The packages table declares package_name as UNIQUE
        return f"{package_name} already exists"
    except Exception as e:
        return repr(e)
    return None
50 |
51 |
def get_versions(package_name):
    """Return a DataFrame with the upload timestamp (as index) and the
    version string of every release of the given package_name.
    """

    query = """
    SELECT v.uploaded_at, v.version_string
    FROM packages p
    INNER JOIN package_versions v ON p.package_id = v.package_id
    WHERE p.package_name = :package_name
    """
    # ":package_name" is a placeholder that gets filled in via params
    return pd.read_sql_query(text(query), engine,
                             index_col=["uploaded_at"],
                             parse_dates=["uploaded_at"],
                             params={"package_name": package_name})
64 |
65 |
def store_versions(df):
    """Append the rows of the provided DataFrame df to the
    package_versions table.
    """

    # index=False keeps the DataFrame index out of the database table
    df.to_sql(name="package_versions", con=engine,
              if_exists="append", index=False)
70 |
71 |
def delete_versions():
    """Remove every record from the package_versions table"""

    sql = "DELETE FROM package_versions"
    with engine.connect() as con:
        con.execute(sql)
77 |
78 |
def create_db():
    """Create the database tables. In the case of SQLite, this also
    creates the database file itself.
    """

    sql_table_packages = """
    CREATE TABLE packages (
        package_id INTEGER PRIMARY KEY,
        package_name TEXT NOT NULL,
        UNIQUE(package_name)
    )
    """

    sql_table_versions = """
    CREATE TABLE package_versions (
        package_id INTEGER,
        version_string TEXT,
        uploaded_at TIMESTAMP NOT NULL,
        PRIMARY KEY (package_id, version_string),
        FOREIGN KEY (package_id) REFERENCES packages (package_id)
    )
    """

    # Run both CREATE TABLE statements over a single connection
    with engine.connect() as con:
        for ddl in (sql_table_packages, sql_table_versions):
            con.execute(ddl)
106 |
107 |
if __name__ == "__main__":
    # Run this module as a script to create the packagetracker.db database
    # and its tables; importing the module does not do this
    create_db()
111 |
--------------------------------------------------------------------------------
/packagetracker/packagetracker.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/packagetracker/packagetracker.db
--------------------------------------------------------------------------------
/packagetracker/packagetracker.py:
--------------------------------------------------------------------------------
1 | """This module contains all functions that are either called from Excel
2 | or manipulate Excel.
3 | """
4 |
5 | import datetime as dt
6 |
7 | from dateutil import tz
8 | import requests
9 | import pandas as pd
10 | import matplotlib.pyplot as plt
11 | import xlwings as xw
12 |
13 | import database
14 |
15 |
# This is the part of the URL that is the same for every request;
# the requests below use endpoints of the form BASE_URL/<package_name>/json
BASE_URL = "https://pypi.org/pypi"
18 |
19 |
def add_package():
    """ Adds a new package including the version history to the database.
    Triggers an update of the dropdown on the Tracker tab.
    Reads the package name from the "new_package" cell on the Database
    sheet and writes feedback to the cell directly right of it.
    """
    # Excel objects
    db_sheet = xw.Book.caller().sheets["Database"]
    package_name = db_sheet["new_package"].value
    feedback_cell = db_sheet["new_package"].offset(column_offset=1)

    # Clear feedback cell
    feedback_cell.clear_contents()

    # Check if the package exists on PyPI
    if not package_name:
        feedback_cell.value = "Error: Please provide a name!"
        return
    # Any status code other than 200 is treated as "package not found"
    if requests.get(f"{BASE_URL}/{package_name}/json",
                    timeout=6).status_code != 200:
        feedback_cell.value = "Error: Package not found!"
        return

    # Insert the package name into the packages table
    # (store_package returns None on success, an error message otherwise)
    error = database.store_package(package_name)
    db_sheet["new_package"].clear_contents()

    # Show any errors, otherwise kick off a database update and
    # refresh the dropdown so you can select the new package
    if error:
        feedback_cell.value = f"Error: {error}"
    else:
        feedback_cell.value = f"Added {package_name} successfully."
        update_database()
        refresh_dropdown()
53 |
54 |
def update_database():
    """ Deletes all records from the versions table, fetches all
    data again from PyPI and stores the versions again in the table.
    Also writes a "last updated" timestamp (UTC) and log messages
    to the Database sheet.
    """
    # Excel objects
    sheet_db = xw.Book.caller().sheets["Database"]

    # Clear logs
    sheet_db["log"].expand().clear_contents()

    # Keeping things super simple: Delete all versions for all packages
    # and repopulate the package_versions table from scratch
    database.delete_versions()
    df_packages = database.get_packages()
    logs = []

    # Query the PyPI REST API, one request per package
    for package_id, row in df_packages.iterrows():
        ret = requests.get(f"{BASE_URL}/{row['package_name']}/json",
                           timeout=6)
        if ret.status_code == 200:
            ret = ret.json() # parse the JSON string into a dictionary
            logs.append(f"INFO: {row['package_name']} downloaded successfully")
        else:
            # Log the failure and skip this package
            logs.append(f"ERROR: Could not download data for {row['package_name']}")
            continue

        # Instantiate a DataFrame by extracting data from the REST API response
        releases = []
        for version, files in ret["releases"].items():
            if ret["releases"][version]: # ignore releases without info
                releases.append((files[0]["upload_time"], version, package_id))
        df_releases = pd.DataFrame(columns=["uploaded_at", "version_string", "package_id"],
                                   data=releases)
        df_releases["uploaded_at"] = pd.to_datetime(df_releases["uploaded_at"])
        df_releases = df_releases.sort_values("uploaded_at")
        database.store_versions(df_releases)
        logs.append(f"INFO: {row['package_name']} stored to database successfully")

    # Write out the last updated timestamp and logs
    sheet_db["updated_at"].value = (f"Last updated: "
                                    f"{dt.datetime.now(tz.UTC).isoformat()}")
    sheet_db["log"].options(transpose=True).value = logs
98 |
99 |
def show_history():
    """ Shows the latest release and plots the release history
    (number of releases per year) for the package selected in the
    dropdown on the Tracker tab.
    """
    # Excel objects
    book = xw.Book.caller()
    tracker_sheet = book.sheets["Tracker"]
    package_name = tracker_sheet["package_selection"].value
    feedback_cell = tracker_sheet["package_selection"].offset(column_offset=1)
    picture_cell = tracker_sheet["latest_release"].offset(row_offset=2)

    # Use the "seaborn" style for the Matplotlib plots produced by pandas
    plt.style.use("seaborn")

    # Check input
    if not package_name:
        feedback_cell.value = ("Error: Please select a package first! "
                               "You may first have to add one to the database.")
        return

    # Clear output cells and picture
    feedback_cell.clear_contents()
    tracker_sheet["latest_release"].clear_contents()
    if "releases_per_year" in tracker_sheet.pictures:
        tracker_sheet.pictures["releases_per_year"].delete()

    # Get all versions of the package from the database
    try:
        df_releases = database.get_versions(package_name)
    except Exception as e:
        feedback_cell.value = repr(e)
        return
    if df_releases.empty:
        feedback_cell.value = f"Error: Didn't find any releases for {package_name}"
        return

    # Calculate the number of releases per year and plot it
    # (resample("Y") groups the rows by calendar year)
    df_releases_yearly = df_releases.resample("Y").count()
    df_releases_yearly.index = df_releases_yearly.index.year
    df_releases_yearly.index.name = "Years"
    df_releases_yearly = df_releases_yearly.rename(
        columns={"version_string": "Number of Releases"})
    ax = df_releases_yearly.plot.bar(
        title=f"Number of Releases per Year "
              f"({tracker_sheet['package_selection'].value})")

    # Write the results and plot to Excel
    # (the row with the max index is the most recent release)
    version = df_releases.loc[df_releases.index.max(), "version_string"]
    tracker_sheet["latest_release"].value = (
        f"{version} ({df_releases.index.max():%B %d, %Y})")
    tracker_sheet.pictures.add(ax.get_figure(), name="releases_per_year",
                               top=picture_cell.top,
                               left=picture_cell.left)
153 |
154 |
def refresh_dropdown():
    """ Repopulates the dropdown on the Tracker tab with the current
    content of the packages table.
    """
    # Excel objects
    workbook = xw.Book.caller()
    dropdown_sheet = workbook.sheets["Dropdown"]
    tracker_sheet = workbook.sheets["Tracker"]

    # Reset the currently selected package
    tracker_sheet["package_selection"].clear_contents()

    # If the Excel table holds non-empty rows, delete them first, then
    # fill it again with the values from the packages database table
    dropdown_content = dropdown_sheet["dropdown_content"]
    if dropdown_content.value:
        dropdown_content.delete()
    dropdown_sheet["dropdown_content"].options(
        header=False, index=False).value = database.get_packages()
173 |
174 |
if __name__ == "__main__":
    # For debugging from Python: pretend the workbook called this script,
    # then run add_package directly
    xw.Book("packagetracker.xlsm").set_mock_caller()
    add_package()
178 |
--------------------------------------------------------------------------------
/packagetracker/packagetracker.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/packagetracker/packagetracker.xlsm
--------------------------------------------------------------------------------
/parallel_openpyxl.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | from itertools import repeat
3 |
4 | import openpyxl
5 | import excel
6 |
7 |
def _read_sheet(filename, sheetname):
    # The leading underscore in the function name is used by convention
    # to mark it as "private", i.e., it shouldn't be used directly outside
    # of this module.
    book = openpyxl.load_workbook(filename,
                                  read_only=True, data_only=True)
    try:
        sheet = book[sheetname]
        data = excel.read(sheet)
        title = sheet.title
    finally:
        # Always close the book: in read_only mode, openpyxl keeps the
        # file handle open until close() is called, so an exception in
        # excel.read() would otherwise leak the handle.
        book.close()
    return title, data
18 |
def load_workbook(filename, sheetnames=None):
    """Read the given sheets of a workbook in parallel.

    If sheetnames is None, all sheets of the workbook are read.
    Returns a dict mapping sheet names to their cell values.
    """
    if sheetnames is None:
        # Open the book once just to find out which sheets exist
        book = openpyxl.load_workbook(filename,
                                      read_only=True, data_only=True)
        sheetnames = book.sheetnames
        book.close()
    # Pool() uses one worker process per CPU core by default. starmap
    # feeds each tuple produced by the zip expression to _read_sheet,
    # i.e., [('filename.xlsx', 'Sheet1'), ('filename.xlsx', 'Sheet2')]
    with multiprocessing.Pool() as pool:
        results = pool.starmap(_read_sheet, zip(repeat(filename), sheetnames))
    return dict(results)
32 |
--------------------------------------------------------------------------------
/parallel_pandas.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | from itertools import repeat
3 |
4 | import pandas as pd
5 | import openpyxl
6 |
7 |
def _read_sheet(filename, sheet_name):
    # The leading underscore marks this function as "private" by
    # convention, i.e., it isn't meant to be used outside this module.
    frame = pd.read_excel(filename, sheet_name=sheet_name,
                          engine='openpyxl')
    return sheet_name, frame
14 |
15 |
def read_excel(filename, sheet_name=None):
    """Read the given sheets of an Excel workbook in parallel.

    If sheet_name is None, all sheets are read. Returns a dict that
    maps sheet names to DataFrames.
    """
    if sheet_name is None:
        # Open the book once just to get the list of sheet names
        book = openpyxl.load_workbook(filename,
                                      read_only=True, data_only=True)
        sheet_name = book.sheetnames
        book.close()
    # Pool() uses one worker process per CPU core by default. starmap
    # feeds each tuple produced by the zip expression to _read_sheet,
    # i.e., [('filename.xlsx', 'Sheet1'), ('filename.xlsx', 'Sheet2')]
    with multiprocessing.Pool() as pool:
        results = pool.starmap(_read_sheet, zip(repeat(filename), sheet_name))
    return dict(results)
29 |
--------------------------------------------------------------------------------
/parallel_xlrd.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | from itertools import repeat
3 |
4 | import xlrd
5 | import excel
6 |
7 |
def _read_sheet(filename, sheetname):
    # The leading underscore marks this function as "private" by
    # convention, i.e., it isn't meant to be used outside this module.
    with xlrd.open_workbook(filename, on_demand=True) as book:
        sheet = book.sheet_by_name(sheetname)
        return sheet.name, excel.read(sheet)
16 |
17 |
def open_workbook(filename, sheetnames=None):
    """Read the given sheets of an xls workbook in parallel.

    If sheetnames is None, all sheets are read. Returns a dict that
    maps sheet names to their cell values.
    """
    if sheetnames is None:
        # Open the book once just to get the list of sheet names
        with xlrd.open_workbook(filename, on_demand=True) as book:
            sheetnames = book.sheet_names()
    # Pool() uses one worker process per CPU core by default. starmap
    # feeds each tuple produced by the zip expression to _read_sheet,
    # i.e., [('filename.xls', 'Sheet1'), ('filename.xls', 'Sheet2')]
    with multiprocessing.Pool() as pool:
        results = pool.starmap(_read_sheet, zip(repeat(filename), sheetnames))
    return dict(results)
29 |
--------------------------------------------------------------------------------
/pep8_sample.py:
--------------------------------------------------------------------------------
1 | """This script shows a few PEP 8 rules.
2 | """
3 |
4 | import datetime as dt
5 |
6 |
7 | TEMPERATURE_SCALES = ("fahrenheit", "kelvin",
8 | "celsius")
9 |
10 |
class TemperatureConverter:
    # Placeholder class illustrating PEP 8 naming (CapWords for classes)
    pass  # Doesn't do anything at the moment
13 |
14 |
def convert_to_celsius(degrees, source="fahrenheit"):
    """This function converts degrees Fahrenheit or Kelvin
    into degrees Celsius.
    """
    scale = source.lower()
    if scale == "kelvin":
        return degrees - 273.15
    if scale == "fahrenheit":
        return (degrees - 32) * (5 / 9)
    return f"Don't know how to convert from {source}"
25 |
26 |
# Example usage of the function and the module-level constant above
celsius = convert_to_celsius(44, source="fahrenheit")
non_celsius_scales = TEMPERATURE_SCALES[:-1]

print("Current time: " + dt.datetime.now().isoformat())
print(f"The temperature in Celsius is: {celsius}")
32 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # If you don't use Anaconda, you can use this file to install all dependencies.
2 | # Run it like so from a Command Prompt or Terminal:
3 | #
4 | # pip install -r requirements.txt
5 |
6 | flake8==3.8.4
7 | lxml==4.6.2
8 | matplotlib==3.3.2
9 | notebook==6.1.5
10 | openpyxl==3.0.5
11 | pandas==1.1.3
12 | numpy==1.19.2
13 | pillow==8.0.1
14 | plotly==4.12.0
15 | python-dateutil==2.8.1
16 | requests==2.25.0
17 | sqlalchemy==1.3.20
18 | xlrd==1.2.0
19 | xlsxwriter==1.3.7
20 | xlutils==2.0.0
21 | xlwings==0.20.8
22 | xlwt==1.3.0
23 | pytrends==4.7.3
24 | pyxlsb==1.0.6
25 |
--------------------------------------------------------------------------------
/sales_data/existing/April.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/April.xls
--------------------------------------------------------------------------------
/sales_data/existing/August.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/August.xls
--------------------------------------------------------------------------------
/sales_data/existing/December.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/December.xls
--------------------------------------------------------------------------------
/sales_data/existing/February.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/February.xls
--------------------------------------------------------------------------------
/sales_data/existing/January.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/January.xls
--------------------------------------------------------------------------------
/sales_data/existing/July.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/July.xls
--------------------------------------------------------------------------------
/sales_data/existing/June.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/June.xls
--------------------------------------------------------------------------------
/sales_data/existing/March.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/March.xls
--------------------------------------------------------------------------------
/sales_data/existing/May.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/May.xls
--------------------------------------------------------------------------------
/sales_data/existing/November.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/November.xls
--------------------------------------------------------------------------------
/sales_data/existing/October.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/October.xls
--------------------------------------------------------------------------------
/sales_data/existing/September.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/September.xls
--------------------------------------------------------------------------------
/sales_data/new/April.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/April.xlsx
--------------------------------------------------------------------------------
/sales_data/new/August.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/August.xlsx
--------------------------------------------------------------------------------
/sales_data/new/December.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/December.xlsx
--------------------------------------------------------------------------------
/sales_data/new/February.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/February.xlsx
--------------------------------------------------------------------------------
/sales_data/new/January.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/January.xlsx
--------------------------------------------------------------------------------
/sales_data/new/July.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/July.xlsx
--------------------------------------------------------------------------------
/sales_data/new/June.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/June.xlsx
--------------------------------------------------------------------------------
/sales_data/new/March.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/March.xlsx
--------------------------------------------------------------------------------
/sales_data/new/May.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/May.xlsx
--------------------------------------------------------------------------------
/sales_data/new/November.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/November.xlsx
--------------------------------------------------------------------------------
/sales_data/new/October.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/October.xlsx
--------------------------------------------------------------------------------
/sales_data/new/September.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/September.xlsx
--------------------------------------------------------------------------------
/sales_report_openpyxl.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pandas as pd
4 | from openpyxl.styles import Font, Alignment
5 | from openpyxl.formatting.rule import CellIsRule
6 | from openpyxl.chart import BarChart, Reference
7 | from openpyxl.chart.shapes import GraphicalProperties
8 | from openpyxl.drawing.line import LineProperties
9 |
10 |
# Directory of this file
this_dir = Path(__file__).resolve().parent

# Read in all files (both the xls and xlsx subfolders match "*.xls*")
parts = []
for path in (this_dir / "sales_data").rglob("*.xls*"):
    print(f'Reading {path.name}')
    part = pd.read_excel(path)
    parts.append(part)

# Combine the DataFrames from each file into a single DataFrame
df = pd.concat(parts)

# Pivot each store into a column and sum up all transactions per date
pivot = pd.pivot_table(df,
                       index="transaction_date", columns="store",
                       values="amount", aggfunc="sum")

# Resample to end of month and assign an index name
summary = pivot.resample("M").sum()
summary.index.name = "Month"

# Sort columns by total revenue
summary = summary.loc[:, summary.sum().sort_values().index]

# Add row and column totals: Using "append" together with "rename"
# is a convenient way to add a row to the bottom of a DataFrame.
# NOTE(review): DataFrame.append was removed in pandas 2.0; this works
# with the pandas version pinned in requirements.txt (1.1.3)
summary.loc[:, "Total"] = summary.sum(axis=1)
summary = summary.append(summary.sum(axis=0).rename("Total"))

#### Write summary report to Excel file ####

# DataFrame position and number of rows/columns
# openpyxl uses 1-based indices
startrow, startcol = 3, 2
nrows, ncols = summary.shape

# Starting with pandas 1.3.0, the following line will raise a FutureWarning.
# To fix this, replace write_only=True with engine_kwargs={"write_only": True}
with pd.ExcelWriter(this_dir / "sales_report_openpyxl.xlsx",
                    engine="openpyxl", write_only=True) as writer:
    # pandas uses 0-based indices
    summary.to_excel(writer, sheet_name="Sheet1",
                     startrow=startrow - 1, startcol=startcol - 1)

    # Get openpyxl book and sheet object
    book = writer.book
    sheet = writer.sheets["Sheet1"]

    # Set title
    sheet.cell(row=1, column=startcol, value="Sales Report")
    sheet.cell(row=1, column=startcol).font = Font(size=24, bold=True)

    # Sheet formatting
    sheet.sheet_view.showGridLines = False

    # Format the DataFrame with
    # - number format
    # - column width
    # - conditional formatting
    for row in range(startrow + 1, startrow + nrows + 1):
        for col in range(startcol + 1, startcol + ncols + 1):
            cell = sheet.cell(row=row, column=col)
            cell.number_format = "#,##0"
            cell.alignment = Alignment(horizontal="center")

    # Column B holds the month-end dates of the summary index
    for cell in sheet["B"]:
        cell.number_format = "mmm yy"

    for col in range(startcol, startcol + ncols + 1):
        cell = sheet.cell(row=startrow, column=col)
        sheet.column_dimensions[cell.column_letter].width = 14

    # Red font for sales below 20000 (data cells only, header excluded)
    first_cell = sheet.cell(row=startrow + 1, column=startcol + 1)
    last_cell = sheet.cell(row=startrow + nrows, column=startcol + ncols)
    range_address = f"{first_cell.coordinate}:{last_cell.coordinate}"
    sheet.conditional_formatting.add(range_address,
                                     CellIsRule(operator="lessThan",
                                                formula=["20000"],
                                                stopIfTrue=True,
                                                font=Font(color="E93423")))

    # Chart
    chart = BarChart()
    chart.type = "col"
    chart.title = "Sales per Month and Store"
    chart.height = 11.5
    chart.width = 20.5

    # Add each column as a series, ignoring total row and col
    data = Reference(sheet, min_col=startcol + 1, min_row=startrow,
                     max_row=startrow + nrows - 1,
                     max_col=startcol + ncols - 1)
    categories = Reference(sheet, min_col=startcol, min_row=startrow + 1,
                           max_row=startrow + nrows - 1)
    chart.add_data(data, titles_from_data=True)
    chart.set_categories(categories)
    cell = sheet.cell(row=startrow + nrows + 2, column=startcol)
    sheet.add_chart(chart=chart, anchor=cell.coordinate)

    # Chart formatting
    chart.y_axis.title = "Sales"
    chart.x_axis.title = summary.index.name
    # Hide y-axis line: spPr stands for ShapeProperties
    chart.y_axis.spPr = GraphicalProperties(ln=LineProperties(noFill=True))
116 |
--------------------------------------------------------------------------------
/sales_report_pandas.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pandas as pd
4 |
5 |
# Directory of this file
this_dir = Path(__file__).resolve().parent

# Read in all Excel files from all subfolders of sales_data
frames = []
for path in (this_dir / "sales_data").rglob("*.xls*"):
    print(f'Reading {path.name}')
    frames.append(pd.read_excel(path, index_col="transaction_id"))

# Stack everything into one DataFrame; pandas takes care of
# properly aligning the columns
df = pd.concat(frames)

# One column per store, summing up the transaction amounts per date
pivot = pd.pivot_table(
    df, index="transaction_date", columns="store",
    values="amount", aggfunc="sum")

# Aggregate to month-end dates and give the index a friendly name
summary = pivot.resample("M").sum()
summary.index.name = "Month"

# Write summary report to Excel file
summary.to_excel(this_dir / "sales_report_pandas.xlsx")
31 |
--------------------------------------------------------------------------------
/sales_report_xlsxwriter.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pandas as pd
4 |
5 |
# Directory of this file
this_dir = Path(__file__).resolve().parent

# Read in all files (both the xls and xlsx subfolders match "*.xls*")
parts = []
for path in (this_dir / "sales_data").rglob("*.xls*"):
    print(f'Reading {path.name}')
    part = pd.read_excel(path)
    parts.append(part)

# Combine the DataFrames from each file into a single DataFrame
df = pd.concat(parts)

# Pivot each store into a column and sum up all transactions per date
pivot = pd.pivot_table(df,
                       index="transaction_date", columns="store",
                       values="amount", aggfunc="sum")

# Resample to end of month and assign an index name
summary = pivot.resample("M").sum()
summary.index.name = "Month"

# Sort columns by total revenue
summary = summary.loc[:, summary.sum().sort_values().index]

# Add row and column totals: Using "append" together with "rename"
# is a convenient way to add a row to the bottom of a DataFrame.
# NOTE(review): DataFrame.append was removed in pandas 2.0; this works
# with the pandas version pinned in requirements.txt (1.1.3)
summary.loc[:, "Total"] = summary.sum(axis=1)
summary = summary.append(summary.sum(axis=0).rename("Total"))

#### Write summary report to Excel file ####

# DataFrame position and number of rows/columns
# xlsxwriter uses 0-based indices
startrow, startcol = 2, 1
nrows, ncols = summary.shape

with pd.ExcelWriter(this_dir / "sales_report_xlsxwriter.xlsx",
                    engine="xlsxwriter", datetime_format="mmm yy") as writer:
    summary.to_excel(writer, sheet_name="Sheet1",
                     startrow=startrow, startcol=startcol)

    # Get xlsxwriter book and sheet object
    book = writer.book
    sheet = writer.sheets["Sheet1"]

    # Set title
    title_format = book.add_format({"bold": True, "size": 24})
    sheet.write(0, startcol, "Sales Report", title_format)

    # Sheet formatting
    # 2 = hide on screen and when printing
    sheet.hide_gridlines(2)

    # Format the DataFrame with
    # - number format
    # - column width
    # - conditional formatting
    number_format = book.add_format({"num_format": "#,##0",
                                     "align": "center"})
    below_target_format = book.add_format({"font_color": "#E93423"})
    sheet.set_column(first_col=startcol, last_col=startcol + ncols,
                     width=14, cell_format=number_format)
    # Red font for sales at or below 20000 (data cells only)
    sheet.conditional_format(first_row=startrow + 1,
                             first_col=startcol + 1,
                             last_row=startrow + nrows,
                             last_col=startcol + ncols,
                             options={"type": "cell", "criteria": "<=",
                                      "value": 20000,
                                      "format": below_target_format})

    # Chart
    chart = book.add_chart({"type": "column"})
    chart.set_title({"name": "Sales per Month and Store"})
    chart.set_size({"width": 830, "height": 450})

    # Add each column as a series, ignoring total row and col
    for col in range(1, ncols):
        chart.add_series({
            # [sheetname, first_row, first_col, last_row, last_col]
            "name": ["Sheet1", startrow, startcol + col],
            "categories": ["Sheet1", startrow + 1, startcol,
                           startrow + nrows - 1, startcol],
            "values": ["Sheet1", startrow + 1, startcol + col,
                       startrow + nrows - 1, startcol + col],
        })

    # Chart formatting
    chart.set_x_axis({"name": summary.index.name,
                      "major_tick_mark": "none"})
    chart.set_y_axis({"name": "Sales",
                      "line": {"none": True},
                      "major_gridlines": {"visible": True},
                      "major_tick_mark": "none"})

    # Add the chart to the sheet
    sheet.insert_chart(startrow + nrows + 2, startcol, chart)
103 |
--------------------------------------------------------------------------------
/sales_report_xlwings.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pandas as pd
4 | import xlwings as xw
5 |
6 |
# Directory of this file
this_dir = Path(__file__).resolve().parent

# Read in all files
frames = []
for path in (this_dir / "sales_data").rglob("*.xls*"):
    print(f'Reading {path.name}')
    frames.append(pd.read_excel(path))

# Stack everything into one DataFrame
df = pd.concat(frames)

# One column per store, summing up the transaction amounts per date
pivot = pd.pivot_table(
    df, index="transaction_date", columns="store",
    values="amount", aggfunc="sum")

# Aggregate to month-end dates and give the index a friendly name
summary = pivot.resample("M").sum()
summary.index.name = "Month"

# Order the columns from lowest to highest total revenue
summary = summary.loc[:, summary.sum().sort_values().index]

# Add a "Total" column and a "Total" row ("append" + "rename" is a
# convenient way to add a row to the bottom of a DataFrame)
summary.loc[:, "Total"] = summary.sum(axis=1)
summary = summary.append(summary.sum(axis=0).rename("Total"))

#### Write summary report to Excel file ####

# Open the template, paste the data, autofit the columns
# and adjust the chart source. Then save it under a different name.
template = xw.Book(this_dir / "xl" / "sales_report_template.xlsx")
sheet = template.sheets["Sheet1"]
sheet["B3"].value = summary
sheet["B3"].expand().columns.autofit()
sheet.charts["Chart 1"].set_source_data(sheet["B3"].expand()[:-1, :-1])
template.save(this_dir / "sales_report_xlwings.xlsx")
47 |
--------------------------------------------------------------------------------
/temperature.py:
--------------------------------------------------------------------------------
# Temperature scales used in this module
TEMPERATURE_SCALES = ("fahrenheit", "kelvin", "celsius")
2 |
3 |
def convert_to_celsius(degrees, source="fahrenheit"):
    """Convert degrees Fahrenheit or Kelvin into degrees Celsius.

    Returns an error string for unknown temperature scales.
    """
    scale = source.lower()
    if scale == "kelvin":
        return degrees - 273.15
    if scale == "fahrenheit":
        return (degrees - 32) * (5 / 9)
    return f"Don't know how to convert from {source}"
11 |
12 |
13 | print("This is the temperature module.")
14 |
--------------------------------------------------------------------------------
/udfs/describe/describe.py:
--------------------------------------------------------------------------------
1 | import xlwings as xw
2 | import pandas as pd
3 |
4 |
@xw.func
@xw.arg("df", pd.DataFrame, index=True, header=True)
def describe(df):
    """UDF that reads the selected range (incl. index and header) as a
    DataFrame and returns its summary statistics via DataFrame.describe.
    """
    return df.describe()
9 |
--------------------------------------------------------------------------------
/udfs/describe/describe.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/describe/describe.xlsm
--------------------------------------------------------------------------------
/udfs/first_udf/first_udf.py:
--------------------------------------------------------------------------------
1 | import xlwings as xw
2 |
3 |
def main():
    """Toggle cell A1 of the first sheet between a greeting
    and a goodbye message.
    """
    book = xw.Book.caller()
    cell = book.sheets[0]["A1"]
    if cell.value == "Hello xlwings!":
        cell.value = "Bye xlwings!"
    else:
        cell.value = "Hello xlwings!"
11 |
12 |
@xw.func
def hello(name):
    """Return a personal greeting for the given name."""
    return "Hello {}!".format(name)
16 |
17 |
if __name__ == "__main__":
    # For debugging from Python: pretend the workbook called this
    # script, then run main directly
    xw.Book("first_udf.xlsm").set_mock_caller()
    main()
21 |
--------------------------------------------------------------------------------
/udfs/first_udf/first_udf.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/first_udf/first_udf.xlsm
--------------------------------------------------------------------------------
/udfs/google_trends/google_trends.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pytrends.request import TrendReq
3 | import matplotlib.pyplot as plt
4 | import xlwings as xw
5 |
6 |
@xw.func(call_in_wizard=False)
@xw.arg("mids", doc="Machine IDs: A range of max 5 cells")
@xw.arg("start_date", doc="A date-formatted cell")
@xw.arg("end_date", doc="A date-formatted cell")
def get_interest_over_time(mids, start_date, end_date):
    """Query Google Trends - replaces the Machine ID (mid) of
    common programming languages with their human-readable
    equivalent in the return value, e.g., instead of "/m/05z1_"
    it returns "Python".
    """
    # Check and transform parameters
    assert len(mids) <= 5, "Too many mids (max: 5)"
    start_date = start_date.date().isoformat()
    end_date = end_date.date().isoformat()

    # Run the Google Trends query
    trend = TrendReq(timeout=10)
    trend.build_payload(kw_list=mids,
                        timeframe=f"{start_date} {end_date}")
    df = trend.interest_over_time()

    # Map Google's "mid" identifiers to human-readable column names
    # (a dedicated name avoids shadowing the "mids" parameter)
    mid_to_name = {"/m/05z1_": "Python", "/m/02p97": "JavaScript",
                   "/m/0jgqg": "C++", "/m/07sbkfb": "Java", "/m/060kv": "PHP"}
    df = df.rename(columns=mid_to_name)

    # Drop the isPartial column before returning
    return df.drop(columns="isPartial")
35 |
36 |
@xw.func
@xw.arg("df", pd.DataFrame)
def plot(df, name, caller):
    """UDF that plots the given DataFrame below the calling cell.

    df: the data to plot, read in as a DataFrame
    name: name of the picture object in Excel (allows in-place updates)
    caller: the calling cell, provided by xlwings

    Returns an empty string so the calling cell stays blank.
    """
    # NOTE(review): the "seaborn" style name was removed in matplotlib
    # 3.6; fine with the matplotlib version pinned in requirements.txt
    plt.style.use("seaborn")
    if not df.empty:
        caller.sheet.pictures.add(df.plot().get_figure(),
                                  top=caller.offset(row_offset=1).top,
                                  left=caller.left,
                                  name=name, update=True)
    # Plain literal - the original f"" was an f-string with no placeholders
    return ""
47 |
48 |
if __name__ == "__main__":
    # Start the xlwings UDF server when running this file directly
    xw.serve()
51 |
--------------------------------------------------------------------------------
/udfs/google_trends/google_trends.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/google_trends/google_trends.xlsm
--------------------------------------------------------------------------------
/udfs/google_trends_cache/google_trends_cache.py:
--------------------------------------------------------------------------------
1 | from functools import lru_cache
2 |
3 | import pandas as pd
4 | from pytrends.request import TrendReq
5 | import matplotlib.pyplot as plt
6 | import xlwings as xw
7 |
8 |
# lru_cache memoizes the return value per argument combination so that
# recalculating with unchanged inputs doesn't trigger a new Google Trends
# request. Its arguments must be hashable, which is presumably why "mids"
# arrives as an xw.Range object instead of a list -- TODO confirm
@lru_cache()
@xw.func(call_in_wizard=False)
@xw.arg("mids", xw.Range, doc="Machine IDs: A range of max 5 cells")
@xw.arg("start_date", doc="A date-formatted cell")
@xw.arg("end_date", doc="A date-formatted cell")
def get_interest_over_time(mids, start_date, end_date):
    """Query Google Trends - replaces the Machine ID (mid) of
    common programming languages with their human-readable
    equivalent in the return value, e.g., instead of "/m/05z1_"
    it returns "Python".
    """
    # Unpack the cell values from the xw.Range object
    mids = mids.value

    # Check and transform parameters
    assert len(mids) <= 5, "Too many mids (max: 5)"
    start_date = start_date.date().isoformat()
    end_date = end_date.date().isoformat()

    # Make the Google Trends request and return the DataFrame
    trend = TrendReq(timeout=10)
    trend.build_payload(kw_list=mids,
                        timeframe=f"{start_date} {end_date}")
    df = trend.interest_over_time()

    # Replace Google's "mid" with a human-readable word
    mids = {"/m/05z1_": "Python", "/m/02p97": "JavaScript",
            "/m/0jgqg": "C++", "/m/07sbkfb": "Java", "/m/060kv": "PHP"}
    df = df.rename(columns=mids)

    # Drop the isPartial column
    return df.drop(columns="isPartial")
40 |
41 |
@xw.func
@xw.arg("df", pd.DataFrame)
def plot(df, name, caller):
    """UDF that plots the given DataFrame below the calling cell.

    df: the data to plot, read in as a DataFrame
    name: name of the picture object in Excel (allows in-place updates)
    caller: the calling cell, provided by xlwings

    Returns an empty string so the calling cell stays blank.
    """
    # NOTE(review): the "seaborn" style name was removed in matplotlib
    # 3.6; fine with the matplotlib version pinned in requirements.txt
    plt.style.use("seaborn")
    if not df.empty:
        caller.sheet.pictures.add(df.plot().get_figure(),
                                  top=caller.offset(row_offset=1).top,
                                  left=caller.left,
                                  name=name, update=True)
    # Plain literal - the original f"" was an f-string with no placeholders
    return ""
52 |
53 |
if __name__ == "__main__":
    # Start the xlwings UDF server when running this file directly
    xw.serve()
56 |
--------------------------------------------------------------------------------
/udfs/google_trends_cache/google_trends_cache.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/google_trends_cache/google_trends_cache.xlsm
--------------------------------------------------------------------------------
/udfs/importsub/importsub.py:
--------------------------------------------------------------------------------
1 | import xlwings as xw
2 |
3 |
@xw.sub
def main():
    """Toggle cell A1 of the first sheet between a hello and a bye message."""
    sheet = xw.Book.caller().sheets[0]
    if sheet["A1"].value != "Hello xlwings!":
        sheet["A1"].value = "Hello xlwings!"
    else:
        sheet["A1"].value = "Bye xlwings!"
12 |
13 |
@xw.func
def hello(name):
    """Return a friendly greeting for the given name."""
    return "Hello {}!".format(name)
17 |
18 |
if __name__ == "__main__":
    # Allow running/debugging from Python directly: point xw.Book.caller()
    # at the workbook via set_mock_caller(), then invoke the macro.
    xw.Book("importsub.xlsm").set_mock_caller()
    main()
22 |
--------------------------------------------------------------------------------
/udfs/importsub/importsub.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/importsub/importsub.xlsm
--------------------------------------------------------------------------------
/udfs/raw_values/raw_values.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xlwings as xw
3 |
4 |
@xw.func
@xw.ret("raw")
def randn(i=1000, j=1000):
    """Return an array with dimensions (i, j) filled with standard-normally
    distributed pseudorandom numbers from NumPy's random.randn.
    """
    values = np.random.randn(i, j)
    return values
12 |
--------------------------------------------------------------------------------
/udfs/raw_values/raw_values.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/raw_values/raw_values.xlsm
--------------------------------------------------------------------------------
/udfs/revenues/revenues.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xlwings as xw
3 |
4 |
@xw.func
def revenue(base_fee, users, price):
    """Scalar revenue: the base fee plus users times price."""
    usage_fee = users * price
    return base_fee + usage_fee
8 |
9 |
@xw.func
@xw.arg("users", np.array, ndim=2)
@xw.arg("price", np.array)
def revenue2(base_fee, users, price):
    """Array-based revenue: users (forced to 2d) times price, broadcast
    element-wise, plus the base fee.
    """
    return (users * price) + base_fee
15 |
--------------------------------------------------------------------------------
/udfs/revenues/revenues.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/revenues/revenues.xlsm
--------------------------------------------------------------------------------
/xl/array_calculations.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/array_calculations.xlsx
--------------------------------------------------------------------------------
/xl/big.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/big.xlsx
--------------------------------------------------------------------------------
/xl/course_participants.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/course_participants.xlsx
--------------------------------------------------------------------------------
/xl/currency_converter.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/currency_converter.xlsx
--------------------------------------------------------------------------------
/xl/macro.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/macro.xlsm
--------------------------------------------------------------------------------
/xl/sales_report_template.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/sales_report_template.xlsx
--------------------------------------------------------------------------------
/xl/stores.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/stores.xls
--------------------------------------------------------------------------------
/xl/stores.xlsb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/stores.xlsb
--------------------------------------------------------------------------------
/xl/stores.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/stores.xlsx
--------------------------------------------------------------------------------
/xl/vba.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/vba.xlsm
--------------------------------------------------------------------------------
/xl/vbaProject.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/vbaProject.bin
--------------------------------------------------------------------------------