├── .gitignore ├── LICENSE ├── README.md ├── appendixC.ipynb ├── ch02.ipynb ├── ch03.ipynb ├── ch04.ipynb ├── ch05.ipynb ├── ch06.ipynb ├── ch07.ipynb ├── ch08.ipynb ├── ch09.ipynb ├── ch11.ipynb ├── ch12.ipynb ├── conda ├── xl310.yml └── xl38.yml ├── csv ├── AAPL.csv ├── AMZN.csv ├── GOOGL.csv └── MSFT.csv ├── debugging.py ├── environment.yml ├── excel.py ├── images ├── cover.png ├── python.bmp └── python.png ├── packagetracker ├── database.py ├── packagetracker.db ├── packagetracker.py └── packagetracker.xlsm ├── parallel_openpyxl.py ├── parallel_pandas.py ├── parallel_xlrd.py ├── pep8_sample.py ├── requirements.txt ├── sales_data ├── existing │ ├── April.xls │ ├── August.xls │ ├── December.xls │ ├── February.xls │ ├── January.xls │ ├── July.xls │ ├── June.xls │ ├── March.xls │ ├── May.xls │ ├── November.xls │ ├── October.xls │ └── September.xls └── new │ ├── April.xlsx │ ├── August.xlsx │ ├── December.xlsx │ ├── February.xlsx │ ├── January.xlsx │ ├── July.xlsx │ ├── June.xlsx │ ├── March.xlsx │ ├── May.xlsx │ ├── November.xlsx │ ├── October.xlsx │ └── September.xlsx ├── sales_report_openpyxl.py ├── sales_report_pandas.py ├── sales_report_xlsxwriter.py ├── sales_report_xlwings.py ├── temperature.py ├── udfs ├── describe │ ├── describe.py │ └── describe.xlsm ├── first_udf │ ├── first_udf.py │ └── first_udf.xlsm ├── google_trends │ ├── google_trends.py │ └── google_trends.xlsm ├── google_trends_cache │ ├── google_trends_cache.py │ └── google_trends_cache.xlsm ├── importsub │ ├── importsub.py │ └── importsub.xlsm ├── raw_values │ ├── raw_values.py │ └── raw_values.xlsm └── revenues │ ├── revenues.py │ └── revenues.xlsm └── xl ├── array_calculations.xlsx ├── big.xlsx ├── course_participants.xlsx ├── currency_converter.xlsx ├── macro.xlsm ├── sales_report_template.xlsx ├── stores.xls ├── stores.xlsb ├── stores.xlsx ├── vba.xlsm └── vbaProject.bin /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | 
.DS_Store 3 | ~$*.xls* 4 | *.pyc -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Zoomer Analytics GmbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python for Excel (O'Reilly, 2021) 2 | 3 | 4 | 5 | This is the companion repository for the O'Reilly book [Python for Excel](https://learning.oreilly.com/library/view/python-for-excel/9781492080992/). 
6 | 7 | All notebooks can be run in the cloud except `ch09.ipynb` (requires a local installation of Excel): 8 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fzumstein/python-for-excel/1st-edition?urlpath=tree) 9 | -------------------------------------------------------------------------------- /appendixC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Appendix C\n", 8 | "## Classes and Objects" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": { 15 | "pycharm": { 16 | "name": "#%%\n" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "class Car:\n", 22 | " def __init__(self, color, speed=0):\n", 23 | " self.color = color\n", 24 | " self.speed = speed\n", 25 | "\n", 26 | " def accelerate(self, mph):\n", 27 | " self.speed += mph" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "pycharm": { 35 | "name": "#%%\n" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "# Let's instantiate two car objects\n", 41 | "car1 = Car(\"red\")\n", 42 | "car2 = Car(color=\"blue\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "pycharm": { 50 | "name": "#%%\n" 51 | } 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "# By default, an object prints its memory location\n", 56 | "car1" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "pycharm": { 64 | "name": "#%%\n" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "# Attributes give you access to the data of an object\n", 70 | "car1.color" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "pycharm": { 78 | "name": "#%%\n" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "car1.speed" 84 | ] 
85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "pycharm": { 91 | "name": "#%%\n" 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "# Calling the accelerate method on car1\n", 97 | "car1.accelerate(20)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "pycharm": { 105 | "name": "#%%\n" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "# The speed attribute of car1 changed\n", 111 | "car1.speed" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "pycharm": { 119 | "name": "#%%\n" 120 | } 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# The speed attribute of car2 remained the same\n", 125 | "car2.speed" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "pycharm": { 133 | "name": "#%%\n" 134 | } 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "car1.color = \"green\"" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "pycharm": { 146 | "name": "#%%\n" 147 | } 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "car1.color" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "pycharm": { 159 | "name": "#%%\n" 160 | } 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "car2.color # unchanged" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Working with time-zone-aware datetime objects" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "pycharm": { 179 | "name": "#%%\n" 180 | } 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "import datetime as dt\n", 185 | "from dateutil import tz" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | 
"metadata": { 192 | "pycharm": { 193 | "name": "#%%\n" 194 | } 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "# Time-zone-naive datetime object\n", 199 | "timestamp = dt.datetime(2020, 1, 31, 14, 30)\n", 200 | "timestamp.isoformat()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "pycharm": { 208 | "name": "#%%\n" 209 | } 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "# Time-zone-aware datetime object\n", 214 | "timestamp_eastern = dt.datetime(2020, 1, 31, 14, 30,\n", 215 | " tzinfo=tz.gettz(\"US/Eastern\"))\n", 216 | "# Printing in isoformat makes it easy to\n", 217 | "# see the offset from UTC\n", 218 | "timestamp_eastern.isoformat()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "pycharm": { 226 | "name": "#%%\n" 227 | } 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "# Assign a time zone to a naive datetime object\n", 232 | "timestamp_eastern = timestamp.replace(tzinfo=tz.gettz(\"US/Eastern\"))\n", 233 | "timestamp_eastern.isoformat()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "pycharm": { 241 | "name": "#%%\n" 242 | } 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "# Convert from one time zone to another.\n", 247 | "# Since the UTC time zone is so common,\n", 248 | "# there is a shortcut: tz.UTC\n", 249 | "timestamp_utc = timestamp_eastern.astimezone(tz.UTC)\n", 250 | "timestamp_utc.isoformat()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "pycharm": { 258 | "name": "#%%\n" 259 | } 260 | }, 261 | "outputs": [], 262 | "source": [ 263 | "# From time-zone-aware to naive\n", 264 | "timestamp_eastern.replace(tzinfo=None)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "pycharm": { 272 | "name": "#%%\n" 273 | } 274 | }, 275 
| "outputs": [], 276 | "source": [ 277 | "# Current time without time zone\n", 278 | "dt.datetime.now()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "pycharm": { 286 | "name": "#%%\n" 287 | } 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "# Current time in UTC time zone\n", 292 | "dt.datetime.now(tz.UTC)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "## Mutable vs. Immutable Objects" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "pycharm": { 307 | "name": "#%%\n" 308 | } 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "a = [1, 2, 3]\n", 313 | "b = a\n", 314 | "a[1] = 22\n", 315 | "print(a)\n", 316 | "print(b)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "pycharm": { 324 | "name": "#%%\n" 325 | } 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "a = [1, 2, 3]\n", 330 | "b = a.copy()" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "pycharm": { 338 | "name": "#%%\n" 339 | } 340 | }, 341 | "outputs": [], 342 | "source": [ 343 | "a" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": { 350 | "pycharm": { 351 | "name": "#%%\n" 352 | } 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "b" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": { 363 | "pycharm": { 364 | "name": "#%%\n" 365 | } 366 | }, 367 | "outputs": [], 368 | "source": [ 369 | "a[1] = 22 # Changing \"a\"..." 
370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "pycharm": { 377 | "name": "#%%\n" 378 | } 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "a" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": { 389 | "pycharm": { 390 | "name": "#%%\n" 391 | } 392 | }, 393 | "outputs": [], 394 | "source": [ 395 | "b # ...doesn't affect \"b\"" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "pycharm": { 403 | "name": "#%%\n" 404 | } 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "import copy\n", 409 | "b = copy.deepcopy(a)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": { 416 | "pycharm": { 417 | "name": "#%%\n" 418 | } 419 | }, 420 | "outputs": [], 421 | "source": [ 422 | "def increment(x):\n", 423 | " x = x + 1\n", 424 | " return x" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "pycharm": { 432 | "name": "#%%\n" 433 | } 434 | }, 435 | "outputs": [], 436 | "source": [ 437 | "a = 1\n", 438 | "print(increment(a))\n", 439 | "print(a)" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": { 446 | "pycharm": { 447 | "name": "#%%\n" 448 | } 449 | }, 450 | "outputs": [], 451 | "source": [ 452 | "def increment(x):\n", 453 | " x[0] = x[0] + 1\n", 454 | " return x" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": { 461 | "pycharm": { 462 | "name": "#%%\n" 463 | } 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "a = [1]\n", 468 | "print(increment(a))\n", 469 | "print(a)" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": { 476 | "pycharm": { 477 | "name": "#%%\n" 478 | } 479 | }, 480 | "outputs": [], 481 | "source": [ 482 | "a = [1]\n", 483 | 
"print(increment(a.copy()))\n", 484 | "print(a)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "metadata": { 491 | "pycharm": { 492 | "name": "#%%\n" 493 | } 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "# Don't do this:\n", 498 | "def add_one(x=[]):\n", 499 | " x.append(1)\n", 500 | " return x" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": { 507 | "pycharm": { 508 | "name": "#%%\n" 509 | } 510 | }, 511 | "outputs": [], 512 | "source": [ 513 | "add_one()" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": { 520 | "pycharm": { 521 | "name": "#%%\n" 522 | } 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "add_one()" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": { 533 | "pycharm": { 534 | "name": "#%%\n" 535 | } 536 | }, 537 | "outputs": [], 538 | "source": [ 539 | "def add_one(x=None):\n", 540 | " if x is None:\n", 541 | " x = []\n", 542 | " x.append(1)\n", 543 | " return x" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": { 550 | "pycharm": { 551 | "name": "#%%\n" 552 | } 553 | }, 554 | "outputs": [], 555 | "source": [ 556 | "add_one()" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": null, 562 | "metadata": { 563 | "pycharm": { 564 | "name": "#%%\n" 565 | } 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "add_one()" 570 | ] 571 | } 572 | ], 573 | "metadata": { 574 | "kernelspec": { 575 | "display_name": "Python 3", 576 | "language": "python", 577 | "name": "python3" 578 | }, 579 | "language_info": { 580 | "codemirror_mode": { 581 | "name": "ipython", 582 | "version": 3 583 | }, 584 | "file_extension": ".py", 585 | "mimetype": "text/x-python", 586 | "name": "python", 587 | "nbconvert_exporter": "python", 588 | "pygments_lexer": "ipython3", 589 | "version": "3.7.4" 590 
| } 591 | }, 592 | "nbformat": 4, 593 | "nbformat_minor": 4 594 | } 595 | -------------------------------------------------------------------------------- /ch02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "3 + 4" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# This is a first-level heading\n", 17 | "\n", 18 | "## This is a second-level heading\n", 19 | "\n", 20 | "You can make your text *italic* or **bold** or `monospaced`.\n", 21 | "\n", 22 | "* This is a bullet point\n", 23 | "* This is another bullet point" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Run Order Matters" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "a = 1" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "a" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "a = 2" 58 | ] 59 | } 60 | ], 61 | "metadata": { 62 | "kernelspec": { 63 | "display_name": "Python 3", 64 | "language": "python", 65 | "name": "python3" 66 | }, 67 | "language_info": { 68 | "codemirror_mode": { 69 | "name": "ipython", 70 | "version": 3 71 | }, 72 | "file_extension": ".py", 73 | "mimetype": "text/x-python", 74 | "name": "python", 75 | "nbconvert_exporter": "python", 76 | "pygments_lexer": "ipython3", 77 | "version": "3.7.4" 78 | } 79 | }, 80 | "nbformat": 4, 81 | "nbformat_minor": 4 82 | } 83 | -------------------------------------------------------------------------------- /ch04.ipynb: -------------------------------------------------------------------------------- 1 | { 
2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Foundations: NumPy\n", 8 | "## NumPy Array" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "matrix = [[1, 2, 3],\n", 18 | " [4, 5, 6],\n", 19 | " [7, 8, 9]]" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "[[i + 1 for i in row] for row in matrix]" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# First, let's import NumPy\n", 38 | "import numpy as np" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Constructing an array with a simple list results in a 1d array\n", 48 | "array1 = np.array([10, 100, 1000.])" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# Constructing an array with a nested list results in a 2d array\n", 58 | "array2 = np.array([[1., 2., 3.],\n", 59 | " [4., 5., 6.]])" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "array1.dtype" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "float(array1[0])" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Vectorization and Broadcasting" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "array2 + 1" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 
102 | "array2 * array2" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "array2 * array1" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "array2 @ array2.T # array2.T is a shortcut for array2.transpose()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Universal Functions (ufunc)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "import math" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "math.sqrt(array2) # This will raise en Error" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "np.array([[math.sqrt(i) for i in row] for row in array2])" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "np.sqrt(array2)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "array2.sum(axis=0) # Returns a 1d array" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "array2.sum()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Getting and Setting Array Elements" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "array1[2] # Returns a scalar" 198 | ] 199 | }, 200 | { 201 | 
"cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "array2[0, 0] # Returns a scalar" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "array2[:, 1:] # Returns a 2d array" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "array2[:, 1] # Returns a 1d array" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "array2[1, :2] # Returns a 1d array" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "## Useful Array Constructors" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "np.arange(2 * 5).reshape(2, 5) # 2 rows, 5 columns" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "np.random.randn(2, 3) # 2 rows, 3 columns" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "## View vs. 
Copy" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "array2" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "subset = array2[:, :2]\n", 284 | "subset" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "subset[0, 0] = 1000" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "subset" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "array2" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.7.4" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 4 336 | } 337 | -------------------------------------------------------------------------------- /ch05.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# DataFrame and Series" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | 
"pd.read_excel(\"xl/course_participants.xlsx\")" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "data = [[\"Mark\", 55, \"Italy\", 4.5, \"Europe\"],\n", 35 | " [\"John\", 33, \"USA\", 6.7, \"America\"],\n", 36 | " [\"Tim\", 41, \"USA\", 3.9, \"America\"],\n", 37 | " [\"Jenny\", 12, \"Germany\", 9.0, \"Europe\"]]\n", 38 | "df = pd.DataFrame(data=data,\n", 39 | " columns=[\"name\", \"age\", \"country\",\n", 40 | " \"score\", \"continent\"],\n", 41 | " index=[1001, 1000, 1002, 1003])\n", 42 | "df" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "df.info()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Index" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "df.index" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "df.index.name = \"user_id\"\n", 77 | "df" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# \"reset_index\" turns the index into a column, replacing the\n", 87 | "# index with the default index. 
This corresponds to the DataFrame\n", 88 | "# from the beginning that we loaded from Excel.\n", 89 | "df.reset_index()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# \"reset_index\" turns \"user_id\" into a regular column and\n", 99 | "# \"set_index\" turns the column \"name\" into the index\n", 100 | "df.reset_index().set_index(\"name\")" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "df.reindex([999, 1000, 1001, 1004])" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "df.sort_index()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "df.sort_values([\"continent\", \"age\"])" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Columns" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "df.columns" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "df.columns.name = \"properties\"\n", 153 | "df" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "df.rename(columns={\"name\": \"First Name\", \"age\": \"Age\"})" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "df.drop(columns=[\"name\", \"country\"],\n", 172 | " index=[1000, 1003])" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | 
"metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "df.T # Shortcut for df.transpose()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "df.loc[:, [\"continent\", \"country\", \"name\", \"age\", \"score\"]]" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "# Data Manipulation\n", 198 | "## Selecting Data" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# Using scalars for both row and column selection returns a scalar\n", 208 | "df.loc[1001, \"name\"]" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "# Using a scalar on either the row or column selection returns a Series\n", 218 | "df.loc[[1001, 1002], \"age\"]" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# Selecting multiple rows and columns returns a DataFrame\n", 228 | "df.loc[:1002, [\"name\", \"country\"]]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "df.iloc[0, 0] # Returns a Scalar" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "df.iloc[[0, 2], 1] # Returns a Series" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "df.iloc[:3, [0, 2]] # Returns a DataFrame" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "tf = (df[\"age\"] > 40) & 
(df[\"country\"] == \"USA\")\n", 265 | "tf # This is a Series with only True/False" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "df.loc[tf, :]" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "df.loc[df.index > 1001, :]" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "df.loc[df[\"country\"].isin([\"Italy\", \"Germany\"]), :]" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "# This could be the yearly rainfall in millimeters\n", 302 | "rainfall = pd.DataFrame(data={\"City 1\": [300.1, 100.2],\n", 303 | " \"City 2\": [400.3, 300.4],\n", 304 | " \"City 3\": [1000.5, 1100.6]})\n", 305 | "rainfall" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "rainfall < 400" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "rainfall[rainfall < 400]" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "# A MultiIndex needs to be sorted\n", 333 | "df_multi = df.reset_index().set_index([\"continent\", \"country\"])\n", 334 | "df_multi = df_multi.sort_index()\n", 335 | "df_multi" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "df_multi.loc[\"Europe\", :]" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | 
"source": [ 353 | "df_multi.loc[(\"Europe\", \"Italy\"), :]" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "df_multi.reset_index(level=0)" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "## Setting Data" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "# Copy the DataFrame first to leave the original untouched\n", 379 | "df2 = df.copy()" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "df2.loc[1000, \"name\"] = \"JOHN\"\n", 389 | "df2" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "df2.loc[[1000, 1001], \"score\"] = [3, 4]\n", 399 | "df2" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "tf = (df2[\"age\"] < 20) | (df2[\"country\"] == \"USA\")\n", 409 | "df2.loc[tf, \"name\"] = \"xxx\"\n", 410 | "df2" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "# Copy the DataFrame first to leave the original untouched\n", 420 | "rainfall2 = rainfall.copy()\n", 421 | "rainfall2" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "# Set the values to 0 wherever they are below 400\n", 431 | "rainfall2[rainfall2 < 400] = 0\n", 432 | "rainfall2" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "df2.replace(\"USA\", \"U.S.\")" 
442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "df2.loc[:, \"discount\"] = 0\n", 451 | "df2.loc[:, \"price\"] = [49.9, 49.9, 99.9, 99.9]\n", 452 | "df2" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": null, 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "df2 = df.copy() # Let's start with a fresh copy\n", 462 | "df2.loc[:, \"birth year\"] = 2021 - df2[\"age\"]\n", 463 | "df2" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": {}, 469 | "source": [ 470 | "## Missing Data" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "df2 = df.copy() # Let's start with a fresh copy\n", 480 | "df2.loc[1000, \"score\"] = None\n", 481 | "df2.loc[1003, :] = None\n", 482 | "df2" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "df2.dropna()" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "df2.dropna(how=\"all\")" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "df2.isna()" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "df2.fillna({\"score\": df2[\"score\"].mean()})" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "## Duplicate Data" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "df.drop_duplicates([\"country\", \"continent\"])" 535 | ] 536 | }, 537 | { 538 
| "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": {}, 541 | "outputs": [], 542 | "source": [ 543 | "df[\"country\"].is_unique" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [ 552 | "df[\"country\"].unique()" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": null, 558 | "metadata": {}, 559 | "outputs": [], 560 | "source": [ 561 | "# By default, it marks only duplicates as True, i.e.\n", 562 | "# without the first occurrence\n", 563 | "df[\"country\"].duplicated()" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [ 572 | "# To get all rows where \"country\" is duplicated, use\n", 573 | "# keep=False\n", 574 | "df.loc[df[\"country\"].duplicated(keep=False), :]" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "## Arithmetic Operations" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "rainfall" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "metadata": {}, 597 | "outputs": [], 598 | "source": [ 599 | "rainfall + 100" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": null, 605 | "metadata": {}, 606 | "outputs": [], 607 | "source": [ 608 | "more_rainfall = pd.DataFrame(data=[[100, 200], [300, 400]],\n", 609 | " index=[1, 2],\n", 610 | " columns=[\"City 1\", \"City 4\"])\n", 611 | "more_rainfall" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": null, 617 | "metadata": {}, 618 | "outputs": [], 619 | "source": [ 620 | "rainfall + more_rainfall" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": null, 626 | "metadata": {}, 627 | "outputs": [], 628 | "source": [ 629 | 
"rainfall.add(more_rainfall, fill_value=0)" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "# A Series taken from a row\n", 639 | "rainfall.loc[1, :]" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": {}, 646 | "outputs": [], 647 | "source": [ 648 | "rainfall + rainfall.loc[1, :]" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [ 657 | "# A Series taken from a column\n", 658 | "rainfall.loc[:, \"City 2\"]" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "metadata": {}, 665 | "outputs": [], 666 | "source": [ 667 | "rainfall.add(rainfall.loc[:, \"City 2\"], axis=0)" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": null, 673 | "metadata": {}, 674 | "outputs": [], 675 | "source": [ 676 | "# Let's create a new DataFrame\n", 677 | "users = pd.DataFrame(data=[\" mArk \", \"JOHN \", \"Tim\", \" jenny\"],\n", 678 | " columns=[\"name\"])\n", 679 | "users" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": null, 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [ 688 | "users_cleaned = users.loc[:, \"name\"].str.strip().str.capitalize()\n", 689 | "users_cleaned" 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": null, 695 | "metadata": {}, 696 | "outputs": [], 697 | "source": [ 698 | "users_cleaned.str.startswith(\"J\")" 699 | ] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": {}, 704 | "source": [ 705 | "## Applying a Function" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": null, 711 | "metadata": {}, 712 | "outputs": [], 713 | "source": [ 714 | "rainfall" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": 
{}, 721 | "outputs": [], 722 | "source": [ 723 | "def format_string(x):\n", 724 | " return f\"{x:,.2f}\"" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": null, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "# Note that we pass in the function without calling it,\n", 734 | "# i.e., format_string and not format_string()!\n", 735 | "rainfall.applymap(format_string)" 736 | ] 737 | }, 738 | { 739 | "cell_type": "code", 740 | "execution_count": null, 741 | "metadata": {}, 742 | "outputs": [], 743 | "source": [ 744 | "rainfall.applymap(lambda x: f\"{x:,.2f}\")" 745 | ] 746 | }, 747 | { 748 | "cell_type": "markdown", 749 | "metadata": {}, 750 | "source": [ 751 | "# Combining DataFrames\n", 752 | "## Concatenating" 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": null, 758 | "metadata": {}, 759 | "outputs": [], 760 | "source": [ 761 | "data = [[15, \"France\", 4.1, \"Becky\"],\n", 762 | " [44, \"Canada\", 6.1, \"Leanne\"]]\n", 763 | "more_users = pd.DataFrame(data=data,\n", 764 | " columns=[\"age\", \"country\", \"score\", \"name\"],\n", 765 | " index=[1000, 1011])\n", 766 | "more_users" 767 | ] 768 | }, 769 | { 770 | "cell_type": "code", 771 | "execution_count": null, 772 | "metadata": {}, 773 | "outputs": [], 774 | "source": [ 775 | "pd.concat([df, more_users], axis=0)" 776 | ] 777 | }, 778 | { 779 | "cell_type": "code", 780 | "execution_count": null, 781 | "metadata": {}, 782 | "outputs": [], 783 | "source": [ 784 | "data = [[3, 4],\n", 785 | " [5, 6]]\n", 786 | "more_categories = pd.DataFrame(data=data,\n", 787 | " columns=[\"quizzes\", \"logins\"],\n", 788 | " index=[1000, 2000])\n", 789 | "more_categories" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": null, 795 | "metadata": {}, 796 | "outputs": [], 797 | "source": [ 798 | "pd.concat([df, more_categories], axis=1)" 799 | ] 800 | }, 801 | { 802 | "cell_type": "markdown", 803 | "metadata": {}, 804 | "source": [ 
805 | "## Joining and Merging" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": {}, 812 | "outputs": [], 813 | "source": [ 814 | "df1 = pd.DataFrame(data=[[1, 2], [3, 4], [5, 6]],\n", 815 | " columns=[\"A\", \"B\"])\n", 816 | "df1" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "df2 = pd.DataFrame(data=[[10, 20], [30, 40]],\n", 826 | " columns=[\"C\", \"D\"], index=[1, 3])\n", 827 | "df2" 828 | ] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": null, 833 | "metadata": {}, 834 | "outputs": [], 835 | "source": [ 836 | "df1.join(df2, how=\"inner\")" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": null, 842 | "metadata": {}, 843 | "outputs": [], 844 | "source": [ 845 | "df1.join(df2, how=\"left\")" 846 | ] 847 | }, 848 | { 849 | "cell_type": "code", 850 | "execution_count": null, 851 | "metadata": {}, 852 | "outputs": [], 853 | "source": [ 854 | "df1.join(df2, how=\"right\")" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": null, 860 | "metadata": {}, 861 | "outputs": [], 862 | "source": [ 863 | "df1.join(df2, how=\"outer\")" 864 | ] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "execution_count": null, 869 | "metadata": {}, 870 | "outputs": [], 871 | "source": [ 872 | "# Add a column called \"category\" to both DataFrames\n", 873 | "df1[\"category\"] = [\"a\", \"b\", \"c\"]\n", 874 | "df2[\"category\"] = [\"c\", \"b\"]" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": null, 880 | "metadata": {}, 881 | "outputs": [], 882 | "source": [ 883 | "df1" 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": null, 889 | "metadata": {}, 890 | "outputs": [], 891 | "source": [ 892 | "df2" 893 | ] 894 | }, 895 | { 896 | "cell_type": "code", 897 | "execution_count": null, 898 | "metadata": {}, 899 | 
"outputs": [], 900 | "source": [ 901 | "df1.merge(df2, how=\"inner\", on=[\"category\"])" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": null, 907 | "metadata": {}, 908 | "outputs": [], 909 | "source": [ 910 | "df1.merge(df2, how=\"left\", on=[\"category\"])" 911 | ] 912 | }, 913 | { 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "# Data Aggregation and Descriptive Statistics\n", 918 | "## Descriptive Statistics" 919 | ] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": null, 924 | "metadata": {}, 925 | "outputs": [], 926 | "source": [ 927 | "rainfall" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": null, 933 | "metadata": {}, 934 | "outputs": [], 935 | "source": [ 936 | "rainfall.mean()" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": null, 942 | "metadata": {}, 943 | "outputs": [], 944 | "source": [ 945 | "rainfall.mean(axis=1)" 946 | ] 947 | }, 948 | { 949 | "cell_type": "markdown", 950 | "metadata": {}, 951 | "source": [ 952 | "## Grouping" 953 | ] 954 | }, 955 | { 956 | "cell_type": "code", 957 | "execution_count": null, 958 | "metadata": {}, 959 | "outputs": [], 960 | "source": [ 961 | "df.groupby([\"continent\"]).mean()" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": null, 967 | "metadata": {}, 968 | "outputs": [], 969 | "source": [ 970 | "df.groupby([\"continent\", \"country\"]).mean()" 971 | ] 972 | }, 973 | { 974 | "cell_type": "code", 975 | "execution_count": null, 976 | "metadata": {}, 977 | "outputs": [], 978 | "source": [ 979 | "selection = df.loc[:, [\"age\", \"score\", \"continent\"]]\n", 980 | "selection.groupby([\"continent\"]).agg(lambda x: x.max() - x.min())" 981 | ] 982 | }, 983 | { 984 | "cell_type": "markdown", 985 | "metadata": {}, 986 | "source": [ 987 | "## Pivoting and Melting" 988 | ] 989 | }, 990 | { 991 | "cell_type": "code", 992 | "execution_count": null, 993 | 
"metadata": {}, 994 | "outputs": [], 995 | "source": [ 996 | "data = [[\"Oranges\", \"North\", 12.30],\n", 997 | " [\"Apples\", \"South\", 10.55],\n", 998 | " [\"Oranges\", \"South\", 22.00],\n", 999 | " [\"Bananas\", \"South\", 5.90],\n", 1000 | " [\"Bananas\", \"North\", 31.30],\n", 1001 | " [\"Oranges\", \"North\", 13.10]]\n", 1002 | "\n", 1003 | "sales = pd.DataFrame(data=data,\n", 1004 | " columns=[\"Fruit\", \"Region\", \"Revenue\"])\n", 1005 | "sales" 1006 | ] 1007 | }, 1008 | { 1009 | "cell_type": "code", 1010 | "execution_count": null, 1011 | "metadata": {}, 1012 | "outputs": [], 1013 | "source": [ 1014 | "pivot = pd.pivot_table(sales,\n", 1015 | " index=\"Fruit\", columns=\"Region\",\n", 1016 | " values=\"Revenue\", aggfunc=\"sum\",\n", 1017 | " margins=True, margins_name=\"Total\")\n", 1018 | "pivot" 1019 | ] 1020 | }, 1021 | { 1022 | "cell_type": "code", 1023 | "execution_count": null, 1024 | "metadata": {}, 1025 | "outputs": [], 1026 | "source": [ 1027 | "pd.melt(pivot.iloc[:-1,:-1].reset_index(),\n", 1028 | " id_vars=\"Fruit\",\n", 1029 | " value_vars=[\"North\", \"South\"], value_name=\"Revenue\")" 1030 | ] 1031 | }, 1032 | { 1033 | "cell_type": "markdown", 1034 | "metadata": {}, 1035 | "source": [ 1036 | "# Plotting\n", 1037 | "## Matplotlib" 1038 | ] 1039 | }, 1040 | { 1041 | "cell_type": "code", 1042 | "execution_count": null, 1043 | "metadata": {}, 1044 | "outputs": [], 1045 | "source": [ 1046 | "import numpy as np\n", 1047 | "%matplotlib inline\n", 1048 | "# Or %matplotlib notebook" 1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "code", 1053 | "execution_count": null, 1054 | "metadata": {}, 1055 | "outputs": [], 1056 | "source": [ 1057 | "data = pd.DataFrame(data=np.random.rand(4, 4) * 100000,\n", 1058 | " index=[\"Q1\", \"Q2\", \"Q3\", \"Q4\"],\n", 1059 | " columns=[\"East\", \"West\", \"North\", \"South\"])\n", 1060 | "data.index.name = \"Quarters\"\n", 1061 | "data.columns.name = \"Region\"\n", 1062 | "data" 1063 | ] 1064 | }, 1065 | { 1066 
| "cell_type": "code", 1067 | "execution_count": null, 1068 | "metadata": {}, 1069 | "outputs": [], 1070 | "source": [ 1071 | "data.plot() # Shortcut for data.plot.line()" 1072 | ] 1073 | }, 1074 | { 1075 | "cell_type": "markdown", 1076 | "metadata": {}, 1077 | "source": [ 1078 | "## Plotly" 1079 | ] 1080 | }, 1081 | { 1082 | "cell_type": "code", 1083 | "execution_count": null, 1084 | "metadata": {}, 1085 | "outputs": [], 1086 | "source": [ 1087 | "# Set the plotting backend to Plotly\n", 1088 | "pd.options.plotting.backend = \"plotly\"" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "code", 1093 | "execution_count": null, 1094 | "metadata": {}, 1095 | "outputs": [], 1096 | "source": [ 1097 | "data.plot()" 1098 | ] 1099 | }, 1100 | { 1101 | "cell_type": "code", 1102 | "execution_count": null, 1103 | "metadata": {}, 1104 | "outputs": [], 1105 | "source": [ 1106 | "# Display the same data as bar plot\n", 1107 | "data.plot.bar(barmode=\"group\")" 1108 | ] 1109 | }, 1110 | { 1111 | "cell_type": "markdown", 1112 | "metadata": {}, 1113 | "source": [ 1114 | "# Data Import and Export\n", 1115 | "## Exporting to a CSV file" 1116 | ] 1117 | }, 1118 | { 1119 | "cell_type": "code", 1120 | "execution_count": null, 1121 | "metadata": {}, 1122 | "outputs": [], 1123 | "source": [ 1124 | "df.to_csv(\"course_participants.csv\")" 1125 | ] 1126 | }, 1127 | { 1128 | "cell_type": "markdown", 1129 | "metadata": {}, 1130 | "source": [ 1131 | "## Importing a CSV file" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": null, 1137 | "metadata": {}, 1138 | "outputs": [], 1139 | "source": [ 1140 | "msft = pd.read_csv(\"csv/MSFT.csv\")" 1141 | ] 1142 | }, 1143 | { 1144 | "cell_type": "code", 1145 | "execution_count": null, 1146 | "metadata": {}, 1147 | "outputs": [], 1148 | "source": [ 1149 | "msft.info()" 1150 | ] 1151 | }, 1152 | { 1153 | "cell_type": "code", 1154 | "execution_count": null, 1155 | "metadata": {}, 1156 | "outputs": [], 1157 | "source": [ 1158 | 
"# I am selecting a few columns because of space issues\n", 1159 | "# You can also just run: msft.head()\n", 1160 | "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].head()" 1161 | ] 1162 | }, 1163 | { 1164 | "cell_type": "code", 1165 | "execution_count": null, 1166 | "metadata": {}, 1167 | "outputs": [], 1168 | "source": [ 1169 | "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].tail(2)" 1170 | ] 1171 | }, 1172 | { 1173 | "cell_type": "code", 1174 | "execution_count": null, 1175 | "metadata": {}, 1176 | "outputs": [], 1177 | "source": [ 1178 | "msft.loc[:, [\"Adj Close\", \"Volume\"]].describe()" 1179 | ] 1180 | }, 1181 | { 1182 | "cell_type": "code", 1183 | "execution_count": null, 1184 | "metadata": {}, 1185 | "outputs": [], 1186 | "source": [ 1187 | "# The line break in the URL is only to make it fit on the page\n", 1188 | "url = (\"https://raw.githubusercontent.com/fzumstein/\"\n", 1189 | " \"python-for-excel/1st-edition/csv/MSFT.csv\")\n", 1190 | "msft = pd.read_csv(url)" 1191 | ] 1192 | }, 1193 | { 1194 | "cell_type": "code", 1195 | "execution_count": null, 1196 | "metadata": {}, 1197 | "outputs": [], 1198 | "source": [ 1199 | "msft.loc[:, [\"Date\", \"Adj Close\", \"Volume\"]].head(2)" 1200 | ] 1201 | } 1202 | ], 1203 | "metadata": { 1204 | "kernelspec": { 1205 | "display_name": "Python 3", 1206 | "language": "python", 1207 | "name": "python3" 1208 | }, 1209 | "language_info": { 1210 | "codemirror_mode": { 1211 | "name": "ipython", 1212 | "version": 3 1213 | }, 1214 | "file_extension": ".py", 1215 | "mimetype": "text/x-python", 1216 | "name": "python", 1217 | "nbconvert_exporter": "python", 1218 | "pygments_lexer": "ipython3", 1219 | "version": "3.7.4" 1220 | } 1221 | }, 1222 | "nbformat": 4, 1223 | "nbformat_minor": 4 1224 | } 1225 | -------------------------------------------------------------------------------- /ch06.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Time Series\n", 8 | "## DatetimeIndex" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# Let's start by importing the packages we use in this chapter\n", 18 | "# and by setting the plotting backend to Plotly\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "pd.options.plotting.backend = \"plotly\"" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# This creates a DatetimeIndex based on a start timestamp,\n", 31 | "# number of periods and frequency (\"D\" = daily).\n", 32 | "daily_index = pd.date_range(\"2020-02-28\", periods=4, freq=\"D\")\n", 33 | "daily_index" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# This creates a DatetimeIndex based on start/end timestamp.\n", 43 | "# The frequency is set to \"weekly on Sundays\" (\"W-SUN\").\n", 44 | "weekly_index = pd.date_range(\"2020-01-01\", \"2020-01-31\", freq=\"W-SUN\")\n", 45 | "weekly_index" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Construct a DataFrame based on the weekly_index. 
This could be\n", 55 | "# the visitor count of a museum that only opens on Sundays.\n", 56 | "pd.DataFrame(data=[21, 15, 33, 34],\n", 57 | " columns=[\"visitors\"], index=weekly_index)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "msft = pd.read_csv(\"csv/MSFT.csv\")" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "msft.info()" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "msft.loc[:, \"Date\"] = pd.to_datetime(msft[\"Date\"])" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "msft.dtypes" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "msft = pd.read_csv(\"csv/MSFT.csv\",\n", 103 | " index_col=\"Date\", parse_dates=[\"Date\"])" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "msft.info()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "msft.loc[:, \"Volume\"] = msft[\"Volume\"].astype(\"float\")\n", 122 | "msft[\"Volume\"].dtype" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "msft = msft.sort_index()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "msft.index.date" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 
| "source": [ 149 | "msft.loc[\"2019\", \"Adj Close\"]" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "msft.loc[\"2019-06\":\"2020-05\", \"Adj Close\"].plot()" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Working with Time Zones" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "# Add the time information to the date\n", 175 | "msft_close = msft.loc[:, [\"Adj Close\"]].copy()\n", 176 | "msft_close.index = msft_close.index + pd.DateOffset(hours=16)\n", 177 | "msft_close.head(2)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# Make the timestamps time-zone-aware\n", 187 | "msft_close = msft_close.tz_localize(\"America/New_York\")\n", 188 | "msft_close.head(2)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "msft_close = msft_close.tz_convert(\"UTC\")\n", 198 | "msft_close.loc[\"2020-01-02\", \"Adj Close\"] # 21:00 without DST" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "msft_close.loc[\"2020-05-01\", \"Adj Close\"] # 20:00 with DST" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "## Shifting and Percentage Changes" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "msft_close.head()" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | 
"msft_close.shift(1).head()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "returns = np.log(msft_close / msft_close.shift(1))\n", 242 | "returns = returns.rename(columns={\"Adj Close\": \"returns\"})\n", 243 | "returns.head()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "# Plot a histogram with the daily log returns\n", 253 | "returns.plot.hist()" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "simple_rets = msft_close.pct_change()\n", 263 | "simple_rets = simple_rets.rename(columns={\"Adj Close\": \"simple rets\"})\n", 264 | "simple_rets.head()" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "## Rebasing and Correlation" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "parts = [] # List to collect individual DataFrames\n", 281 | "for ticker in [\"AAPL\", \"AMZN\", \"GOOGL\", \"MSFT\"]:\n", 282 | " # \"usecols\" allows us to only read in the Date and Adj Close\n", 283 | " # For a refresher about f-strings, see Chapter 3\n", 284 | " adj_close = pd.read_csv(f\"csv/{ticker}.csv\",\n", 285 | " index_col=\"Date\", parse_dates=[\"Date\"],\n", 286 | " usecols=[\"Date\", \"Adj Close\"])\n", 287 | " # Rename the column into the ticker symbol\n", 288 | " # (If you type this example by hand, make sure to keep the\n", 289 | " # following lines correctly indented!)\n", 290 | " adj_close = adj_close.rename(columns={\"Adj Close\": ticker})\n", 291 | " # Append the stock's DataFrame to the parts list\n", 292 | " parts.append(adj_close)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 
298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "# Combine the 4 DataFrames into a single DataFrame\n", 302 | "adj_close = pd.concat(parts, axis=1)\n", 303 | "adj_close" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "adj_close = adj_close.dropna()\n", 313 | "adj_close.info()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# Use a sample from June 2019 - May 2020\n", 323 | "adj_close_sample = adj_close.loc[\"2019-06\":\"2020-05\", :]\n", 324 | "rebased_prices = adj_close_sample / adj_close_sample.iloc[0, :] * 100\n", 325 | "rebased_prices.head(2)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "rebased_prices.plot()" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "# Correlation of daily log returns\n", 344 | "returns = np.log(adj_close / adj_close.shift(1))\n", 345 | "returns.corr()" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "import plotly.express as px" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "fig = px.imshow(returns.corr(),\n", 364 | " x=adj_close.columns,\n", 365 | " y=adj_close.columns,\n", 366 | " color_continuous_scale=list(\n", 367 | " reversed(px.colors.sequential.RdBu)),\n", 368 | " zmin=-1, zmax=1)\n", 369 | "fig.show()" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "## Resampling" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 
382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "end_of_month = adj_close.resample(\"M\").last()\n", 386 | "end_of_month.head()" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "end_of_month.resample(\"D\").asfreq().head() # No transformation" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "end_of_month.resample(\"W-FRI\").ffill().head() # Forward fill" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "## Rolling Windows" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "# Plot the moving average for MSFT with data from 2019\n", 421 | "msft19 = msft.loc[\"2019\", [\"Adj Close\"]].copy()\n", 422 | "\n", 423 | "# Add the 25 day moving average as a new column to the DataFrame\n", 424 | "msft19.loc[:, \"25day average\"] = msft19[\"Adj Close\"].rolling(25).mean()\n", 425 | "msft19.plot()" 426 | ] 427 | } 428 | ], 429 | "metadata": { 430 | "kernelspec": { 431 | "display_name": "Python 3", 432 | "language": "python", 433 | "name": "python3" 434 | }, 435 | "language_info": { 436 | "codemirror_mode": { 437 | "name": "ipython", 438 | "version": 3 439 | }, 440 | "file_extension": ".py", 441 | "mimetype": "text/x-python", 442 | "name": "python", 443 | "nbconvert_exporter": "python", 444 | "pygments_lexer": "ipython3", 445 | "version": "3.7.4" 446 | } 447 | }, 448 | "nbformat": 4, 449 | "nbformat_minor": 4 450 | } -------------------------------------------------------------------------------- /ch07.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Excel File Manipulation 
with pandas" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Using pandas with Excel Files" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Case Study: Excel Reporting" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df = pd.read_excel(\"sales_data/new/January.xlsx\")\n", 40 | "df.info()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Reading Excel Files with pandas" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "df = pd.read_excel(\"xl/stores.xlsx\",\n", 57 | " sheet_name=\"2019\", skiprows=1, usecols=\"B:F\")\n", 58 | "df" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "df.info()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "def fix_missing(x):\n", 77 | " return False if x in [\"\", \"MISSING\"] else x" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "df = pd.read_excel(\"xl/stores.xlsx\",\n", 87 | " sheet_name=\"2019\", skiprows=1, usecols=\"B:F\",\n", 88 | " converters={\"Flagship\": fix_missing})\n", 89 | "df" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# The Flagship column now has Dtype \"bool\"\n", 99 | "df.info()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | 
"execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "sheets = pd.read_excel(\"xl/stores.xlsx\", sheet_name=[\"2019\", \"2020\"],\n", 109 | " skiprows=1, usecols=[\"Store\", \"Employees\"])\n", 110 | "sheets[\"2019\"].head(2)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "df = pd.read_excel(\"xl/stores.xlsx\", sheet_name=0,\n", 120 | " skiprows=2, skipfooter=3,\n", 121 | " usecols=\"B:C,F\", header=None,\n", 122 | " names=[\"Branch\", \"Employee_Count\", \"Is_Flagship\"])\n", 123 | "df" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "df = pd.read_excel(\"xl/stores.xlsx\", sheet_name=\"2019\",\n", 133 | " skiprows=1, usecols=\"B,C,F\", skipfooter=2,\n", 134 | " na_values=\"MISSING\", keep_default_na=False)\n", 135 | "df" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "f = open(\"output.txt\", \"w\")\n", 145 | "f.write(\"Some text\")\n", 146 | "f.close()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "### Context Managers and the with Statement" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "with open(\"output.txt\", \"w\") as f:\n", 163 | " f.write(\"Some text\")" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "with pd.ExcelFile(\"xl/stores.xls\") as f:\n", 173 | " df1 = pd.read_excel(f, \"2019\", skiprows=1, usecols=\"B:F\", nrows=2)\n", 174 | " df2 = pd.read_excel(f, \"2020\", skiprows=1, usecols=\"B:F\", nrows=2)\n", 175 | "\n", 176 | "df1" 177 | ] 178 | }, 179 
| { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "stores = pd.ExcelFile(\"xl/stores.xlsx\")\n", 186 | "stores.sheet_names" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "url = (\"https://raw.githubusercontent.com/fzumstein/\"\n", 196 | " \"python-for-excel/1st-edition/xl/stores.xlsx\")\n", 197 | "pd.read_excel(url, skiprows=1, usecols=\"B:E\", nrows=2)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "## Writing Excel Files with pandas" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "import numpy as np\n", 214 | "import datetime as dt" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "data = [[dt.datetime(2020,1,1, 10, 13), 2.222, 1, True],\n", 224 | " [dt.datetime(2020,1,2), np.nan, 2, False],\n", 225 | " [dt.datetime(2020,1,2), np.inf, 3, True]]\n", 226 | "df = pd.DataFrame(data=data,\n", 227 | " columns=[\"Dates\", \"Floats\", \"Integers\", \"Booleans\"])\n", 228 | "df.index.name=\"index\"\n", 229 | "df" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "df.to_excel(\"written_with_pandas.xlsx\", sheet_name=\"Output\",\n", 239 | " startrow=1, startcol=1, index=True, header=True,\n", 240 | " na_rep=\"\", inf_rep=\"\")" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "with pd.ExcelWriter(\"written_with_pandas2.xlsx\") as writer:\n", 250 | " df.to_excel(writer, sheet_name=\"Sheet1\", startrow=1, startcol=1)\n", 251 | " 
df.to_excel(writer, sheet_name=\"Sheet1\", startrow=10, startcol=1)\n", 252 | " df.to_excel(writer, sheet_name=\"Sheet2\")" 253 | ] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Python 3", 259 | "language": "python", 260 | "name": "python3" 261 | }, 262 | "language_info": { 263 | "codemirror_mode": { 264 | "name": "ipython", 265 | "version": 3 266 | }, 267 | "file_extension": ".py", 268 | "mimetype": "text/x-python", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "pygments_lexer": "ipython3", 272 | "version": "3.7.4" 273 | } 274 | }, 275 | "nbformat": 4, 276 | "nbformat_minor": 4 277 | } 278 | -------------------------------------------------------------------------------- /ch08.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reader and Writer Packages" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## OpenPyXL\n", 15 | "### Reading with OpenPyXL" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import pandas as pd\n", 25 | "import openpyxl\n", 26 | "import excel\n", 27 | "import datetime as dt" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# Open the workbook to read cell values.\n", 37 | "# The file is automatically closed again after loading the data.\n", 38 | "book = openpyxl.load_workbook(\"xl/stores.xlsx\", data_only=True)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Get a worksheet object by name or index (0-based)\n", 48 | "sheet = book[\"2019\"]\n", 49 | "sheet = book.worksheets[0]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | 
"execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# Get a list with all sheet names\n", 59 | "book.sheetnames" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# Loop through the sheet objects.\n", 69 | "# Instead of \"name\", openpyxl uses \"title\".\n", 70 | "for i in book.worksheets:\n", 71 | " print(i.title)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# Getting the dimensions,\n", 81 | "# i.e., the used range of the sheet\n", 82 | "sheet.max_row, sheet.max_column" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# Read the value of a single cell\n", 92 | "# using \"A1\" notation and using cell indices (1-based)\n", 93 | "sheet[\"B6\"].value\n", 94 | "sheet.cell(row=6, column=2).value" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Read in a range of cell values by using our excel module\n", 104 | "data = excel.read(book[\"2019\"], (2, 2), (8, 6))\n", 105 | "data[:2] # Print the first two rows" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "### Writing with OpenPyXL" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "import openpyxl\n", 122 | "from openpyxl.drawing.image import Image\n", 123 | "from openpyxl.chart import BarChart, Reference\n", 124 | "from openpyxl.styles import Font, colors\n", 125 | "from openpyxl.styles.borders import Border, Side\n", 126 | "from openpyxl.styles.alignment import Alignment\n", 127 | "from openpyxl.styles.fills import PatternFill\n", 128 | "import excel" 129 
| ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# Instantiate a workbook\n", 138 | "book = openpyxl.Workbook()\n", 139 | "\n", 140 | "# Get the first sheet and give it a name\n", 141 | "sheet = book.active\n", 142 | "sheet.title = \"Sheet1\"\n", 143 | "\n", 144 | "# Writing individual cells using A1 notation\n", 145 | "# and cell indices (1-based)\n", 146 | "sheet[\"A1\"].value = \"Hello 1\"\n", 147 | "sheet.cell(row=2, column=1, value=\"Hello 2\")\n", 148 | "\n", 149 | "# Formatting: fill color, alignment, border and font\n", 150 | "font_format = Font(color=\"FF0000\", bold=True)\n", 151 | "thin = Side(border_style=\"thin\", color=\"FF0000\")\n", 152 | "sheet[\"A3\"].value = \"Hello 3\"\n", 153 | "sheet[\"A3\"].font = font_format\n", 154 | "sheet[\"A3\"].border = Border(top=thin, left=thin,\n", 155 | " right=thin, bottom=thin)\n", 156 | "sheet[\"A3\"].alignment = Alignment(horizontal=\"center\")\n", 157 | "sheet[\"A3\"].fill = PatternFill(fgColor=\"FFFF00\", fill_type=\"solid\")\n", 158 | "\n", 159 | "# Number formatting (using Excel's formatting strings)\n", 160 | "sheet[\"A4\"].value = 3.3333\n", 161 | "sheet[\"A4\"].number_format = \"0.00\"\n", 162 | "\n", 163 | "# Date formatting (using Excel's formatting strings)\n", 164 | "sheet[\"A5\"].value = dt.date(2016, 10, 13)\n", 165 | "sheet[\"A5\"].number_format = \"mm/dd/yy\"\n", 166 | "\n", 167 | "# Formula: you must use the English name of the formula\n", 168 | "# with commas as delimiters\n", 169 | "sheet[\"A6\"].value = \"=SUM(A4, 2)\"\n", 170 | "\n", 171 | "# Image\n", 172 | "sheet.add_image(Image(\"images/python.png\"), \"C1\")\n", 173 | "\n", 174 | "# Two-dimensional list (we're using our excel module)\n", 175 | "data = [[None, \"North\", \"South\"],\n", 176 | " [\"Last Year\", 2, 5],\n", 177 | " [\"This Year\", 3, 6]]\n", 178 | "excel.write(sheet, data, \"A10\")\n", 179 | "\n", 180 | "# Chart\n", 181 | "chart 
= BarChart()\n", 182 | "chart.type = \"col\"\n", 183 | "chart.title = \"Sales Per Region\"\n", 184 | "chart.x_axis.title = \"Regions\"\n", 185 | "chart.y_axis.title = \"Sales\"\n", 186 | "chart_data = Reference(sheet, min_row=11, min_col=1,\n", 187 | " max_row=12, max_col=3)\n", 188 | "chart_categories = Reference(sheet, min_row=10, min_col=2,\n", 189 | " max_row=10, max_col=3)\n", 190 | "# from_rows interprets the data in the same way\n", 191 | "# as if you would add a chart manually in Excel\n", 192 | "chart.add_data(chart_data, titles_from_data=True, from_rows=True)\n", 193 | "chart.set_categories(chart_categories)\n", 194 | "sheet.add_chart(chart, \"A15\")\n", 195 | "\n", 196 | "# Saving the workbook creates the file on disk\n", 197 | "book.save(\"openpyxl.xlsx\")" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "book = openpyxl.Workbook()\n", 207 | "sheet = book.active\n", 208 | "sheet[\"A1\"].value = \"This is a template\"\n", 209 | "book.template = True\n", 210 | "book.save(\"template.xltx\")" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "### Editing with OpenPyXL" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# Read the stores.xlsx file, change a cell\n", 227 | "# and store it under a new location/name.\n", 228 | "book = openpyxl.load_workbook(\"xl/stores.xlsx\")\n", 229 | "book[\"2019\"][\"A1\"].value = \"modified\"\n", 230 | "book.save(\"stores_edited.xlsx\")" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "book = openpyxl.load_workbook(\"xl/macro.xlsm\", keep_vba=True)\n", 240 | "book[\"Sheet1\"][\"A1\"].value = \"Click the button!\"\n", 241 | "book.save(\"macro_openpyxl.xlsm\")" 242 | ] 243 | 
}, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "## XlsxWriter" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "import datetime as dt\n", 258 | "import xlsxwriter\n", 259 | "import excel" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# Instantiate a workbook\n", 269 | "book = xlsxwriter.Workbook(\"xlsxwriter.xlsx\")\n", 270 | "\n", 271 | "# Add a sheet and give it a name\n", 272 | "sheet = book.add_worksheet(\"Sheet1\")\n", 273 | "\n", 274 | "# Writing individual cells using A1 notation\n", 275 | "# and cell indices (0-based)\n", 276 | "sheet.write(\"A1\", \"Hello 1\")\n", 277 | "sheet.write(1, 0, \"Hello 2\")\n", 278 | "\n", 279 | "# Formatting: fill color, alignment, border and font\n", 280 | "formatting = book.add_format({\"font_color\": \"#FF0000\",\n", 281 | " \"bg_color\": \"#FFFF00\",\n", 282 | " \"bold\": True, \"align\": \"center\",\n", 283 | " \"border\": 1, \"border_color\": \"#FF0000\"})\n", 284 | "sheet.write(\"A3\", \"Hello 3\", formatting)\n", 285 | "\n", 286 | "# Number formatting (using Excel's formatting strings)\n", 287 | "number_format = book.add_format({\"num_format\": \"0.00\"})\n", 288 | "sheet.write(\"A4\", 3.3333, number_format)\n", 289 | "\n", 290 | "# Date formatting (using Excel's formatting strings)\n", 291 | "date_format = book.add_format({\"num_format\": \"mm/dd/yy\"})\n", 292 | "sheet.write(\"A5\", dt.date(2016, 10, 13), date_format)\n", 293 | "\n", 294 | "# Formula: you must use the English name of the formula\n", 295 | "# with commas as delimiters\n", 296 | "sheet.write(\"A6\", \"=SUM(A4, 2)\")\n", 297 | "\n", 298 | "# Image\n", 299 | "sheet.insert_image(0, 2, \"images/python.png\")\n", 300 | "\n", 301 | "# Two-dimensional list (we're using our excel module)\n", 302 | "data = [[None, 
\"North\", \"South\"],\n", 303 | " [\"Last Year\", 2, 5],\n", 304 | " [\"This Year\", 3, 6]]\n", 305 | "excel.write(sheet, data, \"A10\")\n", 306 | "\n", 307 | "# Chart: see the file \"sales_report_xlsxwriter.py\" in the\n", 308 | "# companion repo to see how you can work with indices\n", 309 | "# instead of cell addresses\n", 310 | "chart = book.add_chart({\"type\": \"column\"})\n", 311 | "chart.set_title({\"name\": \"Sales per Region\"})\n", 312 | "chart.add_series({\"name\": \"=Sheet1!A11\",\n", 313 | " \"categories\": \"=Sheet1!B10:C10\",\n", 314 | " \"values\": \"=Sheet1!B11:C11\"})\n", 315 | "chart.add_series({\"name\": \"=Sheet1!A12\",\n", 316 | " \"categories\": \"=Sheet1!B10:C10\",\n", 317 | " \"values\": \"=Sheet1!B12:C12\"})\n", 318 | "chart.set_x_axis({\"name\": \"Regions\"})\n", 319 | "chart.set_y_axis({\"name\": \"Sales\"})\n", 320 | "sheet.insert_chart(\"A15\", chart)\n", 321 | "\n", 322 | "# Closing the workbook creates the file on disk\n", 323 | "book.close()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "book = xlsxwriter.Workbook(\"macro_xlsxwriter.xlsm\")\n", 333 | "sheet = book.add_worksheet(\"Sheet1\")\n", 334 | "sheet.write(\"A1\", \"Click the button!\")\n", 335 | "book.add_vba_project(\"xl/vbaProject.bin\")\n", 336 | "sheet.insert_button(\"A3\", {\"macro\": \"Hello\", \"caption\": \"Button 1\",\n", 337 | " \"width\": 130, \"height\": 35})\n", 338 | "book.close()" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "## pyxlsb" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "import pyxlsb\n", 355 | "import excel" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "# Loop through sheets. 
With pyxlsb, the workbook\n", 365 | "# and sheet objects can be used as context managers.\n", 366 | "# book.sheets returns a list of sheet names, not objects!\n", 367 | "# To get a sheet object, use get_sheet() instead.\n", 368 | "with pyxlsb.open_workbook(\"xl/stores.xlsb\") as book:\n", 369 | " for sheet_name in book.sheets:\n", 370 | " with book.get_sheet(sheet_name) as sheet:\n", 371 | " dim = sheet.dimension\n", 372 | " print(f\"Sheet '{sheet_name}' has \"\n", 373 | " f\"{dim.h} rows and {dim.w} cols\")" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "# Read in the values of a range of cells by using our excel module.\n", 383 | "# Instead of \"2019\", you could also use its index (1-based).\n", 384 | "with pyxlsb.open_workbook(\"xl/stores.xlsb\") as book:\n", 385 | " with book.get_sheet(\"2019\") as sheet:\n", 386 | " data = excel.read(sheet, \"B2\")\n", 387 | "data[:2] # Print the first two rows" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "from pyxlsb import convert_date\n", 397 | "convert_date(data[1][3])" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "df = pd.read_excel(\"xl/stores.xlsb\", engine=\"pyxlsb\")" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "## xlrd, xlwt and xlutils" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "### Reading with xlrd" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "import xlrd\n", 430 | "import xlwt\n", 431 | "from xlwt.Utils import cell_to_rowcol2\n", 432 | "import xlutils\n", 433 | "import excel" 434 | ] 
435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "# Open the workbook to read cell values. The file is\n", 443 | "# automatically closed again after loading the data.\n", 444 | "book = xlrd.open_workbook(\"xl/stores.xls\")" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "# Get a list with all sheet names\n", 454 | "book.sheet_names()" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "# Loop through the sheet objects\n", 464 | "for sheet in book.sheets():\n", 465 | " print(sheet.name)" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [ 474 | "# Get a sheet object by name or index (0-based)\n", 475 | "sheet = book.sheet_by_index(0)\n", 476 | "sheet = book.sheet_by_name(\"2019\")" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "# Dimensions\n", 486 | "sheet.nrows, sheet.ncols" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [ 495 | "# Read the value of a single cell\n", 496 | "# using \"A1\" notation and using cell indices (0-based).\n", 497 | "# The \"*\" unpacks the tuple that cell_to_rowcol2 returns\n", 498 | "# into individual arguments.\n", 499 | "sheet.cell(*cell_to_rowcol2(\"B3\")).value\n", 500 | "sheet.cell(2, 1).value" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "# Read in a range of cell values by using our excel module\n", 510 | "data = excel.read(sheet, \"B2\")\n", 511 | 
"data[:2] # Print the first two rows" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": {}, 517 | "source": [ 518 | "### Writing with xlwt" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "import xlwt\n", 528 | "from xlwt.Utils import cell_to_rowcol2\n", 529 | "import datetime as dt\n", 530 | "import excel" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "# Instantiate a workbook\n", 540 | "book = xlwt.Workbook()\n", 541 | "\n", 542 | "# Add a sheet and give it a name\n", 543 | "sheet = book.add_sheet(\"Sheet1\")\n", 544 | "\n", 545 | "# Writing individual cells using A1 notation\n", 546 | "# and cell indices (0-based)\n", 547 | "sheet.write(*cell_to_rowcol2(\"A1\"), \"Hello 1\")\n", 548 | "sheet.write(r=1, c=0, label=\"Hello 2\")\n", 549 | "\n", 550 | "# Formatting: fill color, alignment, border and font\n", 551 | "formatting = xlwt.easyxf(\"font: bold on, color red;\"\n", 552 | " \"align: horiz center;\"\n", 553 | " \"borders: top_color red, bottom_color red,\"\n", 554 | " \"right_color red, left_color red,\"\n", 555 | " \"left thin, right thin,\"\n", 556 | " \"top thin, bottom thin;\"\n", 557 | " \"pattern: pattern solid, fore_color yellow;\")\n", 558 | "sheet.write(r=2, c=0, label=\"Hello 3\", style=formatting)\n", 559 | "\n", 560 | "# Number formatting (using Excel's formatting strings)\n", 561 | "number_format = xlwt.easyxf(num_format_str=\"0.00\")\n", 562 | "sheet.write(3, 0, 3.3333, number_format)\n", 563 | "\n", 564 | "# Date formatting (using Excel's formatting strings)\n", 565 | "date_format = xlwt.easyxf(num_format_str=\"mm/dd/yyyy\")\n", 566 | "sheet.write(4, 0, dt.datetime(2012, 2, 3), date_format)\n", 567 | "\n", 568 | "# Formula: you must use the English name of the formula\n", 569 | "# with commas as delimiters\n", 570 | 
"sheet.write(5, 0, xlwt.Formula(\"SUM(A4, 2)\"))\n", 571 | "\n", 572 | "# Two-dimensional list (we're using our excel module)\n", 573 | "data = [[None, \"North\", \"South\"],\n", 574 | " [\"Last Year\", 2, 5],\n", 575 | " [\"This Year\", 3, 6]]\n", 576 | "excel.write(sheet, data, \"A10\")\n", 577 | "\n", 578 | "# Picture (only allows to add bmp format)\n", 579 | "sheet.insert_bitmap(\"images/python.bmp\", 0, 2)\n", 580 | "\n", 581 | "# This writes the file to disk\n", 582 | "book.save(\"xlwt.xls\")" 583 | ] 584 | }, 585 | { 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": [ 589 | "### Editing with xlutils" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": null, 595 | "metadata": {}, 596 | "outputs": [], 597 | "source": [ 598 | "import xlutils.copy" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "book = xlrd.open_workbook(\"xl/stores.xls\", formatting_info=True)\n", 608 | "book = xlutils.copy.copy(book)\n", 609 | "book.get_sheet(0).write(0, 0, \"changed!\")\n", 610 | "book.save(\"stores_edited.xls\")" 611 | ] 612 | }, 613 | { 614 | "cell_type": "markdown", 615 | "metadata": {}, 616 | "source": [ 617 | "# Advanced Topics\n", 618 | "## Working with Big Files" 619 | ] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "### Writing with OpenPyXL" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "book = openpyxl.Workbook(write_only=True)\n", 635 | "# With write_only=True, book.active doesn't work\n", 636 | "sheet = book.create_sheet()\n", 637 | "# This will produce a sheet with 1000 x 200 cells\n", 638 | "for row in range(1000):\n", 639 | " sheet.append(list(range(200)))\n", 640 | "book.save(\"openpyxl_optimized.xlsx\")" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | 
"metadata": {}, 646 | "source": [ 647 | "### Writing with XlsxWriter" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "book = xlsxwriter.Workbook(\"xlsxwriter_optimized.xlsx\",\n", 657 | " options={\"constant_memory\": True})\n", 658 | "sheet = book.add_worksheet()\n", 659 | "# This will produce a sheet with 1000 x 200 cells\n", 660 | "for row in range(1000):\n", 661 | " sheet.write_row(row , 0, list(range(200)))\n", 662 | "book.close()" 663 | ] 664 | }, 665 | { 666 | "cell_type": "markdown", 667 | "metadata": {}, 668 | "source": [ 669 | "### Reading with xlrd" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": null, 675 | "metadata": {}, 676 | "outputs": [], 677 | "source": [ 678 | "with xlrd.open_workbook(\"xl/stores.xls\", on_demand=True) as book:\n", 679 | " sheet = book.sheet_by_index(0) # Only loads the first sheet" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": null, 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [ 688 | "with xlrd.open_workbook(\"xl/stores.xls\", on_demand=True) as book:\n", 689 | " with pd.ExcelFile(book, engine=\"xlrd\") as f:\n", 690 | " df = pd.read_excel(f, sheet_name=0)" 691 | ] 692 | }, 693 | { 694 | "cell_type": "markdown", 695 | "metadata": {}, 696 | "source": [ 697 | "### Reading with OpenPyXL" 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": null, 703 | "metadata": {}, 704 | "outputs": [], 705 | "source": [ 706 | "book = openpyxl.load_workbook(\"xl/big.xlsx\",\n", 707 | " data_only=True, read_only=True,\n", 708 | " keep_links=False)\n", 709 | "# Perform the desired read operations here\n", 710 | "book.close() # Required with read_only=True" 711 | ] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "metadata": {}, 716 | "source": [ 717 | "### Reading in Parallel" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | 
"execution_count": null, 723 | "metadata": {}, 724 | "outputs": [], 725 | "source": [ 726 | "%%time\n", 727 | "data = pd.read_excel(\"xl/big.xlsx\",\n", 728 | " sheet_name=None, engine=\"openpyxl\")" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": {}, 735 | "outputs": [], 736 | "source": [ 737 | "%%time\n", 738 | "import parallel_pandas\n", 739 | "data = parallel_pandas.read_excel(\"xl/big.xlsx\", sheet_name=None)" 740 | ] 741 | }, 742 | { 743 | "cell_type": "markdown", 744 | "metadata": {}, 745 | "source": [ 746 | "## Formatting DataFrames in Excel" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": null, 752 | "metadata": {}, 753 | "outputs": [], 754 | "source": [ 755 | "with pd.ExcelFile(\"xl/stores.xlsx\", engine=\"openpyxl\") as xlfile:\n", 756 | " # Read a DataFrame\n", 757 | " df = pd.read_excel(xlfile, sheet_name=\"2020\")\n", 758 | "\n", 759 | " # Get the OpenPyXL workbook object\n", 760 | " book = xlfile.book\n", 761 | "\n", 762 | " # From here on, it's OpenPyXL code\n", 763 | " sheet = book[\"2019\"]\n", 764 | " value = sheet[\"B3\"].value # Read a single value" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": {}, 771 | "outputs": [], 772 | "source": [ 773 | "with pd.ExcelWriter(\"pandas_and_openpyxl.xlsx\",\n", 774 | " engine=\"openpyxl\") as writer:\n", 775 | " df = pd.DataFrame({\"col1\": [1, 2, 3, 4], \"col2\": [5, 6, 7, 8]})\n", 776 | " # Write a DataFrame\n", 777 | " df.to_excel(writer, \"Sheet1\", startrow=4, startcol=2)\n", 778 | "\n", 779 | " # Get the OpenPyXL workbook and sheet objects\n", 780 | " book = writer.book\n", 781 | " sheet = writer.sheets[\"Sheet1\"]\n", 782 | "\n", 783 | " # From here on, it's OpenPyXL code\n", 784 | " sheet[\"A1\"].value = \"This is a Title\" # Write a single cell value" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": null, 790 | "metadata": {}, 791 | 
"outputs": [], 792 | "source": [ 793 | "df = pd.DataFrame({\"col1\": [1, -2], \"col2\": [-3, 4]},\n", 794 | " index=[\"row1\", \"row2\"])\n", 795 | "df.index.name = \"ix\"\n", 796 | "df" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": null, 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "from openpyxl.styles import PatternFill" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": {}, 812 | "outputs": [], 813 | "source": [ 814 | "with pd.ExcelWriter(\"formatting_openpyxl.xlsx\",\n", 815 | " engine=\"openpyxl\") as writer:\n", 816 | " # Write out the df with the default formatting to A1\n", 817 | " df.to_excel(writer, startrow=0, startcol=0)\n", 818 | "\n", 819 | " # Write out the df with custom index/header formatting to A6\n", 820 | " startrow, startcol = 0, 5\n", 821 | " # 1. Write out the data part of the DataFrame\n", 822 | " df.to_excel(writer, header=False, index=False,\n", 823 | " startrow=startrow + 1, startcol=startcol + 1)\n", 824 | " # Get the sheet object and create a style object\n", 825 | " sheet = writer.sheets[\"Sheet1\"]\n", 826 | " style = PatternFill(fgColor=\"D9D9D9\", fill_type=\"solid\")\n", 827 | "\n", 828 | " # 2. Write out the styled column headers\n", 829 | " for i, col in enumerate(df.columns):\n", 830 | " sheet.cell(row=startrow + 1, column=i + startcol + 2,\n", 831 | " value=col).fill = style\n", 832 | "\n", 833 | " # 3. 
Write out the styled index\n", 834 | " index = [df.index.name if df.index.name else None] + list(df.index)\n", 835 | " for i, row in enumerate(index):\n", 836 | " sheet.cell(row=i + startrow + 1, column=startcol + 1,\n", 837 | " value=row).fill = style" 838 | ] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": null, 843 | "metadata": {}, 844 | "outputs": [], 845 | "source": [ 846 | "# Formatting index/headers with XlsxWriter\n", 847 | "with pd.ExcelWriter(\"formatting_xlsxwriter.xlsx\",\n", 848 | " engine=\"xlsxwriter\") as writer:\n", 849 | " # Write out the df with the default formatting to A1\n", 850 | " df.to_excel(writer, startrow=0, startcol=0)\n", 851 | "\n", 852 | " # Write out the df with custom index/header formatting to A6\n", 853 | " startrow, startcol = 0, 5\n", 854 | " # 1. Write out the data part of the DataFrame\n", 855 | " df.to_excel(writer, header=False, index=False,\n", 856 | " startrow=startrow + 1, startcol=startcol + 1)\n", 857 | " # Get the book and sheet object and create a style object\n", 858 | " book = writer.book\n", 859 | " sheet = writer.sheets[\"Sheet1\"]\n", 860 | " style = book.add_format({\"bg_color\": \"#D9D9D9\"})\n", 861 | "\n", 862 | " # 2. Write out the styled column headers\n", 863 | " for i, col in enumerate(df.columns):\n", 864 | " sheet.write(startrow, startcol + i + 1, col, style)\n", 865 | "\n", 866 | " # 3. 
Write out the styled index\n", 867 | " index = [df.index.name if df.index.name else None] + list(df.index)\n", 868 | " for i, row in enumerate(index):\n", 869 | " sheet.write(startrow + i, startcol, row, style)" 870 | ] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "execution_count": null, 875 | "metadata": {}, 876 | "outputs": [], 877 | "source": [ 878 | "from openpyxl.styles import Alignment" 879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": null, 884 | "metadata": {}, 885 | "outputs": [], 886 | "source": [ 887 | "with pd.ExcelWriter(\"data_format_openpyxl.xlsx\",\n", 888 | " engine=\"openpyxl\") as writer:\n", 889 | " # Write out the DataFrame\n", 890 | " df.to_excel(writer)\n", 891 | " \n", 892 | " # Get the book and sheet objects\n", 893 | " book = writer.book\n", 894 | " sheet = writer.sheets[\"Sheet1\"]\n", 895 | " \n", 896 | " # Formatting individual cells\n", 897 | " nrows, ncols = df.shape\n", 898 | " for row in range(nrows):\n", 899 | " for col in range(ncols):\n", 900 | " # +1 to account for the header/index\n", 901 | " # +1 since OpenPyXL is 1-based\n", 902 | " cell = sheet.cell(row=row + 2,\n", 903 | " column=col + 2)\n", 904 | " cell.number_format = \"0.000\"\n", 905 | " cell.alignment = Alignment(horizontal=\"center\")" 906 | ] 907 | }, 908 | { 909 | "cell_type": "code", 910 | "execution_count": null, 911 | "metadata": {}, 912 | "outputs": [], 913 | "source": [ 914 | "with pd.ExcelWriter(\"data_format_xlsxwriter.xlsx\",\n", 915 | " engine=\"xlsxwriter\") as writer:\n", 916 | " # Write out the DataFrame\n", 917 | " df.to_excel(writer)\n", 918 | "\n", 919 | " # Get the book and sheet objects\n", 920 | " book = writer.book\n", 921 | " sheet = writer.sheets[\"Sheet1\"]\n", 922 | " \n", 923 | " # Formatting the columns (individual cells can't be formatted)\n", 924 | " number_format = book.add_format({\"num_format\": \"0.000\",\n", 925 | " \"align\": \"center\"})\n", 926 | " sheet.set_column(first_col=1, last_col=2,\n", 927 
| " cell_format=number_format)" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": null, 933 | "metadata": {}, 934 | "outputs": [], 935 | "source": [ 936 | "df.style.applymap(lambda x: \"number-format: 0.000;\"\n", 937 | " \"text-align: center\")\\\n", 938 | " .to_excel(\"styled.xlsx\")" 939 | ] 940 | }, 941 | { 942 | "cell_type": "code", 943 | "execution_count": null, 944 | "metadata": {}, 945 | "outputs": [], 946 | "source": [ 947 | "df = pd.DataFrame({\"Date\": [dt.date(2020, 1, 1)],\n", 948 | " \"Datetime\": [dt.datetime(2020, 1, 1, 10)]})\n", 949 | "with pd.ExcelWriter(\"date.xlsx\",\n", 950 | " date_format=\"yyyy-mm-dd\",\n", 951 | " datetime_format=\"yyyy-mm-dd hh:mm:ss\") as writer:\n", 952 | " df.to_excel(writer)" 953 | ] 954 | } 955 | ], 956 | "metadata": { 957 | "kernelspec": { 958 | "display_name": "Python 3", 959 | "language": "python", 960 | "name": "python3" 961 | }, 962 | "language_info": { 963 | "codemirror_mode": { 964 | "name": "ipython", 965 | "version": 3 966 | }, 967 | "file_extension": ".py", 968 | "mimetype": "text/x-python", 969 | "name": "python", 970 | "nbconvert_exporter": "python", 971 | "pygments_lexer": "ipython3", 972 | "version": "3.7.4" 973 | } 974 | }, 975 | "nbformat": 4, 976 | "nbformat_minor": 4 977 | } 978 | -------------------------------------------------------------------------------- /ch09.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Excel Automation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Getting Started with xlwings" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Using Excel as Data Viewer" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# First, let's 
import the packages that we'll use in this chapter\n", 31 | "import datetime as dt\n", 32 | "import xlwings as xw\n", 33 | "import pandas as pd\n", 34 | "import numpy as np" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Let's create a DataFrame based on pseudorandom numbers and\n", 44 | "# with enough rows that only the head and tail are shown\n", 45 | "df = pd.DataFrame(data=np.random.randn(100, 5),\n", 46 | " columns=[f\"Trial {i}\" for i in range(1, 6)])\n", 47 | "df" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# View the DataFrame in Excel\n", 57 | "xw.view(df)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## The Excel Object Model" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# Create a new empty workbook and print its name. This is the\n", 74 | "# book we will use to run most of the code samples in this chapter.\n", 75 | "book = xw.Book()\n", 76 | "book.name" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# Accessing the sheets collection\n", 86 | "book.sheets" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# Get a sheet object by index or name. 
You will need to adjust\n", 96 | "# \"Sheet1\" if your sheet is called differently.\n", 97 | "sheet1 = book.sheets[0]\n", 98 | "sheet1 = book.sheets[\"Sheet1\"]" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "sheet1.range(\"A1\")" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# Most common tasks: write values...\n", 117 | "sheet1.range(\"A1\").value = [[1, 2],\n", 118 | " [3, 4]]\n", 119 | "sheet1.range(\"A4\").value = \"Hello!\"" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# ...and read values\n", 129 | "sheet1.range(\"A1:B2\").value" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "sheet1.range(\"A4\").value" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# Indexing\n", 148 | "sheet1.range(\"A1:B2\")[0, 0]" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# Slicing\n", 158 | "sheet1.range(\"A1:B2\")[:, 1]" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# Single cell: A1 notation\n", 168 | "sheet1[\"A1\"]" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "# Multiple cells: A1 notation\n", 178 | "sheet1[\"A1:B2\"]" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | 
"source": [ 187 | "# Single cell: indexing\n", 188 | "sheet1[0, 0]" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "# Multiple cells: slicing\n", 198 | "sheet1[:2, :2]" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# D10 via sheet indexing\n", 208 | "sheet1[9, 3]" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "# D10 via range object\n", 218 | "sheet1.range((10, 4))" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# D10:F11 via sheet slicing\n", 228 | "sheet1[9:11, 3:6]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# D10:F11 via range object\n", 238 | "sheet1.range((10, 4), (11, 6))" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "sheet1[\"A1\"].sheet.book.app" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# Get one app object from the open workbook\n", 257 | "# and create an additional invisible app instance\n", 258 | "visible_app = sheet1.book.app\n", 259 | "invisible_app = xw.App(visible=False)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# List the book names that are open in each instance\n", 269 | "# by using a list comprehension\n", 270 | "[book.name for book in visible_app.books]" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | 
"execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "[book.name for book in invisible_app.books]" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "# An app key represents the process ID (PID)\n", 289 | "xw.apps.keys()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "# It can also be accessed via the pid attribute\n", 299 | "xw.apps.active.pid" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "# Work with the book in the invisible Excel instance\n", 309 | "invisible_book = invisible_app.books[0]\n", 310 | "invisible_book.sheets[0][\"A1\"].value = \"Created by an invisible app.\"" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "# Save the Excel workbook in the xl directory\n", 320 | "invisible_book.save(\"xl/invisible.xlsx\")" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# Quit the invisible Excel instance\n", 330 | "invisible_app.quit()" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "## Running VBA Code" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "vba_book = xw.Book(\"xl/vba.xlsm\")" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "# Instantiate a macro object with the VBA function\n", 356 | "mysum = vba_book.macro(\"Module1.MySum\")\n", 357 | "# Call a VBA 
function\n", 358 | "mysum(5, 4)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "# It works the same with a VBA Sub procedure\n", 368 | "show_msgbox = vba_book.macro(\"Module1.ShowMsgBox\")\n", 369 | "show_msgbox(\"Hello xlwings!\")" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "# Close the book again (make sure to close the MessageBox first)\n", 379 | "vba_book.close()" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "# Converters, Options and Collections" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "## Working with DataFrames" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "data = [[\"Mark\", 55, \"Italy\", 4.5, \"Europe\"],\n", 403 | " [\"John\", 33, \"USA\", 6.7, \"America\"]]\n", 404 | "df = pd.DataFrame(data=data,\n", 405 | " columns=[\"name\", \"age\", \"country\",\n", 406 | " \"score\", \"continent\"],\n", 407 | " index=[1001, 1000])\n", 408 | "df.index.name = \"user_id\"\n", 409 | "df" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "sheet1[\"A6\"].value = df" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "sheet1[\"B10\"].options(header=False, index=False).value = df" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "df2 = sheet1[\"A6\"].expand().options(pd.DataFrame).value\n", 437 | "df2" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 
442 | "execution_count": null, 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [ 446 | "# If you want the index to be an integer index,\n", 447 | "# you can change its data type\n", 448 | "df2.index = df2.index.astype(int)\n", 449 | "df2" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "# By setting index=False, it will put all the values from Excel into\n", 459 | "# the data part of the DataFrame and will use the default index\n", 460 | "sheet1[\"A6\"].expand().options(pd.DataFrame, index=False).value" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "## Converters and Options" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "# Horizontal range (one-dimensional)\n", 477 | "sheet1[\"A1:B1\"].value" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [ 486 | "# Vertical range (one-dimensional)\n", 487 | "sheet1[\"A1:A2\"].value" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "# Horizontal range (two-dimensional)\n", 497 | "sheet1[\"A1:B1\"].options(ndim=2).value" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [ 506 | "# Vertical range (two-dimensional)\n", 507 | "sheet1[\"A1:A2\"].options(ndim=2).value" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "# Using the NumPy array converter behaves the same:\n", 517 | "# vertical range leads to a one-dimensional array\n", 518 | "sheet1[\"A1:A2\"].options(np.array).value" 519 | 
] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "# Preserving the column orientation\n", 528 | "sheet1[\"A1:A2\"].options(np.array, ndim=2).value" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": null, 534 | "metadata": {}, 535 | "outputs": [], 536 | "source": [ 537 | "# If you need to write out a list vertically,\n", 538 | "# the \"transpose\" option comes in handy\n", 539 | "sheet1[\"D1\"].options(transpose=True).value = [100, 200]" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "# Write out some sample data\n", 549 | "sheet1[\"A13\"].value = [dt.datetime(2020, 1, 1), None, 1.0]" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": {}, 556 | "outputs": [], 557 | "source": [ 558 | "# Read it back using the default options\n", 559 | "sheet1[\"A13:C13\"].value" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "# Read it back using non-default options\n", 569 | "sheet1[\"A13:C13\"].options(empty=\"NA\",\n", 570 | " dates=dt.date,\n", 571 | " numbers=int).value" 572 | ] 573 | }, 574 | { 575 | "cell_type": "markdown", 576 | "metadata": {}, 577 | "source": [ 578 | "## Charts, Pictures and Defined Names" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": null, 584 | "metadata": {}, 585 | "outputs": [], 586 | "source": [ 587 | "sheet1[\"A15\"].value = [[None, \"North\", \"South\"],\n", 588 | " [\"Last Year\", 2, 5],\n", 589 | " [\"This Year\", 3, 6]]" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": null, 595 | "metadata": {}, 596 | "outputs": [], 597 | "source": [ 598 | "chart = sheet1.charts.add(top=sheet1[\"A19\"].top,\n", 599 | " 
left=sheet1[\"A19\"].left)\n", 600 | "chart.chart_type = \"column_clustered\"\n", 601 | "chart.set_source_data(sheet1[\"A15\"].expand())" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": null, 607 | "metadata": {}, 608 | "outputs": [], 609 | "source": [ 610 | "# Read in the chart data as DataFrame\n", 611 | "df = sheet1[\"A15\"].expand().options(pd.DataFrame).value\n", 612 | "df" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [ 621 | "# Enable Matplotlib by using the notebook magic command\n", 622 | "# and switch to the \"seaborn\" style\n", 623 | "%matplotlib inline\n", 624 | "import matplotlib.pyplot as plt\n", 625 | "plt.style.use(\"seaborn\")" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "# The pandas plot method returns an \"axis\" object from\n", 635 | "# where you can get the figure. 
\"T\" transposes the\n", 636 | "# DataFrame to bring the plot into the desired orientation\n", 637 | "ax = df.T.plot.bar()\n", 638 | "fig = ax.get_figure()" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [ 647 | "# Send the plot to Excel\n", 648 | "plot = sheet1.pictures.add(fig, name=\"SalesPlot\",\n", 649 | " top=sheet1[\"H19\"].top,\n", 650 | " left=sheet1[\"H19\"].left)\n", 651 | "# Let's scale the plot to 70%\n", 652 | "plot.width, plot.height = plot.width * 0.7, plot.height * 0.7" 653 | ] 654 | }, 655 | { 656 | "cell_type": "code", 657 | "execution_count": null, 658 | "metadata": {}, 659 | "outputs": [], 660 | "source": [ 661 | "ax = (df + 1).T.plot.bar()\n", 662 | "plot = plot.update(ax.get_figure())" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "# The book scope is the default scope\n", 672 | "sheet1[\"A1:B2\"].name = \"matrix1\"" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "# For the sheet scope, prepend the sheet name with\n", 682 | "# an exclamation point\n", 683 | "sheet1[\"B10:E11\"].name = \"Sheet1!matrix2\"" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [ 692 | "# Now you can access the range by name\n", 693 | "sheet1[\"matrix1\"]" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": {}, 700 | "outputs": [], 701 | "source": [ 702 | "# If you access the names collection via the \"sheet1\" object,\n", 703 | "# it contains only names with that sheet's scope\n", 704 | "sheet1.names" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": null, 710 | "metadata": {}, 711 | "outputs": [], 712 | 
"source": [ 713 | "# If you access the names collection via the \"book\" object,\n", 714 | "# it contains all names, including book and sheet scope\n", 715 | "book.names" 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": null, 721 | "metadata": {}, 722 | "outputs": [], 723 | "source": [ 724 | "# Names have various methods and attributes.\n", 725 | "# You can, for example, get the respective range object.\n", 726 | "book.names[\"matrix1\"].refers_to_range" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": null, 732 | "metadata": {}, 733 | "outputs": [], 734 | "source": [ 735 | "# If you want to assign a name to a constant\n", 736 | "# or a formula, use the \"add\" method.\n", 737 | "# You may need to replace the decimal point with a comma\n", 738 | "# if your are using an international version of Excel.\n", 739 | "book.names.add(\"EURUSD\", \"=1.1151\")" 740 | ] 741 | }, 742 | { 743 | "cell_type": "markdown", 744 | "metadata": {}, 745 | "source": [ 746 | "# Advanced Topics" 747 | ] 748 | }, 749 | { 750 | "cell_type": "markdown", 751 | "metadata": {}, 752 | "source": [ 753 | "## Performance" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": null, 759 | "metadata": {}, 760 | "outputs": [], 761 | "source": [ 762 | "# Add a new sheet and write 150 values\n", 763 | "# to it to have something to work with\n", 764 | "sheet2 = book.sheets.add()\n", 765 | "sheet2[\"A1\"].value = np.arange(150).reshape(30, 5)" 766 | ] 767 | }, 768 | { 769 | "cell_type": "code", 770 | "execution_count": null, 771 | "metadata": {}, 772 | "outputs": [], 773 | "source": [ 774 | "%%time\n", 775 | "# This makes 150 cross-application calls\n", 776 | "for cell in sheet2[\"A1:E30\"]:\n", 777 | " cell.value += 1" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": null, 783 | "metadata": {}, 784 | "outputs": [], 785 | "source": [ 786 | "%%time\n", 787 | "# This makes just two cross-application 
calls\n", 788 | "values = sheet2[\"A1:E30\"].options(np.array).value\n", 789 | "sheet2[\"A1:E30\"].value = values + 1" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": null, 795 | "metadata": {}, 796 | "outputs": [], 797 | "source": [ 798 | "# With raw values, you must provide the full\n", 799 | "# target range, sheet[\"A35\"] doesn't work anymore\n", 800 | "sheet1[\"A35:B36\"].options(\"raw\").value = [[1, 2], [3, 4]]" 801 | ] 802 | } 803 | ], 804 | "metadata": { 805 | "kernelspec": { 806 | "display_name": "Python 3", 807 | "language": "python", 808 | "name": "python3" 809 | }, 810 | "language_info": { 811 | "codemirror_mode": { 812 | "name": "ipython", 813 | "version": 3 814 | }, 815 | "file_extension": ".py", 816 | "mimetype": "text/x-python", 817 | "name": "python", 818 | "nbconvert_exporter": "python", 819 | "pygments_lexer": "ipython3", 820 | "version": "3.7.4" 821 | } 822 | }, 823 | "nbformat": 4, 824 | "nbformat_minor": 4 825 | } 826 | -------------------------------------------------------------------------------- /ch11.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Case Study Preliminaries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Web APIs" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import json" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "pycharm": { 31 | "name": "#%%\n" 32 | } 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# A Python dictionary...\n", 37 | "user_dict = {\"name\": \"Jane Doe\",\n", 38 | " \"age\": 23,\n", 39 | " \"married\": False,\n", 40 | " \"children\": None,\n", 41 | " \"hobbies\": [\"hiking\", \"reading\"]}" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 
46 | "execution_count": null, 47 | "metadata": { 48 | "pycharm": { 49 | "name": "#%%\n" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# ...converted to a JSON string\n", 55 | "# by json.dumps (\"dump string\"). The \"indent\" parameter is\n", 56 | "# optional and prettifies the printing.\n", 57 | "user_json = json.dumps(user_dict, indent=4)\n", 58 | "print(user_json)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "pycharm": { 66 | "name": "#%%\n" 67 | } 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# Convert the JSON string back to a native Python data structure\n", 72 | "json.loads(user_json)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "import requests" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "pycharm": { 89 | "name": "#%%\n" 90 | } 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "response = requests.get(\"https://pypi.org/pypi/pandas/json\")\n", 95 | "response.status_code" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "pycharm": { 103 | "name": "#%%\n" 104 | } 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "# response.json()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "pycharm": { 116 | "name": "#%%\n" 117 | } 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "releases = []\n", 122 | "for version, files in response.json()['releases'].items():\n", 123 | " releases.append(f\"{version}: {files[0]['upload_time']}\")\n", 124 | "releases[:3] # show the first 3 elements of the list" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Databases" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | 
"outputs": [], 139 | "source": [ 140 | "import urllib.parse" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "pycharm": { 148 | "name": "#%%\n" 149 | } 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "urllib.parse.quote_plus(\"pa$$word\")" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# Let's start with the imports\n", 163 | "import sqlite3\n", 164 | "from sqlalchemy import create_engine\n", 165 | "import pandas as pd" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "pycharm": { 173 | "name": "#%%\n" 174 | } 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "# Our SQL query: \"select all columns from the packages table\"\n", 179 | "sql = \"SELECT * FROM packages\"" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "pycharm": { 187 | "name": "#%%\n" 188 | } 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "# Option 1: Database driver (sqlite3 is part of the standard library)\n", 193 | "# Using the connection as context manager automatically commits\n", 194 | "# the transaction or rolls it back in case of an error.\n", 195 | "with sqlite3.connect(\"packagetracker/packagetracker.db\") as con:\n", 196 | " cursor = con.cursor() # We need a cursor to run SQL queries\n", 197 | " result = cursor.execute(sql).fetchall() # Return all records\n", 198 | "result" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "pycharm": { 206 | "name": "#%%\n" 207 | } 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "# Option 2: SQLAlchemy\n", 212 | "# \"create_engine\" expects the connection string of your database.\n", 213 | "# Here, we can execute a query as a method of the connection object.\n", 214 | "engine = 
create_engine(\"sqlite:///packagetracker/packagetracker.db\")\n", 215 | "with engine.connect() as con:\n", 216 | " result = con.execute(sql).fetchall()\n", 217 | "result" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "pycharm": { 225 | "name": "#%%\n" 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "# Option 3: pandas\n", 231 | "# Providing a table name to \"read_sql\" reads the full table.\n", 232 | "# Pandas requires an SQLAlchemy engine that we reuse from\n", 233 | "# the previous example.\n", 234 | "df = pd.read_sql(\"packages\", engine, index_col=\"package_id\")\n", 235 | "df" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "pycharm": { 243 | "name": "#%%\n" 244 | } 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "# \"read_sql\" also accepts an SQL query\n", 249 | "pd.read_sql(sql, engine, index_col=\"package_id\")" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "pycharm": { 257 | "name": "#%%\n" 258 | } 259 | }, 260 | "outputs": [], 261 | "source": [ 262 | "# The DataFrame method \"to_sql\" writes DataFrames to tables\n", 263 | "# \"if_exists\" has to be either \"fail\", \"append\" or \"replace\"\n", 264 | "# and defines what happens if the table already exists\n", 265 | "df.to_sql(\"packages2\", con=engine, if_exists=\"append\")" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "pycharm": { 273 | "name": "#%%\n" 274 | } 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "# The previous command created a new table \"packages2\" and\n", 279 | "# inserted the records from the DataFrame df as we can\n", 280 | "# verify by reading it back\n", 281 | "pd.read_sql(\"packages2\", engine, index_col=\"package_id\")" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | 
"metadata": { 288 | "pycharm": { 289 | "name": "#%%\n" 290 | } 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "# Let's get rid of the table again by running the\n", 295 | "# \"drop table\" command via SQLAlchemy\n", 296 | "with engine.connect() as con:\n", 297 | " con.execute(\"DROP TABLE packages2\")" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "# Let's start by importing SQLAlchemy's text function\n", 307 | "from sqlalchemy.sql import text" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": { 314 | "pycharm": { 315 | "name": "#%%\n" 316 | } 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "# \":package_id\" is the placeholder\n", 321 | "sql = \"\"\"\n", 322 | "SELECT v.uploaded_at, v.version_string\n", 323 | "FROM packages p\n", 324 | "INNER JOIN package_versions v ON p.package_id = v.package_id\n", 325 | "WHERE p.package_id = :package_id\n", 326 | "ORDER BY v.uploaded_at\n", 327 | "\"\"\"" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "pycharm": { 335 | "name": "#%%\n" 336 | } 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "# Via SQLAlchemy\n", 341 | "with engine.connect() as con:\n", 342 | " result = con.execute(text(sql), package_id=1).fetchall()\n", 343 | "result[:3] # Print the first 3 records" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": { 350 | "pycharm": { 351 | "name": "#%%\n" 352 | } 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "# Via pandas\n", 357 | "pd.read_sql(text(sql), engine, parse_dates=[\"uploaded_at\"],\n", 358 | " params={\"package_id\": 1},\n", 359 | " index_col=[\"uploaded_at\"]).head(3)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "## Exceptions" 367 | ] 368 | }, 369 | { 370 | 
"cell_type": "code", 371 | "execution_count": null, 372 | "metadata": { 373 | "pycharm": { 374 | "name": "#%%\n" 375 | } 376 | }, 377 | "outputs": [], 378 | "source": [ 379 | "def print_reciprocal(number):\n", 380 | " result = 1 / number\n", 381 | " print(f\"The reciprocal is: {result}\")" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": { 388 | "pycharm": { 389 | "name": "#%%\n" 390 | } 391 | }, 392 | "outputs": [], 393 | "source": [ 394 | "print_reciprocal(0) # This will raise an error" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "pycharm": { 402 | "name": "#%%\n" 403 | } 404 | }, 405 | "outputs": [], 406 | "source": [ 407 | "def print_reciprocal(number):\n", 408 | " try:\n", 409 | " result = 1 / number\n", 410 | " except Exception as e:\n", 411 | " # \"as e\" makes the Exception object available as variable \"e\"\n", 412 | " # \"repr\" stands for \"printable representation\" of an object\n", 413 | " # and gives you back a string with the error message\n", 414 | " print(f\"There was an error: {repr(e)}\")\n", 415 | " result = \"N/A\"\n", 416 | " else:\n", 417 | " print(\"There was no error!\")\n", 418 | " finally:\n", 419 | " print(f\"The reciprocal is: {result}\")" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": { 426 | "pycharm": { 427 | "name": "#%%\n" 428 | } 429 | }, 430 | "outputs": [], 431 | "source": [ 432 | "print_reciprocal(10)" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": { 439 | "pycharm": { 440 | "name": "#%%\n" 441 | } 442 | }, 443 | "outputs": [], 444 | "source": [ 445 | "print_reciprocal(\"a\")" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": { 452 | "pycharm": { 453 | "name": "#%%\n" 454 | } 455 | }, 456 | "outputs": [], 457 | "source": [ 458 | "print_reciprocal(0)" 459 | 
] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "metadata": { 465 | "pycharm": { 466 | "name": "#%%\n" 467 | } 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "def print_reciprocal(number):\n", 472 | " try:\n", 473 | " result = 1 / number\n", 474 | " print(f\"The reciprocal is: {result}\")\n", 475 | " except (TypeError, ZeroDivisionError):\n", 476 | " print(\"Please type in any number except 0.\")" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": { 483 | "pycharm": { 484 | "name": "#%%\n" 485 | } 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "print_reciprocal(\"a\")" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": { 496 | "pycharm": { 497 | "name": "#%%\n" 498 | } 499 | }, 500 | "outputs": [], 501 | "source": [ 502 | "def print_reciprocal(number):\n", 503 | " try:\n", 504 | " result = 1 / number\n", 505 | " print(f\"The reciprocal is: {result}\")\n", 506 | " except TypeError:\n", 507 | " print(\"Please type in a number.\")\n", 508 | " except ZeroDivisionError:\n", 509 | " print(\"The reciprocal of 0 is not defined.\")" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "print_reciprocal(\"a\")" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "print_reciprocal(0)" 528 | ] 529 | } 530 | ], 531 | "metadata": { 532 | "kernelspec": { 533 | "display_name": "Python 3", 534 | "language": "python", 535 | "name": "python3" 536 | }, 537 | "language_info": { 538 | "codemirror_mode": { 539 | "name": "ipython", 540 | "version": 3 541 | }, 542 | "file_extension": ".py", 543 | "mimetype": "text/x-python", 544 | "name": "python", 545 | "nbconvert_exporter": "python", 546 | "pygments_lexer": "ipython3", 547 | "version": "3.7.4" 548 | 
} 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 4 552 | } 553 | -------------------------------------------------------------------------------- /ch12.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# User-Defined Functions (UDFs)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Function Decorators" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# This is the definition of the function decorator\n", 24 | "def verbose(func):\n", 25 | " def wrapper():\n", 26 | " print(\"Before calling the function.\")\n", 27 | " func()\n", 28 | " print(\"After calling the function.\")\n", 29 | " return wrapper" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Using a function decorator\n", 39 | "@verbose\n", 40 | "def print_hello():\n", 41 | " print(\"hello!\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# Effect of calling the decorated function\n", 51 | "print_hello()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Fetching Data from Google Trends" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from pytrends.request import TrendReq" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# First, let's instantiate a TrendRequest object\n", 77 | "trend = TrendReq()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": 
[], 85 | "source": [ 86 | "# Now we can print the suggestions as they would appear\n", 87 | "# online in the dropdown of Google Trends after typing in \"Python\"\n", 88 | "trend.suggestions(\"Python\")" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Caching" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "import time" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "cache = {}\n", 114 | "\n", 115 | "def slow_sum(a, b):\n", 116 | " key = (a, b)\n", 117 | " if key in cache:\n", 118 | " return cache[key]\n", 119 | " else:\n", 120 | " time.sleep(2) # sleep for 2 seconds\n", 121 | " result = a + b\n", 122 | " cache[key] = result\n", 123 | " return result" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "%%time\n", 133 | "slow_sum(1, 2)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "%%time\n", 143 | "slow_sum(1, 2)" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.7.4" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 4 168 | } 169 | -------------------------------------------------------------------------------- /conda/xl310.yml: -------------------------------------------------------------------------------- 1 | 
name: xl310 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10 6 | - pip=21.2.4 7 | - pip: 8 | - flake8==4.0.1 9 | - lxml==4.7.1 10 | - matplotlib==3.5.1 11 | - notebook==6.4.6 12 | - openpyxl==3.0.9 13 | - pandas==1.3.5 14 | - numpy==1.21.0 15 | - pillow==8.4.0 16 | - plotly==5.4.0 17 | - python-dateutil==2.8.2 18 | - requests==2.26.0 19 | - sqlalchemy==1.4.28 20 | - xlrd==2.0.1 21 | - xlsxwriter==3.0.2 22 | - xlutils==2.0.0 23 | - xlwings==0.25.3 24 | - xlwt==1.3.0 25 | - pytrends==4.7.3 26 | - pyxlsb==1.0.9 27 | -------------------------------------------------------------------------------- /conda/xl38.yml: -------------------------------------------------------------------------------- 1 | name: xl38 2 | channels: 3 | - defaults 4 | dependencies: 5 | - flake8=3.8.4 6 | - lxml=4.6.1 7 | - matplotlib=3.3.2 8 | - notebook=6.1.4 9 | - openpyxl=3.0.5 10 | - pandas=1.1.3 11 | - numpy=1.19.2 12 | - pillow=8.0.1 13 | - pip=20.2.4 14 | - plotly=4.14.1 15 | - python=3.8.5 16 | - python-dateutil=2.8.1 17 | - requests=2.24.0 18 | - sqlalchemy=1.3.20 19 | - xlrd=1.2.0 20 | - xlsxwriter=1.3.7 21 | - xlutils=2.0.0 22 | - xlwings=0.20.8 23 | - xlwt=1.3.0 24 | - pip: 25 | - pytrends==4.7.3 26 | - pyxlsb==1.0.7 -------------------------------------------------------------------------------- /debugging.py: -------------------------------------------------------------------------------- 1 | a = 3 2 | b = 4 3 | 4 | c = a + b 5 | 6 | print(c) 7 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # This is a copy of conda/xl38.yml (but without xlwings) for Binder 2 | name: xl38 3 | channels: 4 | - defaults 5 | dependencies: 6 | - flake8=3.8.4 7 | - lxml=4.6.1 8 | - matplotlib=3.3.2 9 | - notebook=6.1.4 10 | - openpyxl=3.0.5 11 | - pandas=1.1.3 12 | - numpy=1.19.2 13 | - pillow=8.0.1 14 | - pip=20.2.4 15 | - plotly=4.14.1 16 | - 
"""This module offers a read and write function to get
2-dimensional lists in and out of Excel files.

Readers: xlrd, OpenPyXL, pyxlsb. Writers: OpenPyXL, XlsxWriter, xlwt.
All backends are optional dependencies.
"""
import re
import itertools
import datetime as dt

# Optional dependencies: each backend is imported if available and set
# to None otherwise, so the isinstance dispatch below can first check
# the module object for truthiness.
try:
    import openpyxl
except ImportError:
    openpyxl = None
try:
    import pyxlsb
except ImportError:
    pyxlsb = None
try:
    import xlrd
    from xlrd.biffh import error_text_from_code
except ImportError:
    xlrd = None
try:
    import xlwt
except ImportError:
    xlwt = None
try:
    import xlsxwriter
except ImportError:
    xlsxwriter = None

# Compiled once at import time instead of on every call to
# xl_cell_to_rowcol (which runs for every string cell reference)
_A1_PATTERN = re.compile(r"(\$?)([A-Z]{1,3})(\$?)(\d+)")


def read(sheet, first_cell="A1", last_cell=None):
    """Read a 2-dimensional list from an Excel range.

    Parameters
    ----------
    sheet : object
        An xlrd, openpyxl or pyxlsb sheet object
    first_cell : str or tuple, optional
        Top-left corner of the Excel range you want to read.
        Can be a string like "A1" or a row/col tuple like (1, 1),
        default is "A1".
    last_cell : str or tuple, optional
        Bottom-right corner of the Excel range you want to read.
        Can be a string like "A1" or a row/col tuple like (1, 1),
        default is the bottom-right cell of the used range.

    Returns
    -------
    list
        A 2-dimensional list with the values of the Excel range

    Raises
    ------
    TypeError
        If sheet is not a supported sheet type (or the matching
        backend package is not installed).
    """
    # xlrd
    if xlrd and isinstance(sheet, xlrd.sheet.Sheet):
        # isinstance returns True if sheet is of type xlrd.sheet.Sheet
        if last_cell is None:
            # actual range with data, not used range
            last_cell = (sheet.nrows, sheet.ncols)
        # Transform "A1" notation into tuples of 1-based indices
        if not isinstance(first_cell, tuple):
            first_cell = xl_cell_to_rowcol(first_cell)
            first_cell = (first_cell[0] + 1, first_cell[1] + 1)
        if not isinstance(last_cell, tuple):
            last_cell = xl_cell_to_rowcol(last_cell)
            last_cell = (last_cell[0] + 1, last_cell[1] + 1)
        values = []
        for r in range(first_cell[0] - 1, last_cell[0]):
            row = []
            for c in range(first_cell[1] - 1, last_cell[1]):
                # Handle the different cell types
                if sheet.cell(r, c).ctype == xlrd.XL_CELL_DATE:
                    value = xlrd.xldate.xldate_as_datetime(
                        sheet.cell(r, c).value, sheet.book.datemode)
                elif sheet.cell(r, c).ctype in [xlrd.XL_CELL_EMPTY,
                                                xlrd.XL_CELL_BLANK]:
                    value = None
                elif sheet.cell(r, c).ctype == xlrd.XL_CELL_ERROR:
                    value = error_text_from_code[sheet.cell(r, c).value]
                elif sheet.cell(r, c).ctype == xlrd.XL_CELL_BOOLEAN:
                    value = bool(sheet.cell(r, c).value)
                else:
                    value = sheet.cell(r, c).value
                row.append(value)
            values.append(row)
        return values

    # OpenPyXL
    elif openpyxl and isinstance(
            sheet,
            (openpyxl.worksheet.worksheet.Worksheet,
             openpyxl.worksheet._read_only.ReadOnlyWorksheet)):
        if last_cell is None:
            # used range
            last_cell = (sheet.max_row, sheet.max_column)
        if not isinstance(first_cell, tuple):
            first_cell = openpyxl.utils.cell.coordinate_to_tuple(first_cell)
        if not isinstance(last_cell, tuple):
            last_cell = openpyxl.utils.cell.coordinate_to_tuple(last_cell)
        data = []
        for row in sheet.iter_rows(min_row=first_cell[0], min_col=first_cell[1],
                                   max_row=last_cell[0], max_col=last_cell[1],
                                   values_only=True):
            data.append(list(row))
        return data

    # pyxlsb
    elif pyxlsb and isinstance(sheet, pyxlsb.worksheet.Worksheet):
        # pyxlsb delivers error cells as raw codes; map them to the
        # strings Excel displays
        errors = {"0x0": "#NULL!", "0x7": "#DIV/0!", "0xf": "#VALUE!",
                  "0x17": "#REF!", "0x1d": "#NAME?", "0x24": "#NUM!",
                  "0x2a": "#N/A"}
        if not isinstance(first_cell, tuple):
            first_cell = xl_cell_to_rowcol(first_cell)
            first_cell = (first_cell[0] + 1, first_cell[1] + 1)
        if last_cell and not isinstance(last_cell, tuple):
            last_cell = xl_cell_to_rowcol(last_cell)
            last_cell = (last_cell[0] + 1, last_cell[1] + 1)
        data = []
        # sheet.rows() is a generator that requires islice to slice it
        for row in itertools.islice(sheet.rows(),
                                    first_cell[0] - 1,
                                    last_cell[0] if last_cell else None):
            data.append([errors.get(cell.v, cell.v) for cell in row]
                        [first_cell[1] - 1 : last_cell[1] if last_cell else None])
        return data
    else:
        raise TypeError(f"Couldn't handle sheet of type {type(sheet)}")


def write(sheet, values, first_cell="A1", date_format=None):
    """Write a 2-dimensional list to an Excel range.

    Parameters
    ----------
    sheet : object
        An openpyxl, xlsxwriter or xlwt sheet object. openpyxl's
        write_only=True mode is not supported.
    values : list
        A 2-dimensional list of values
    first_cell : str or tuple, optional
        Top-left corner of the Excel range where you want to write out
        the DataFrame. Can be a string like "A1" or a row/col tuple
        like (1, 1), default is "A1".
    date_format : str, optional
        Only accepted if sheet is an openpyxl or xlwt sheet. By default,
        formats dates in the following format: "mm/dd/yy". For xlsxwriter,
        set the format when you instantiate a Workbook by providing:
        options={"default_date_format": "mm/dd/yy"}

    Raises
    ------
    ValueError
        If date_format is passed together with an xlsxwriter sheet.
    TypeError
        If sheet is not a supported sheet type (or the matching
        backend package is not installed).
    """
    # OpenPyXL
    if openpyxl and isinstance(
            sheet, openpyxl.worksheet.worksheet.Worksheet):
        if date_format is None:
            date_format = "mm/dd/yy"
        if not isinstance(first_cell, tuple):
            # Consistent with read(): use the fully qualified helper
            first_cell = openpyxl.utils.cell.coordinate_to_tuple(first_cell)
        for i, row in enumerate(values):
            for j, value in enumerate(row):
                cell = sheet.cell(row=first_cell[0] + i,
                                  column=first_cell[1] + j)
                cell.value = value
                if date_format and isinstance(value, (dt.datetime, dt.date)):
                    cell.number_format = date_format

    # XlsxWriter
    elif xlsxwriter and isinstance(sheet, xlsxwriter.worksheet.Worksheet):
        if date_format is not None:
            raise ValueError("date_format must be set as Workbook option")
        if isinstance(first_cell, tuple):
            # Convert 1-based tuple to XlsxWriter's 0-based indices
            first_cell = first_cell[0] - 1, first_cell[1] - 1
        else:
            first_cell = xl_cell_to_rowcol(first_cell)
        for r, row_data in enumerate(values):
            sheet.write_row(first_cell[0] + r, first_cell[1], row_data)

    # xlwt
    elif xlwt and isinstance(sheet, xlwt.Worksheet):
        if date_format is None:
            date_format = "mm/dd/yy"
        date_format = xlwt.easyxf(num_format_str=date_format)
        if isinstance(first_cell, tuple):
            first_cell = (first_cell[0] - 1, first_cell[1] - 1)
        else:
            first_cell = xl_cell_to_rowcol(first_cell)
        for i, row in enumerate(values):
            for j, cell in enumerate(row):
                if isinstance(cell, (dt.datetime, dt.date)):
                    sheet.write(i + first_cell[0], j + first_cell[1],
                                cell, date_format)
                else:
                    sheet.write(i + first_cell[0], j + first_cell[1],
                                cell)
    else:
        raise TypeError(f"Couldn't handle sheet of type {type(sheet)}")


def xl_cell_to_rowcol(cell_str):
    """
    Convert a cell reference in A1 notation to a zero indexed row and column.

    Args:
        cell_str: A1 style string.

    Returns:
        row, col: Zero indexed cell row and column indices.

    Raises:
        ValueError: If cell_str is not a valid A1-style cell reference.

    This function is adapted from XlsxWriter
    Copyright (c) 2013-2020, John McNamara
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice, this
    list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    The views and conclusions contained in the software and documentation are those
    of the authors and should not be interpreted as representing official policies,
    either expressed or implied, of the FreeBSD Project.
    """
    if not cell_str:
        return 0, 0

    match = _A1_PATTERN.match(cell_str)
    if match is None:
        # Previously this crashed with an opaque AttributeError on
        # match.group(); fail with a clear message instead.
        raise ValueError(f"Invalid A1-style cell reference: {cell_str!r}")
    col_str = match.group(2)
    row_str = match.group(4)

    # Convert base26 column string to number.
    expn = 0
    col = 0
    for char in reversed(col_str):
        col += (ord(char) - ord("A") + 1) * (26 ** expn)
        expn += 1

    # Convert 1-index to zero-index
    row = int(row_str) - 1
    col -= 1

    return row, col
# Have SQLAlchemy enforce foreign keys with SQLite, see:
# https://docs.sqlalchemy.org/en/latest/dialects/sqlite.html#foreign-key-support
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """Turn on foreign-key enforcement for every new SQLite connection.

    SQLite disables foreign-key checks per connection by default, so this
    event hook issues the PRAGMA each time SQLAlchemy opens a raw
    connection. The isinstance check keeps the hook a no-op for any
    non-SQLite engine in the same process.
    """
    if isinstance(dbapi_connection, SQLite3Connection):
        cursor = dbapi_connection.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.close()


# We want the database file to sit next to this file.
# Here, we are turning the path into an absolute path.
this_dir = Path(__file__).resolve().parent
db_path = this_dir / "packagetracker.db"

# Database engine (module-level singleton shared by all functions below)
engine = sqlalchemy.create_engine(f"sqlite:///{db_path}")


def get_packages():
    """Get all packages as DataFrame, indexed by package_id."""

    return pd.read_sql_table("packages", con=engine, index_col="package_id")


def store_package(package_name):
    """Insert a new package_name into the packages table.

    Returns None on success, or an error string the caller can show to
    the user (errors are returned, not raised, because the Excel front
    end displays them in a feedback cell).
    """

    try:
        with engine.connect() as con:
            # NOTE(review): kwarg bind params on execute() are legacy
            # SQLAlchemy 1.x style — needs a parameter dict under 2.0
            con.execute(text("INSERT INTO packages (package_name) VALUES (:package_name)"),
                        package_name=package_name)
        return None
    except sqlalchemy.exc.IntegrityError:
        # UNIQUE(package_name) constraint violation
        return f"{package_name} already exists"
    except Exception as e:
        return repr(e)


def get_versions(package_name):
    """Get all versions for the package with the name package_name.

    Returns a DataFrame indexed by the parsed uploaded_at timestamps.
    """

    sql = """
    SELECT v.uploaded_at, v.version_string
    FROM packages p
    INNER JOIN package_versions v ON p.package_id = v.package_id
    WHERE p.package_name = :package_name
    """
    return pd.read_sql_query(text(sql), engine, parse_dates=["uploaded_at"],
                             params={"package_name": package_name},
                             index_col=["uploaded_at"])


def store_versions(df):
    """Insert the records of the provided DataFrame df into the package_versions table"""

    df.to_sql("package_versions", con=engine, if_exists="append", index=False)


def delete_versions():
    """Delete all records from the version table"""

    with engine.connect() as con:
        con.execute("DELETE FROM package_versions")


def create_db():
    """Run this function to create the database tables.
    In case of sqlite, this is also creating the database file.
    """

    sql_table_packages = """
    CREATE TABLE packages (
        package_id INTEGER PRIMARY KEY,
        package_name TEXT NOT NULL,
        UNIQUE(package_name)
    )
    """

    sql_table_versions = """
    CREATE TABLE package_versions (
        package_id INTEGER,
        version_string TEXT,
        uploaded_at TIMESTAMP NOT NULL,
        PRIMARY KEY (package_id, version_string),
        FOREIGN KEY (package_id) REFERENCES packages (package_id)
    )
    """

    sql_statements = [sql_table_packages, sql_table_versions]
    with engine.connect() as con:
        for sql in sql_statements:
            con.execute(sql)


if __name__ == "__main__":
    # Run this as a script to create the packagetracker.db database
    create_db()
import datetime as dt

from dateutil import tz
import requests
import pandas as pd
import matplotlib.pyplot as plt
import xlwings as xw

import database


# This is the part of the URL that is the same for every request
BASE_URL = "https://pypi.org/pypi"


def add_package():
    """ Adds a new package including the version history to the database.
    Triggers an update of the dropdown on the Tracker tab.

    Called from Excel via xlwings; reads the package name from the
    "new_package" named range and writes feedback to the cell next to it.
    """
    # Excel objects
    db_sheet = xw.Book.caller().sheets["Database"]
    package_name = db_sheet["new_package"].value
    feedback_cell = db_sheet["new_package"].offset(column_offset=1)

    # Clear feedback cell
    feedback_cell.clear_contents()

    # Check if the package exists on PyPI
    if not package_name:
        feedback_cell.value = "Error: Please provide a name!"
        return
    # A quick HEAD-style existence check against the PyPI JSON API;
    # anything other than 200 is treated as "package not found"
    if requests.get(f"{BASE_URL}/{package_name}/json",
                    timeout=6).status_code != 200:
        feedback_cell.value = "Error: Package not found!"
        return

    # Insert the package name into the packages table
    error = database.store_package(package_name)
    db_sheet["new_package"].clear_contents()

    # Show any errors, otherwise kick off a database update and
    # refresh the dropdown so you can select the new package
    if error:
        feedback_cell.value = f"Error: {error}"
    else:
        feedback_cell.value = f"Added {package_name} successfully."
        update_database()
        refresh_dropdown()


def update_database():
    """ Deletes all records from the versions table, fetches all
    data again from PyPI and stores the versions again in the table.

    Writes a per-package log and a "last updated" timestamp to the
    Database sheet.
    """
    # Excel objects
    sheet_db = xw.Book.caller().sheets["Database"]

    # Clear logs
    sheet_db["log"].expand().clear_contents()

    # Keeping things super simple: Delete all versions for all packages
    # and repopulate the package_versions table from scratch
    database.delete_versions()
    df_packages = database.get_packages()
    logs = []

    # Query the PyPI REST API
    for package_id, row in df_packages.iterrows():
        ret = requests.get(f"{BASE_URL}/{row['package_name']}/json",
                           timeout=6)
        if ret.status_code == 200:
            ret = ret.json()  # parse the JSON string into a dictionary
            logs.append(f"INFO: {row['package_name']} downloaded successfully")
        else:
            logs.append(f"ERROR: Could not download data for {row['package_name']}")
            continue

        # Instantiate a DataFrame by extracting data from the REST API response
        releases = []
        for version, files in ret["releases"].items():
            if ret["releases"][version]:  # ignore releases without info
                releases.append((files[0]["upload_time"], version, package_id))
        df_releases = pd.DataFrame(columns=["uploaded_at", "version_string", "package_id"],
                                   data=releases)
        df_releases["uploaded_at"] = pd.to_datetime(df_releases["uploaded_at"])
        df_releases = df_releases.sort_values("uploaded_at")
        database.store_versions(df_releases)
        logs.append(f"INFO: {row['package_name']} stored to database successfully")

    # Write out the last updated timestamp and logs
    sheet_db["updated_at"].value = (f"Last updated: "
                                    f"{dt.datetime.now(tz.UTC).isoformat()}")
    # transpose=True writes the log list down a column instead of across a row
    sheet_db["log"].options(transpose=True).value = logs


def show_history():
    """ Shows the latest release and plots the release history
    (number of releases per year)
    """
    # Excel objects
    book = xw.Book.caller()
    tracker_sheet = book.sheets["Tracker"]
    package_name = tracker_sheet["package_selection"].value
    feedback_cell = tracker_sheet["package_selection"].offset(column_offset=1)
    picture_cell = tracker_sheet["latest_release"].offset(row_offset=2)

    # Use the "seaborn" style for the Matplotlib plots produced by pandas
    # NOTE(review): the built-in "seaborn" style name was removed in newer
    # Matplotlib releases — confirm against the pinned matplotlib version
    plt.style.use("seaborn")

    # Check input
    if not package_name:
        feedback_cell.value = ("Error: Please select a package first! "
                               "You may first have to add one to the database.")
        return

    # Clear output cells and picture
    feedback_cell.clear_contents()
    tracker_sheet["latest_release"].clear_contents()
    if "releases_per_year" in tracker_sheet.pictures:
        tracker_sheet.pictures["releases_per_year"].delete()

    # Get all versions of the package from the database
    try:
        df_releases = database.get_versions(package_name)
    except Exception as e:
        feedback_cell.value = repr(e)
        return
    if df_releases.empty:
        feedback_cell.value = f"Error: Didn't find any releases for {package_name}"
        return

    # Calculate the number of releases per year and plot it
    df_releases_yearly = df_releases.resample("Y").count()
    df_releases_yearly.index = df_releases_yearly.index.year
    df_releases_yearly.index.name = "Years"
    df_releases_yearly = df_releases_yearly.rename(
        columns={"version_string": "Number of Releases"})
    ax = df_releases_yearly.plot.bar(
        title=f"Number of Releases per Year "
              f"({tracker_sheet['package_selection'].value})")

    # Write the results and plot to Excel
    # The DataFrame index holds the upload timestamps, so its max is the
    # most recent release
    version = df_releases.loc[df_releases.index.max(), "version_string"]
    tracker_sheet["latest_release"].value = (
        f"{version} ({df_releases.index.max():%B %d, %Y})")
    tracker_sheet.pictures.add(ax.get_figure(), name="releases_per_year",
                               top=picture_cell.top,
                               left=picture_cell.left)


def refresh_dropdown():
    """ Refreshes the dropdown on the Tracker tab with the content of
    the packages table.
    """
    # Excel objects
    book = xw.Book.caller()
    dropdown_sheet = book.sheets["Dropdown"]
    tracker_sheet = book.sheets["Tracker"]

    # Clear the current value in the dropdown
    tracker_sheet["package_selection"].clear_contents()

    # If the Excel table has non-empty rows, delete them before repopulating
    # it again with the values from the packages database table
    if dropdown_sheet["dropdown_content"].value:
        dropdown_sheet["dropdown_content"].delete()
    dropdown_sheet["dropdown_content"].options(
        header=False, index=False).value = database.get_packages()


if __name__ == "__main__":
    # Running this module directly simulates Excel calling add_package()
    # via the mock caller — useful for debugging without the workbook macro
    xw.Book("packagetracker.xlsm").set_mock_caller()
    add_package()
import multiprocessing
from itertools import repeat

import pandas as pd
import openpyxl


def _read_sheet(filename, sheet_name):
    """Read a single sheet of an Excel file into a DataFrame.

    The leading underscore in the function name is used by convention
    to mark it as "private", i.e., it shouldn't be used directly outside
    of this module.

    Returns the (sheet_name, DataFrame) pair so the parallel caller can
    rebuild a {sheet_name: DataFrame} mapping from the worker results.
    """
    df = pd.read_excel(filename, sheet_name=sheet_name, engine='openpyxl')
    return sheet_name, df
import multiprocessing
from itertools import repeat

import xlrd
import excel


def _read_sheet(filename, sheetname):
    """Open the workbook and read a single sheet; returns (name, data).

    The leading underscore in the function name is used by convention
    to mark it as "private", i.e., it shouldn't be used directly outside
    of this module. Each worker process opens its own workbook handle
    (on_demand=True loads only the requested sheet).
    """
    with xlrd.open_workbook(filename, on_demand=True) as book:
        sheet = book.sheet_by_name(sheetname)
        data = excel.read(sheet)
    return sheet.name, data


def open_workbook(filename, sheetnames=None):
    """Read the given sheets (default: all) of an xls file in parallel.

    Returns a dict mapping each sheet name to a 2-dimensional list of
    cell values.
    """
    if sheetnames is None:
        with xlrd.open_workbook(filename, on_demand=True) as book:
            sheetnames = book.sheet_names()
    with multiprocessing.Pool() as pool:
        # By default, Pool spawns as many processes as there are CPU cores.
        # starmap maps a tuple of arguments to a function. The zip expression
        # produces a list with tuples of the following form:
        # [('filename.xlsx', 'Sheet1'), ('filename.xlsx', 'Sheet2')]
        data = pool.starmap(_read_sheet, zip(repeat(filename), sheetnames))
    return {i[0]: i[1] for i in data}
2 | """ 3 | 4 | import datetime as dt 5 | 6 | 7 | TEMPERATURE_SCALES = ("fahrenheit", "kelvin", 8 | "celsius") 9 | 10 | 11 | class TemperatureConverter: 12 | pass # Doesn't do anything at the moment 13 | 14 | 15 | def convert_to_celsius(degrees, source="fahrenheit"): 16 | """This function converts degrees Fahrenheit or Kelvin 17 | into degrees Celsius. 18 | """ 19 | if source.lower() == "fahrenheit": 20 | return (degrees-32) * (5/9) 21 | elif source.lower() == "kelvin": 22 | return degrees - 273.15 23 | else: 24 | return f"Don't know how to convert from {source}" 25 | 26 | 27 | celsius = convert_to_celsius(44, source="fahrenheit") 28 | non_celsius_scales = TEMPERATURE_SCALES[:-1] 29 | 30 | print("Current time: " + dt.datetime.now().isoformat()) 31 | print(f"The temperature in Celsius is: {celsius}") 32 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # If you don't use Anaconda, you can use this file to install all dependencies. 
2 | # Run it like so from a Command Prompt or Terminal: 3 | # 4 | # pip install -r requirements.txt 5 | 6 | flake8==3.8.4 7 | lxml==4.6.2 8 | matplotlib==3.3.2 9 | notebook==6.1.5 10 | openpyxl==3.0.5 11 | pandas==1.1.3 12 | numpy==1.19.2 13 | pillow==8.0.1 14 | plotly==4.12.0 15 | python-dateutil==2.8.1 16 | requests==2.25.0 17 | sqlalchemy==1.3.20 18 | xlrd==1.2.0 19 | xlsxwriter==1.3.7 20 | xlutils==2.0.0 21 | xlwings==0.20.8 22 | xlwt==1.3.0 23 | pytrends==4.7.3 24 | pyxlsb==1.0.6 25 | -------------------------------------------------------------------------------- /sales_data/existing/April.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/April.xls -------------------------------------------------------------------------------- /sales_data/existing/August.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/August.xls -------------------------------------------------------------------------------- /sales_data/existing/December.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/December.xls -------------------------------------------------------------------------------- /sales_data/existing/February.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/February.xls -------------------------------------------------------------------------------- /sales_data/existing/January.xls: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/January.xls -------------------------------------------------------------------------------- /sales_data/existing/July.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/July.xls -------------------------------------------------------------------------------- /sales_data/existing/June.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/June.xls -------------------------------------------------------------------------------- /sales_data/existing/March.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/March.xls -------------------------------------------------------------------------------- /sales_data/existing/May.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/May.xls -------------------------------------------------------------------------------- /sales_data/existing/November.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/November.xls -------------------------------------------------------------------------------- /sales_data/existing/October.xls: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/October.xls -------------------------------------------------------------------------------- /sales_data/existing/September.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/existing/September.xls -------------------------------------------------------------------------------- /sales_data/new/April.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/April.xlsx -------------------------------------------------------------------------------- /sales_data/new/August.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/August.xlsx -------------------------------------------------------------------------------- /sales_data/new/December.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/December.xlsx -------------------------------------------------------------------------------- /sales_data/new/February.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/February.xlsx -------------------------------------------------------------------------------- /sales_data/new/January.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/January.xlsx -------------------------------------------------------------------------------- /sales_data/new/July.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/July.xlsx -------------------------------------------------------------------------------- /sales_data/new/June.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/June.xlsx -------------------------------------------------------------------------------- /sales_data/new/March.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/March.xlsx -------------------------------------------------------------------------------- /sales_data/new/May.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/May.xlsx -------------------------------------------------------------------------------- /sales_data/new/November.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/November.xlsx -------------------------------------------------------------------------------- /sales_data/new/October.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/October.xlsx -------------------------------------------------------------------------------- /sales_data/new/September.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/sales_data/new/September.xlsx -------------------------------------------------------------------------------- /sales_report_openpyxl.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | from openpyxl.styles import Font, Alignment 5 | from openpyxl.formatting.rule import CellIsRule 6 | from openpyxl.chart import BarChart, Reference 7 | from openpyxl.chart.shapes import GraphicalProperties 8 | from openpyxl.drawing.line import LineProperties 9 | 10 | 11 | # Directory of this file 12 | this_dir = Path(__file__).resolve().parent 13 | 14 | # Read in all files 15 | parts = [] 16 | for path in (this_dir / "sales_data").rglob("*.xls*"): 17 | print(f'Reading {path.name}') 18 | part = pd.read_excel(path) 19 | parts.append(part) 20 | 21 | # Combine the DataFrames from each file into a single DataFrame 22 | df = pd.concat(parts) 23 | 24 | # Pivot each store into a column and sum up all transactions per date 25 | pivot = pd.pivot_table(df, 26 | index="transaction_date", columns="store", 27 | values="amount", aggfunc="sum") 28 | 29 | # Resample to end of month and assign an index name 30 | summary = pivot.resample("M").sum() 31 | summary.index.name = "Month" 32 | 33 | # Sort columns by total revenue 34 | summary = summary.loc[:, summary.sum().sort_values().index] 35 | 36 | # Add row and column totals: Using "append" together with "rename" 37 | # is a convenient way to add a row to the bottom of a DataFrame 38 | summary.loc[:, "Total"] = summary.sum(axis=1) 39 | summary 
= summary.append(summary.sum(axis=0).rename("Total")) 40 | 41 | #### Write summary report to Excel file #### 42 | 43 | # DataFrame position and number of rows/columns 44 | # openpxyl uses 1-based indices 45 | startrow, startcol = 3, 2 46 | nrows, ncols = summary.shape 47 | 48 | # Starting with pandas 1.3.0, the following line will raise a FutureWarning. 49 | # To fix this, replace write_only=True with engine_kwargs={"write_only": True} 50 | with pd.ExcelWriter(this_dir / "sales_report_openpyxl.xlsx", 51 | engine="openpyxl", write_only=True) as writer: 52 | # pandas uses 0-based indices 53 | summary.to_excel(writer, sheet_name="Sheet1", 54 | startrow=startrow - 1, startcol=startcol - 1) 55 | 56 | # Get openpyxl book and sheet object 57 | book = writer.book 58 | sheet = writer.sheets["Sheet1"] 59 | 60 | # Set title 61 | sheet.cell(row=1, column=startcol, value="Sales Report") 62 | sheet.cell(row=1, column=startcol).font = Font(size=24, bold=True) 63 | 64 | # Sheet formatting 65 | sheet.sheet_view.showGridLines = False 66 | 67 | # Format the DataFrame with 68 | # - number format 69 | # - column width 70 | # - conditional formatting 71 | for row in range(startrow + 1, startrow + nrows + 1): 72 | for col in range(startcol + 1, startcol + ncols + 1): 73 | cell = sheet.cell(row=row, column=col) 74 | cell.number_format = "#,##0" 75 | cell.alignment = Alignment(horizontal="center") 76 | 77 | for cell in sheet["B"]: 78 | cell.number_format = "mmm yy" 79 | 80 | for col in range(startcol, startcol + ncols + 1): 81 | cell = sheet.cell(row=startrow, column=col) 82 | sheet.column_dimensions[cell.column_letter].width = 14 83 | 84 | first_cell = sheet.cell(row=startrow + 1, column=startcol + 1) 85 | last_cell = sheet.cell(row=startrow + nrows, column=startcol + ncols) 86 | range_address = f"{first_cell.coordinate}:{last_cell.coordinate}" 87 | sheet.conditional_formatting.add(range_address, 88 | CellIsRule(operator="lessThan", 89 | formula=["20000"], 90 | stopIfTrue=True, 91 | 
font=Font(color="E93423"))) 92 | 93 | # Chart 94 | chart = BarChart() 95 | chart.type = "col" 96 | chart.title = "Sales per Month and Store" 97 | chart.height = 11.5 98 | chart.width = 20.5 99 | 100 | # Add each column as a series, ignoring total row and col 101 | data = Reference(sheet, min_col=startcol + 1, min_row=startrow, 102 | max_row=startrow + nrows - 1, 103 | max_col=startcol + ncols - 1) 104 | categories = Reference(sheet, min_col=startcol, min_row=startrow + 1, 105 | max_row=startrow + nrows - 1) 106 | chart.add_data(data, titles_from_data=True) 107 | chart.set_categories(categories) 108 | cell = sheet.cell(row=startrow + nrows + 2, column=startcol) 109 | sheet.add_chart(chart=chart, anchor=cell.coordinate) 110 | 111 | # Chart formatting 112 | chart.y_axis.title = "Sales" 113 | chart.x_axis.title = summary.index.name 114 | # Hide y-axis line: spPR stands for ShapeProperties 115 | chart.y_axis.spPr = GraphicalProperties(ln=LineProperties(noFill=True)) 116 | -------------------------------------------------------------------------------- /sales_report_pandas.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | 5 | 6 | # Directory of this file 7 | this_dir = Path(__file__).resolve().parent 8 | 9 | # Read in all Excel files from all subfolders of sales_data 10 | parts = [] 11 | for path in (this_dir / "sales_data").rglob("*.xls*"): 12 | print(f'Reading {path.name}') 13 | part = pd.read_excel(path, index_col="transaction_id") 14 | parts.append(part) 15 | 16 | # Combine the DataFrames from each file into a single DataFrame 17 | # pandas takes care of properly aligning the columns 18 | df = pd.concat(parts) 19 | 20 | # Pivot each store into a column and sum up all transactions per date 21 | pivot = pd.pivot_table(df, 22 | index="transaction_date", columns="store", 23 | values="amount", aggfunc="sum") 24 | 25 | # Resample to end of month and assign an index name 26 | 
summary = pivot.resample("M").sum() 27 | summary.index.name = "Month" 28 | 29 | # Write summary report to Excel file 30 | summary.to_excel(this_dir / "sales_report_pandas.xlsx") 31 | -------------------------------------------------------------------------------- /sales_report_xlsxwriter.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | 5 | 6 | # Directory of this file 7 | this_dir = Path(__file__).resolve().parent 8 | 9 | # Read in all files 10 | parts = [] 11 | for path in (this_dir / "sales_data").rglob("*.xls*"): 12 | print(f'Reading {path.name}') 13 | part = pd.read_excel(path) 14 | parts.append(part) 15 | 16 | # Combine the DataFrames from each file into a single DataFrame 17 | df = pd.concat(parts) 18 | 19 | # Pivot each store into a column and sum up all transactions per date 20 | pivot = pd.pivot_table(df, 21 | index="transaction_date", columns="store", 22 | values="amount", aggfunc="sum") 23 | 24 | # Resample to end of month and assign an index name 25 | summary = pivot.resample("M").sum() 26 | summary.index.name = "Month" 27 | 28 | # Sort columns by total revenue 29 | summary = summary.loc[:, summary.sum().sort_values().index] 30 | 31 | # Add row and column totals: Using "append" together with "rename" 32 | # is a convenient way to add a row to the bottom of a DataFrame 33 | summary.loc[:, "Total"] = summary.sum(axis=1) 34 | summary = summary.append(summary.sum(axis=0).rename("Total")) 35 | 36 | #### Write summary report to Excel file #### 37 | 38 | # DataFrame position and number of rows/columns 39 | # xlsxwriter uses 0-based indices 40 | startrow, startcol = 2, 1 41 | nrows, ncols = summary.shape 42 | 43 | with pd.ExcelWriter(this_dir / "sales_report_xlsxwriter.xlsx", 44 | engine="xlsxwriter", datetime_format="mmm yy") as writer: 45 | summary.to_excel(writer, sheet_name="Sheet1", 46 | startrow=startrow, startcol=startcol) 47 | 48 | # Get xlsxwriter book and 
sheet object 49 | book = writer.book 50 | sheet = writer.sheets["Sheet1"] 51 | 52 | # Set title 53 | title_format = book.add_format({"bold": True, "size": 24}) 54 | sheet.write(0, startcol, "Sales Report", title_format) 55 | 56 | # Sheet formatting 57 | # 2 = hide on screen and when printing 58 | sheet.hide_gridlines(2) 59 | 60 | # Format the DataFrame with 61 | # - number format 62 | # - column width 63 | # - conditional formatting 64 | number_format = book.add_format({"num_format": "#,##0", 65 | "align": "center"}) 66 | below_target_format = book.add_format({"font_color": "#E93423"}) 67 | sheet.set_column(first_col=startcol, last_col=startcol + ncols, 68 | width=14, cell_format=number_format) 69 | sheet.conditional_format(first_row=startrow + 1, 70 | first_col=startcol + 1, 71 | last_row=startrow + nrows, 72 | last_col=startcol + ncols, 73 | options={"type": "cell", "criteria": "<=", 74 | "value": 20000, 75 | "format": below_target_format}) 76 | 77 | # Chart 78 | chart = book.add_chart({"type": "column"}) 79 | chart.set_title({"name": "Sales per Month and Store"}) 80 | chart.set_size({"width": 830, "height": 450}) 81 | 82 | # Add each column as a series, ignoring total row and col 83 | for col in range(1, ncols): 84 | chart.add_series({ 85 | # [sheetname, first_row, first_col, last_row, last_col] 86 | "name": ["Sheet1", startrow, startcol + col], 87 | "categories": ["Sheet1", startrow + 1, startcol, 88 | startrow + nrows - 1, startcol], 89 | "values": ["Sheet1", startrow + 1, startcol + col, 90 | startrow + nrows - 1, startcol + col], 91 | }) 92 | 93 | # Chart formatting 94 | chart.set_x_axis({"name": summary.index.name, 95 | "major_tick_mark": "none"}) 96 | chart.set_y_axis({"name": "Sales", 97 | "line": {"none": True}, 98 | "major_gridlines": {"visible": True}, 99 | "major_tick_mark": "none"}) 100 | 101 | # Add the chart to the sheet 102 | sheet.insert_chart(startrow + nrows + 2, startcol, chart) 103 | 
-------------------------------------------------------------------------------- /sales_report_xlwings.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | import xlwings as xw 5 | 6 | 7 | # Directory of this file 8 | this_dir = Path(__file__).resolve().parent 9 | 10 | # Read in all files 11 | parts = [] 12 | for path in (this_dir / "sales_data").rglob("*.xls*"): 13 | print(f'Reading {path.name}') 14 | part = pd.read_excel(path) 15 | parts.append(part) 16 | 17 | # Combine the DataFrames from each file into a single DataFrame 18 | df = pd.concat(parts) 19 | 20 | # Pivot each store into a column and sum up all transactions per date 21 | pivot = pd.pivot_table(df, 22 | index="transaction_date", columns="store", 23 | values="amount", aggfunc="sum") 24 | 25 | # Resample to end of month and assign an index name 26 | summary = pivot.resample("M").sum() 27 | summary.index.name = "Month" 28 | 29 | # Sort columns by total revenue 30 | summary = summary.loc[:, summary.sum().sort_values().index] 31 | 32 | # Add row and column totals: Using "append" together with "rename" 33 | # is a convenient way to add a row to the bottom of a DataFrame 34 | summary.loc[:, "Total"] = summary.sum(axis=1) 35 | summary = summary.append(summary.sum(axis=0).rename("Total")) 36 | 37 | #### Write summary report to Excel file #### 38 | 39 | # Open the template, paste the data, autofit the columns 40 | # and adjust the chart source. Then save it under a different name. 
41 | template = xw.Book(this_dir / "xl" / "sales_report_template.xlsx") 42 | sheet = template.sheets["Sheet1"] 43 | sheet["B3"].value = summary 44 | sheet["B3"].expand().columns.autofit() 45 | sheet.charts["Chart 1"].set_source_data(sheet["B3"].expand()[:-1, :-1]) 46 | template.save(this_dir / "sales_report_xlwings.xlsx") 47 | -------------------------------------------------------------------------------- /temperature.py: -------------------------------------------------------------------------------- 1 | TEMPERATURE_SCALES = ("fahrenheit", "kelvin", "celsius") 2 | 3 | 4 | def convert_to_celsius(degrees, source="fahrenheit"): 5 | if source.lower() == "fahrenheit": 6 | return (degrees-32) * (5/9) 7 | elif source.lower() == "kelvin": 8 | return degrees - 273.15 9 | else: 10 | return f"Don't know how to convert from {source}" 11 | 12 | 13 | print("This is the temperature module.") 14 | -------------------------------------------------------------------------------- /udfs/describe/describe.py: -------------------------------------------------------------------------------- 1 | import xlwings as xw 2 | import pandas as pd 3 | 4 | 5 | @xw.func 6 | @xw.arg("df", pd.DataFrame, index=True, header=True) 7 | def describe(df): 8 | return df.describe() 9 | -------------------------------------------------------------------------------- /udfs/describe/describe.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/describe/describe.xlsm -------------------------------------------------------------------------------- /udfs/first_udf/first_udf.py: -------------------------------------------------------------------------------- 1 | import xlwings as xw 2 | 3 | 4 | def main(): 5 | wb = xw.Book.caller() 6 | sheet = wb.sheets[0] 7 | if sheet["A1"].value == "Hello xlwings!": 8 | sheet["A1"].value = "Bye xlwings!" 
9 | else: 10 | sheet["A1"].value = "Hello xlwings!" 11 | 12 | 13 | @xw.func 14 | def hello(name): 15 | return f"Hello {name}!" 16 | 17 | 18 | if __name__ == "__main__": 19 | xw.Book("first_udf.xlsm").set_mock_caller() 20 | main() 21 | -------------------------------------------------------------------------------- /udfs/first_udf/first_udf.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/first_udf/first_udf.xlsm -------------------------------------------------------------------------------- /udfs/google_trends/google_trends.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pytrends.request import TrendReq 3 | import matplotlib.pyplot as plt 4 | import xlwings as xw 5 | 6 | 7 | @xw.func(call_in_wizard=False) 8 | @xw.arg("mids", doc="Machine IDs: A range of max 5 cells") 9 | @xw.arg("start_date", doc="A date-formatted cell") 10 | @xw.arg("end_date", doc="A date-formatted cell") 11 | def get_interest_over_time(mids, start_date, end_date): 12 | """Query Google Trends - replaces the Machine ID (mid) of 13 | common programming languages with their human-readable 14 | equivalent in the return value, e.g., instead of "/m/05z1_" 15 | it returns "Python". 
16 | """ 17 | # Check and transform parameters 18 | assert len(mids) <= 5, "Too many mids (max: 5)" 19 | start_date = start_date.date().isoformat() 20 | end_date = end_date.date().isoformat() 21 | 22 | # Make the Google Trends request and return the DataFrame 23 | trend = TrendReq(timeout=10) 24 | trend.build_payload(kw_list=mids, 25 | timeframe=f"{start_date} {end_date}") 26 | df = trend.interest_over_time() 27 | 28 | # Replace Google's "mid" with a human-readable word 29 | mids = {"/m/05z1_": "Python", "/m/02p97": "JavaScript", 30 | "/m/0jgqg": "C++", "/m/07sbkfb": "Java", "/m/060kv": "PHP"} 31 | df = df.rename(columns=mids) 32 | 33 | # Drop the isPartial column 34 | return df.drop(columns="isPartial") 35 | 36 | 37 | @xw.func 38 | @xw.arg("df", pd.DataFrame) 39 | def plot(df, name, caller): 40 | plt.style.use("seaborn") 41 | if not df.empty: 42 | caller.sheet.pictures.add(df.plot().get_figure(), 43 | top=caller.offset(row_offset=1).top, 44 | left=caller.left, 45 | name=name, update=True) 46 | return f"" 47 | 48 | 49 | if __name__ == "__main__": 50 | xw.serve() 51 | -------------------------------------------------------------------------------- /udfs/google_trends/google_trends.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/google_trends/google_trends.xlsm -------------------------------------------------------------------------------- /udfs/google_trends_cache/google_trends_cache.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | 3 | import pandas as pd 4 | from pytrends.request import TrendReq 5 | import matplotlib.pyplot as plt 6 | import xlwings as xw 7 | 8 | 9 | @lru_cache() 10 | @xw.func(call_in_wizard=False) 11 | @xw.arg("mids", xw.Range, doc="Machine IDs: A range of max 5 cells") 12 | @xw.arg("start_date", doc="A date-formatted cell") 13 
| @xw.arg("end_date", doc="A date-formatted cell") 14 | def get_interest_over_time(mids, start_date, end_date): 15 | """Query Google Trends - replaces the Machine ID (mid) of 16 | common programming languages with their human-readable 17 | equivalent in the return value, e.g., instead of "/m/05z1_" 18 | it returns "Python". 19 | """ 20 | mids = mids.value 21 | 22 | # Check and transform parameters 23 | assert len(mids) <= 5, "Too many mids (max: 5)" 24 | start_date = start_date.date().isoformat() 25 | end_date = end_date.date().isoformat() 26 | 27 | # Make the Google Trends request and return the DataFrame 28 | trend = TrendReq(timeout=10) 29 | trend.build_payload(kw_list=mids, 30 | timeframe=f"{start_date} {end_date}") 31 | df = trend.interest_over_time() 32 | 33 | # Replace Google's "mid" with a human-readable word 34 | mids = {"/m/05z1_": "Python", "/m/02p97": "JavaScript", 35 | "/m/0jgqg": "C++", "/m/07sbkfb": "Java", "/m/060kv": "PHP"} 36 | df = df.rename(columns=mids) 37 | 38 | # Drop the isPartial column 39 | return df.drop(columns="isPartial") 40 | 41 | 42 | @xw.func 43 | @xw.arg("df", pd.DataFrame) 44 | def plot(df, name, caller): 45 | plt.style.use("seaborn") 46 | if not df.empty: 47 | caller.sheet.pictures.add(df.plot().get_figure(), 48 | top=caller.offset(row_offset=1).top, 49 | left=caller.left, 50 | name=name, update=True) 51 | return f"" 52 | 53 | 54 | if __name__ == "__main__": 55 | xw.serve() 56 | -------------------------------------------------------------------------------- /udfs/google_trends_cache/google_trends_cache.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/google_trends_cache/google_trends_cache.xlsm -------------------------------------------------------------------------------- /udfs/importsub/importsub.py: -------------------------------------------------------------------------------- 1 | 
import xlwings as xw 2 | 3 | 4 | @xw.sub 5 | def main(): 6 | wb = xw.Book.caller() 7 | sheet = wb.sheets[0] 8 | if sheet["A1"].value == "Hello xlwings!": 9 | sheet["A1"].value = "Bye xlwings!" 10 | else: 11 | sheet["A1"].value = "Hello xlwings!" 12 | 13 | 14 | @xw.func 15 | def hello(name): 16 | return f"Hello {name}!" 17 | 18 | 19 | if __name__ == "__main__": 20 | xw.Book("importsub.xlsm").set_mock_caller() 21 | main() 22 | -------------------------------------------------------------------------------- /udfs/importsub/importsub.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/importsub/importsub.xlsm -------------------------------------------------------------------------------- /udfs/raw_values/raw_values.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xlwings as xw 3 | 4 | 5 | @xw.func 6 | @xw.ret("raw") 7 | def randn(i=1000, j=1000): 8 | """Returns an array with dimensions (i, j) with normally distributed 9 | pseudorandom numbers provided by NumPy's random.randn 10 | """ 11 | return np.random.randn(i, j) 12 | -------------------------------------------------------------------------------- /udfs/raw_values/raw_values.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/raw_values/raw_values.xlsm -------------------------------------------------------------------------------- /udfs/revenues/revenues.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xlwings as xw 3 | 4 | 5 | @xw.func 6 | def revenue(base_fee, users, price): 7 | return base_fee + users * price 8 | 9 | 10 | @xw.func 11 | @xw.arg("users", np.array, ndim=2) 12 | @xw.arg("price", 
np.array) 13 | def revenue2(base_fee, users, price): 14 | return base_fee + users * price 15 | -------------------------------------------------------------------------------- /udfs/revenues/revenues.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/udfs/revenues/revenues.xlsm -------------------------------------------------------------------------------- /xl/array_calculations.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/array_calculations.xlsx -------------------------------------------------------------------------------- /xl/big.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/big.xlsx -------------------------------------------------------------------------------- /xl/course_participants.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/course_participants.xlsx -------------------------------------------------------------------------------- /xl/currency_converter.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/currency_converter.xlsx -------------------------------------------------------------------------------- /xl/macro.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/macro.xlsm 
-------------------------------------------------------------------------------- /xl/sales_report_template.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/sales_report_template.xlsx -------------------------------------------------------------------------------- /xl/stores.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/stores.xls -------------------------------------------------------------------------------- /xl/stores.xlsb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/stores.xlsb -------------------------------------------------------------------------------- /xl/stores.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/stores.xlsx -------------------------------------------------------------------------------- /xl/vba.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/vba.xlsm -------------------------------------------------------------------------------- /xl/vbaProject.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fzumstein/python-for-excel/ffbb631e1a9e0dee9bc9b3098f1448db58736aec/xl/vbaProject.bin --------------------------------------------------------------------------------