├── 05-12
│   └── guess-the-number.py
├── 07-17
│   └── character-count.py
├── 10-29
│   └── phone-and-email.py
├── 11-30
│   └── file-sizes.py
├── 12-35
│   └── box-print.py
├── 13-38
│   └── map-it.py
├── 13-40
│   └── get-amazon-price.py
├── 14-42
│   └── example.xlsx
├── 14-44
│   ├── combinedminutes.pdf
│   ├── meetingminutes1.pdf
│   └── meetingminutes2.pdf
├── 14-45
│   ├── demo.docx
│   ├── demo2.docx
│   └── get-text.py
└── README.md
/05-12/guess-the-number.py:
--------------------------------------------------------------------------------
import random

# Number-guessing game: the player gets six valid guesses to find a random
# number from 1 to 20. Non-numeric input does not use up a guess.

print('Hello, what is your name?')

name = input()
answer = random.randint(1, 20)

print('Well, ' + name + ', I am thinking of a number from 1 to 20.')

guessesTaken = 0

while guessesTaken < 6:
    print('Take a guess.')
    guess = input()

    # Reject non-numeric input before it is counted as a guess:
    try:
        number = int(guess)
    except ValueError:
        print('You must enter a number.')
        continue

    guessesTaken += 1

    if number < answer:
        print('Your guess is too low.')
    elif number > answer:
        print('Your guess is too high.')
    else:
        suffix = ' guess!' if guessesTaken == 1 else ' guesses!'
        print('Good job, ' + name + '! You guessed my number in '
              + str(guessesTaken) + suffix)
        break
else:
    # Reached only when the loop ends without "break", i.e., all six
    # guesses were used and none was correct:
    print('Nope. The number I was thinking of was ' + str(answer) + '.')
36 |
--------------------------------------------------------------------------------
/07-17/character-count.py:
--------------------------------------------------------------------------------
import pprint

message = 'All cows eat grass'

# Maps each character (letters folded to lowercase) to its number of
# occurrences in "message":
count = {}

for char in message:
    key = char.lower()
    count[key] = count.get(key, 0) + 1

pprint.pprint(count)
--------------------------------------------------------------------------------
/10-29/phone-and-email.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3

"""Extract phone numbers and email addresses from the clipboard text.

The matches are copied back to the clipboard, one per line, with phone
numbers listed before email addresses.
"""

import re

# Create regex for phone numbers:

phoneRegex = re.compile(r'''
    # Valid phone number formats:
    # 415-555-0000, 555-0000, (415) 555-0000, 555-0000 ext 12345,
    # 555-0000 ext. 12345, 555-0000 x12345

    (?<!\d)                     # Do not start inside a longer run of digits
    (
        (?:
            (?:\d{3}|\(\d{3}\)) # Area code
            (?:\s|-)            # First separator
        )?                      # Area code and separator are optional together
        \d{3}                   # First 3 digits
        -                       # Separator
        \d{4}                   # Last 4 digits
        (?!\d)                  # Do not stop inside a longer run of digits
        (?:
            \s*(?:ext\.?|x)\s*  # Extension word, with optional spacing
            \d{2,5}             # Extension number
        )?                      # Extension is optional
    )
''', re.VERBOSE)

# Create regex for email addresses:

emailRegex = re.compile(r'''
    [a-zA-Z0-9_.+-]+            # Name part
    @                           # @ symbol
    [a-zA-Z0-9-]+               # First domain label
    (?:\.[a-zA-Z0-9-]+)*        # Further dot-separated labels; a trailing
                                # sentence period is therefore not captured
''', re.VERBOSE)


def extractPhoneNumbers(text):
    """Return a list of every phone number in text, in order of appearance."""
    return [match.group(0) for match in phoneRegex.finditer(text)]


def extractEmails(text):
    """Return a list of every email address in text, in order of appearance."""
    # The pattern has no capturing groups, so findall() yields full matches.
    return emailRegex.findall(text)


def formatResults(phoneNumbers, emails):
    """Return all matches as one string, one per line, phone numbers first."""
    # Joining a single combined list avoids a leading or trailing blank line
    # when one of the two lists is empty.
    return '\n'.join(list(phoneNumbers) + list(emails))


def main():
    """Read the clipboard, extract the matches, and copy them back."""
    # Imported here so the extraction helpers above can be used without
    # the third-party clipboard module installed.
    import pyperclip

    # Get text from clipboard:
    text = pyperclip.paste()

    # Copy extracted phone numbers and email addresses to clipboard:
    results = formatResults(extractPhoneNumbers(text), extractEmails(text))
    pyperclip.copy(results)


if __name__ == '__main__':
    main()
48 |
--------------------------------------------------------------------------------
/11-30/file-sizes.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3

"""Print the combined size, in bytes, of the files directly inside a folder.

Usage: ./file-sizes.py [FOLDER]
"""

import os
import sys

# Folder scanned when no path is given on the command line:
DEFAULT_PATHNAME = '/Users/bronson/Downloads'


def getTotalFileSize(pathname):
    """Return the total size in bytes of the files directly inside pathname.

    Subfolders (and anything else that is not a file) are skipped; the
    folder is not searched recursively.
    """
    totalSize = 0

    for filename in os.listdir(pathname):
        fullPath = os.path.join(pathname, filename)
        # Skip to next filename if the current item is not a file:
        if not os.path.isfile(fullPath):
            continue
        # Add to totalSize if the current item is a file:
        totalSize += os.path.getsize(fullPath)

    return totalSize


if __name__ == '__main__':
    pathname = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PATHNAME
    print(getTotalFileSize(pathname))
16 |
--------------------------------------------------------------------------------
/12-35/box-print.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3

"""
Print a hollow box drawn with a single repeated symbol.

For example, ./box-print.py "*" 15 5 prints:

***************
*             *
*             *
*             *
***************

"""

import sys


def boxPrint(symbol, width, height):
    """Print a hollow box of the given width and height.

    symbol must be a string of length 1. width and height may be integers
    or numeric strings (command-line arguments arrive as strings) and must
    each be 2 or greater.

    Raises ValueError if symbol is not a single character, if either
    dimension is smaller than 2, or if a dimension cannot be converted to
    an integer.
    """
    if len(symbol) != 1:
        raise ValueError('"symbol" must be a string of length 1.')

    # Convert once instead of on every use:
    width = int(width)
    height = int(height)

    if width < 2 or height < 2:
        raise ValueError('"width" and "height" must be 2 or greater.')

    edge = symbol * width
    middle = symbol + (' ' * (width - 2)) + symbol

    print(edge)

    for _ in range(height - 2):
        print(middle)

    print(edge)


if __name__ == '__main__':
    # Stop program if user enters fewer than 3 arguments:
    if len(sys.argv) < 4:
        sys.exit('Use the following syntax: ./box-print.py "*" 15 5')

    boxPrint(sys.argv[1], sys.argv[2], sys.argv[3])
35 |
--------------------------------------------------------------------------------
/13-38/map-it.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3

"""Open Google Maps in the web browser for a street address.

The address is taken from the command line arguments or, if none are
given, from the clipboard.
"""

import sys
import urllib.parse
import webbrowser

MAPS_URL = 'https://www.google.com/maps/place/'


def buildMapUrl(address):
    """Return the Google Maps URL for address.

    Surrounding whitespace is removed and the address is percent-encoded so
    that characters such as spaces, '#', '?', and '&' cannot corrupt the URL.
    """
    return MAPS_URL + urllib.parse.quote(address.strip(), safe='')


def main():
    """Determine the address and open its map in the browser."""
    # Check if command line arguments were passed:
    if len(sys.argv) > 1:
        # Concatenate arguments to form a valid street address, e.g.,
        # ['mapit.py', '123', 'Main', 'St.'] -> '123 Main St.'
        address = ' '.join(sys.argv[1:])
    else:
        # Get address from the user's clipboard if no arguments provided.
        # The clipboard module is only needed (and imported) in this case:
        import pyperclip
        address = pyperclip.paste()

    webbrowser.open(buildMapUrl(address))


if __name__ == '__main__':
    main()
18 |
--------------------------------------------------------------------------------
/13-40/get-amazon-price.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3

"""Print the price of an Amazon product.

The product URL is taken from the first command line argument or, if none
is given, from the clipboard.
"""

import sys
import pyperclip
import bs4
import requests

# Seconds to wait for a response; without a timeout a stalled connection
# would make the script hang indefinitely:
REQUEST_TIMEOUT = 10


def getAmazonPrice(productUrl):
    """Return the price text scraped from the product page at productUrl.

    Raises an exception from requests if the page cannot be downloaded
    (raise_for_status() rejects error responses), and IndexError if the
    page has no matching price element.
    """
    res = requests.get(productUrl, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    # Only works on products with a '#newOfferAccordionRow' element:
    elements = soup.select(
        """#newOfferAccordionRow > div > div.a-accordion-row-a11y > a > h5 >
        div > div.a-column.a-span4.a-text-right.a-span-last >
        span.a-size-medium.a-color-price.header-price"""
    )

    if not elements:
        # Same exception type that indexing the empty result would raise,
        # but with a message that explains the failure:
        raise IndexError('No price element found on page: ' + productUrl)

    return elements[0].text.strip()


if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Set product URL if included as an argument:
        productUrl = sys.argv[1]
    else:
        # Get product URL from the user's clipboard if no argument provided:
        productUrl = pyperclip.paste()

    price = getAmazonPrice(productUrl)

    print('The price is ' + price)
34 |
--------------------------------------------------------------------------------
/14-42/example.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-42/example.xlsx
--------------------------------------------------------------------------------
/14-44/combinedminutes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-44/combinedminutes.pdf
--------------------------------------------------------------------------------
/14-44/meetingminutes1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-44/meetingminutes1.pdf
--------------------------------------------------------------------------------
/14-44/meetingminutes2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-44/meetingminutes2.pdf
--------------------------------------------------------------------------------
/14-45/demo.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-45/demo.docx
--------------------------------------------------------------------------------
/14-45/demo2.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-45/demo2.docx
--------------------------------------------------------------------------------
/14-45/get-text.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3

"""Print all of the text in a Word document.

Usage: ./get-text.py [FILENAME]
"""

import sys

import docx

# Document read when no filename is given on the command line:
DEFAULT_FILENAME = (
    '/Users/bronson/Udemy/automate-the-boring-stuff-with-python/14-45/demo.docx'
)


# Returns a single string value of all text in a Word document:
def getText(filename):
    """Return the text of every paragraph in filename, joined by newlines."""
    documentObject = docx.Document(filename)
    return '\n'.join(
        paragraph.text for paragraph in documentObject.paragraphs
    )


if __name__ == '__main__':
    filename = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILENAME
    print(getText(filename))
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Notes on "Automate the Boring Stuff with Python Programming"
2 |
3 | ## About
4 |
5 | This repository is derived from the lectures covered in [Automate the Boring Stuff with Python Programming](https://www.udemy.com/automate/) by Al Sweigart. This repository is intended to serve as a personal quick reference guide and not a full-fledged tutorial. For more in-depth coverage, please consult the cited Udemy course, or review the free e-book available [here](http://automatetheboringstuff.com/).
6 |
7 |
8 |
9 | ## Table of Contents
10 |
11 | - Section 1: [Python Basics](#id-section1)
12 | - Section 2: [Flow Control](#id-section2)
13 | - Section 3: [Functions](#id-section3)
14 | - Section 4: [Handling Errors with Try/Except](#id-section4)
15 | - Section 6: [Lists](#id-section6)
16 | - Section 7: [Dictionaries](#id-section7)
17 | - Section 8: [More About Strings](#id-section8)
18 | - Section 9: [Running Programs from the Command Line](#id-section9)
19 | - Section 10: [Regular Expressions](#id-section10)
20 | - Section 11: [Files](#id-section11)
21 | - Section 12: [Debugging](#id-section12)
22 | - Section 13: [Web Scraping](#id-section13)
23 | - Section 14: [Excel, Word, and PDF Documents](#id-section14)
24 | - Section 15: [Email](#id-section15)
25 | - Section 16: [GUI Automation](#id-section16)
26 |
27 |
28 |
29 | ## Section 1: Python Basics
30 |
31 | ### 1.2 - Basic Terminology and Using IDLE
32 |
33 | #### Expressions
34 |
35 | - Expressions consist of values and operators that reduce down to a single value (including combinations of numbers and strings):
36 |
37 | ```python
38 | 2 + 2 # 4
39 |
40 | 'Alice' + 'Bob' # 'AliceBob'
41 |
42 | 'Hello' + '!' * 10 # 'Hello!!!!!!!!!!'
43 | ```
44 |
45 | #### Variables
46 |
47 | - Declaring a variable:
48 |
49 | ```python
50 | spam = 'Hello'
51 |
52 | spam + ' World' # Hello World
53 | ```
54 |
55 | ### 1.3 - Writing Our First Program
56 |
57 | - Create a file named `file.py` containing the following code:
58 |
59 | ```python
60 | # This program says hello and asks for your name:
61 |
62 | print('What is your name?')
63 | myName = input()
64 | print('Nice to meet you, ' + myName)
65 | print('The length of your name is:')
66 | print(len(myName))
67 | print('What is your age?')
68 | myAge = input()
69 | print('You will be ' + str(int(myAge) + 1) + ' in a year.')
70 | ```
71 |
72 | - `print()` displays the contents (arguments) within its parentheses on the screen.
73 |
74 | - `input()` accepts the value of the user's keyboard input and returns a **string** value.
75 |
76 | - **NOTE:** The program will wait until the input is entered before continuing to execute the remaining code.
77 |
78 | - `len()` takes a string argument and evaluates to the integer value of the string's length:
79 |
80 | ```python
81 | len('Al') # 2
82 | ```
83 |
84 | - `str()` takes an argument and converts it into a string data type:
85 |
86 | ```python
87 | str(42) # '42'
88 | ```
89 |
90 | - `int()` takes an argument and converts it into an integer data type:
91 |
92 | ```python
93 | int('42') # 42
94 | ```
95 |
96 | - **NOTE:** If you want to convert to a **floating point** number (i.e., a number with a decimal point) rather than an integer (i.e., a whole number), use `float()`:
97 |
98 | ```python
99 | float('3.14') # 3.14
100 | ```
101 |
102 | - **NOTE:** On OS X, you may need to run `python3` rather than `python` to run the current version of Python.
103 |
104 | [Back to TOC](#id-toc)
105 |
106 |
107 |
108 | ## Section 2: Flow Control
109 |
110 | ### 2.4 - Flow Charts and Basic Flow Control Concepts
111 |
112 | #### Booleans
113 |
114 | - Booleans have two values: `True` and `False` (which **must** be capitalized).
115 |
116 | #### Comparison Operators
117 |
118 | - Overview:
119 |
120 | | Operator | Meaning |
121 | | :------: | ------------------------ |
122 | | == | Equal to |
123 | | != | Not equal to |
124 | | < | Less than |
125 | | > | Greater than |
126 | | <= | Less than or equal to |
127 | | >= | Greater than or equal to |
128 |
129 | - Expressions with comparison operators evaluate to a Boolean value:
130 |
131 | ```python
132 | 42 == 42 # True
133 |
134 | 42 >= 100 # False
135 |
136 | # Integers and strings will never be equal to each other:
137 |
138 | 42 == '42' # False
139 |
140 | # However, floats and integers can be equal to each other:
141 |
142 | 42.0 == 42 # True
143 | ```
144 |
145 | #### Boolean Operators
146 |
147 | - Overview:
148 |
149 | ```python
150 | # The "and" operator returns true when all values are true:
151 |
152 | True and True # True
153 |
154 | True and False # False
155 |
156 | # The "or" operator returns true when at least one value is true:
157 |
158 | True or False # True
159 |
160 | # The "not" operator evaluates to the opposite Boolean value:
161 |
162 | not True # False
163 | ```
164 |
165 | - Example:
166 |
167 | ```python
168 | myAge = 26
169 |
170 | myPet = 'cat'
171 |
172 | myAge > 20 and myPet == 'cat' # True
173 | ```
174 |
175 | ### 2.5 - If, Else, and Elif Statements
176 |
177 | - Example:
178 |
179 | ```python
180 | # If the condition after the "if" statement is true, then the indented line
181 | # below the conditional statement will run, and the "else" block is skipped:
182 |
183 | if answer < 42:
184 | print('Too low')
185 |
186 | # If the preceding "if" statement (or "elif" statement) is false, then the
187 | # subsequent "elif" statement will be evaluated:
188 |
189 | elif answer > 42:
190 | print('Too high')
191 |
192 | # If all prior conditional statements are false, the "else" block will run:
193 |
194 | else:
195 | print('Correct')
196 | ```
197 |
198 | - **NOTE:** New "blocks" are designated by increasing indentation and begin only after statements that end with a colon (`:`).
199 |
200 | - Python allows for "truthy" and "falsey" evaluations:
201 |
202 | ```python
203 | print('Enter a name.')
204 |
205 | name = input()
206 |
207 | if name:
208 | print('Thank you for entering a name.')
209 | else:
210 | print('You did not enter a name.')
211 | ```
212 |
213 | - **TIP:** If you want to evaluate the truthiness of a value, execute the `bool()` function with the value passed in as an argument:
214 |
215 | ```python
216 | bool(42) # True
217 |
218 | bool(0) # False
219 |
220 | bool('Hello') # True
221 |
222 | bool('') # False
223 | ```
224 |
225 | ### 2.6 - While Loops
226 |
227 | - Examples:
228 |
229 | ```python
230 | # Prints "Hello, world." to the console five times:
231 |
232 | spam = 0
233 |
234 | while spam < 5:
235 | print('Hello, world.')
236 | spam = spam + 1
237 |
238 | # Requests input until user enters required string:
239 |
240 | name = ''
241 |
242 | while name != 'your name':
243 | print('Please type your name.')
244 | name = input()
245 |
246 | print('Thank you.')
247 | ```
248 |
249 | - The `break` statement is used to break out of a loop (including an infinite loop):
250 |
251 | ```python
252 | name = ''
253 |
254 | while True:
255 | print('Please type your name.')
256 | name = input()
257 | if name == 'your name':
258 | break
259 |
260 | print('Thank you.')
261 | ```
262 |
263 | - The `continue` statement is used to return to the start of the loop and reevaluate the loop's condition:
264 |
265 | ```python
266 | # Prints 1, 2, 4, and 5. Number 3 is not printed due to "continue":
267 |
268 | spam = 0
269 |
270 | while spam < 5:
271 | spam = spam + 1
272 | if spam == 3:
273 | continue
274 | print(spam)
275 | ```
276 |
277 | ### 2.7 - For Loops
278 |
279 | - Example:
280 |
281 | ```python
282 | # The variable "i" is set to 0 on the first iteration, and its value is
283 | # printed to the console on each iteration. The value of "i" increases
284 | # by 1 on each iteration up to (but not including) 5. The iteration
285 | # process terminates after 4 (the last value before 5) has been printed:
286 |
287 | for i in range(5):
288 | print(i)
289 | ```
290 |
291 | - **NOTE:** If `range()` is given only **one** argument, then Python will generate a sequence of numbers starting at 0 (as a range object data type, which is a list-like value known as a "sequence"), and the stopping point will be the value of the argument (which must be an integer). However, `range()` can accept up to three arguments (all of which must be integers):
292 |
293 | ```python
294 | # range([start], stop[, step])
295 | ```
296 |
297 | - `start`: Starting number of the sequence.
298 | - `stop`: Generate numbers up to (but not including) this number.
299 | - `step`: Difference between each number in the sequence.
300 |
301 | - For loops are able to use `break` and `continue` statements in the same manner as while loops.
302 |
303 | [Back to TOC](#id-toc)
304 |
305 |
306 |
307 | ## Section 3: Functions
308 |
309 | ### 3.8 - Python's Built-In Functions
310 |
311 | #### Standard Library
312 |
313 | - Python comes with a set of modules called the **[Standard Library](https://docs.python.org/3/library/)**. Each module is a Python program that contains a related group of functions you can use in your programs (e.g., numeric and mathematical modules). Before you can use the functions in a module, you must **import** the module with an `import` statement:
314 |
315 | ```python
316 | # Returns a random integer from 1 to 10:
317 |
318 | import random
319 |
320 | random.randint(1, 10)
321 | ```
322 |
323 | - In the example above, `randint()` is a function within the `random` module. You specify which function you want to use in a module by using dot notation.
324 |
325 | - You can specify **multiple** modules for import by separating their names with **commas**:
326 |
327 | ```python
328 | import random, sys, os, math
329 | ```
330 |
331 | - It is generally considered best to use the syntax outlined above when using a function in a Standard Library module. However, if you want to import and call a function **directly** without needing to reference the module name each time, use the `from` form of an import statement:
332 |
333 | ```python
334 | # Imports all functions from the "random" module, not the module itself:
335 |
336 | from random import *
337 |
338 | randint(1, 10)
339 | ```
340 |
341 | - **TIP:** To terminate a program early, use the `exit()` function of the `sys` module:
342 |
343 | ```python
344 | # Terminates after printing "Hello":
345 |
346 | import sys
347 |
348 | print('Hello')
349 | sys.exit()
350 | print('Goodbye')
351 | ```
352 |
353 | #### Third-Party Modules
354 |
355 | - Modules can be installed by using the `pip` (or `pip3`) tool from the terminal:
356 |
357 | ```
358 | $ pip install ${MODULE_NAME}
359 | ```
360 |
361 | - **NOTE:** See [here](http://automatetheboringstuff.com/appendixa/) for more information on installing third-party modules.
362 |
363 | - One noteworthy module is **[pyperclip](https://pypi.org/project/pyperclip/)** which allows you to copy and paste text to and from the clipboard:
364 |
365 | ```python
366 | import pyperclip
367 |
368 | pyperclip.copy('The text to be copied to the clipboard.')
369 | pyperclip.paste() # 'The text to be copied to the clipboard.'
370 | ```
371 |
372 | ### 3.9 - Writing Your Own Functions
373 |
374 | - Define a function by using the `def` keyword:
375 |
376 | ```python
377 | # Define a function called "hello()" that accepts a "name" parameter:
378 |
379 | def hello(name):
380 | print('Hello, ' + name)
381 |
382 | hello('Alice') # "Hello, Alice"
383 | ```
384 |
385 | - All function calls return a value. You can specify what value should be returned by the function by using a `return` statement:
386 |
387 | ```python
388 | def plusOne(number):
389 | return number + 1
390 |
391 | newNumber = plusOne(5)
392 |
393 | print(newNumber) # 6
394 | ```
395 |
396 | - **NOTE:** If a `return` statement has no value (or if the `return` statement is omitted entirely), the function still returns a value called `None` (i.e., a value that represents a lack of a value). The interactive shell does not display a `None` result.
397 |
398 | - Some functions accept **keyword arguments**, which are used as optional arguments to pass to a function call. For example, the `print()` function adds a newline character by default to the end of the string it prints. However, this behavior can be modified by changing the value of the `end` keyword argument:
399 |
400 | ```python
401 | # Prints "Hello" and "World" on two separate lines:
402 |
403 | print('Hello')
404 | print('World')
405 |
406 | # Prints "Hello World" on one line:
407 |
408 | print('Hello', end=' ')
409 | print('World')
410 | ```
411 |
412 | - **NOTE:** The `print()` function also contains a `sep` keyword argument that specifies what character should be used to separate multiple arguments (an empty space by default):
413 |
414 | ```python
415 | # Prints 'cat dog mouse':
416 |
417 | print('cat', 'dog', 'mouse')
418 |
419 | # Prints 'cat, dog, mouse':
420 |
421 | print('cat', 'dog', 'mouse', sep=', ')
422 | ```
423 |
424 | ### 3.10 - Global and Local Scopes
425 |
426 | - Variables inside of a function can have the same name as variables outside of the function, but they are considered two separate variables due to scope. Variables defined in a function belong to that function's **local scope**, whereas all variables defined outside of functions belong to the application's **global scope**:
427 |
428 | ```python
429 | spam = 42 # Global variable
430 |
431 | def eggs():
432 | spam = 42 # Local variable
433 | ```
434 |
435 | - Key Points:
436 |
437 | **1**\. Code in the global scope cannot use any local variables.
438 |
439 | **2**\. Code in a local scope can access global variables.
440 |
441 | **3**\. Code in one function's local scope cannot use variables in another local scope.
442 |
443 | **4**\. You can use the same name for different variables if they are in different scopes.
444 |
445 | - If you want to reassign the value of a global variable (e.g. `eggs = 42`) from within a local scope, you cannot simply say `eggs = 'Hello'`, as this will merely create a local variable named "eggs" within the local scope. Rather, you must use a `global` statement:
446 |
447 | ```python
448 | eggs = 42
449 |
450 | def spam():
451 | global eggs
452 | eggs = 'Hello' # Overwrites 42 in global "eggs" variable
453 | print(eggs) # Prints 'Hello'
454 |
455 | spam()
456 |
457 | print(eggs) # Prints 'Hello'
458 | ```
459 |
460 | [Back to TOC](#id-toc)
461 |
462 |
463 |
464 | ## Section 4: Handling Errors with Try/Except
465 |
466 | ### 4.11 - Try and Except Statements
467 |
468 | - By default, a Python application will crash if an error occurs while executing code:
469 |
470 | ```python
471 | def div42by(divideBy):
472 | return 42 / divideBy
473 |
474 | print(div42by(2)) # 21.0
475 | print(div42by(0)) # (Will crash the application)
476 | print(div42by(21)) # (Will not be printed)
477 | ```
478 |
479 | - In order to detect and handle errors while still allowing the program to run, you must use `try`/`except` statements:
480 |
481 | ```python
482 | def div42by(divideBy):
483 | try:
484 | return 42 / divideBy
485 | except ZeroDivisionError:
486 | print('Error: You tried to divide by zero.')
487 |
488 | print(div42by(2)) # 21.0
489 | print(div42by(0)) # Prints 'Error: You tried to divide by zero.' and then None
490 | print(div42by(21)) # 2.0
491 | ```
492 |
493 | - **NOTE:** `ZeroDivisionError` is one of Python's [Built-in Exceptions](https://docs.python.org/3/library/exceptions.html). You can omit the exception type if you want Python to handle all errors via the code in the `except` block.
494 |
495 | [Back to TOC](#id-toc)
496 |
497 |
498 |
499 | ## Section 6: Lists
500 |
501 | ### 6.13 - The List Data Type
502 |
503 | - A **list** is a value containing sequential, comma-delimited items within square brackets. To access items in a list, use an integer index for the item's position in the list (starting with 0):
504 |
505 | ```python
506 | spam = [['cat', 'bat', 'rat'], 'elephant']
507 |
508 | spam[0] # ['cat', 'bat', 'rat']
509 |
510 | spam[1] # 'elephant'
511 |
512 | spam[0][1] # 'bat'
513 |
514 | # You can also access items in reverse order by using a negative integer,
515 | # with -1 starting as the last item in the list:
516 |
517 | spam[0][-1] # 'rat'
518 |
519 | # An item's value can be reassigned by accessing the index:
520 |
521 | spam[0] = 'mouse'
522 |
523 | spam # ['mouse', 'elephant']
524 | ```
525 |
526 | - To view the **length** of a list, use the `len()` function:
527 |
528 | ```python
529 | spam = ['cat', 'bat', 'rat']
530 |
531 | len(spam) # 3
532 | ```
533 |
534 | - A **slice** retrieves multiple items from a list as a new list (leaving the original list unchanged) by specifying the index at which the slice begins and the index at which the slice ends (non-inclusive):
535 |
536 | ```python
537 | spam = ['cat', 'bat', 'rat']
538 |
539 | spam[0:2] # ['cat', 'bat']
540 |
541 | # You can redefine multiple items in a list by using a slice:
542 |
543 | spam[1:3] = ['dog', 'fish']
544 |
545 | spam # ['cat', 'dog', 'fish']
546 | ```
547 |
548 | - **TIP:** You can omit the number on either side of the colon. If you omit the number to the left of the colon, the slice will start at index 0 and end at the number to the right. If you omit the number to the right, the slice will start from the number on the left and end at the number that is the length of the list (allowing the slice to include the last item in the list):
549 |
550 | ```python
551 | spam = ['cat', 'bat', 'rat']
552 |
553 | spam[:2] # ['cat', 'bat']
554 | ```
555 |
556 | - To **delete** items from a list, use the `del` statement:
557 |
558 | ```python
559 | spam = ['cat', 'bat', 'elephant', 'rat']
560 |
561 | del spam[2]
562 |
563 | spam # ['cat', 'bat', 'rat']
564 | ```
565 |
566 | - To **concatenate** lists, use the `+` operator; to **replicate** a list, use the `*` operator:
567 |
568 | ```python
569 | [1, 2, 3] + [4, 5, 6] # [1, 2, 3, 4, 5, 6]
570 |
571 | [1, 2, 3] * 3 # [1, 2, 3, 1, 2, 3, 1, 2, 3]
572 | ```
573 |
574 | - To **convert** another iterable data type (e.g., a string) into a list, use the `list()` function:
575 |
576 | ```python
577 | list('hello') # ['h', 'e', 'l', 'l', 'o']
578 | ```
579 |
580 | - To determine whether an item is **contained** in a list, you can use the `in` and `not in` operators:
581 |
582 | ```python
583 | 'elephant' in ['cat', 'bat', 'rat'] # False
584 |
585 | 'elephant' not in ['cat', 'bat', 'rat'] # True
586 | ```
587 |
588 | ### 6.14 - For Loops with Lists, Multiple Assignment, and Augmented Operators
589 |
590 | - A list can be iterated over in a for loop in the same manner as a `range` object:
591 |
592 | ```python
593 | # Both loops produce the same output:
594 |
595 | for i in range(4):
596 | print(i)
597 |
598 | for i in [0, 1, 2, 3]:
599 | print(i)
600 | ```
601 |
602 | - **TIP:** The ability to transform a `range` object into a list allows for you to take advantage of the `range()` function's step parameter:
603 |
604 | ```python
605 | # Evaluates to a list of all even numbers from 0 to 98:
606 |
607 | list(range(0, 100, 2))
608 | ```
609 |
610 | - **TIP:** You can access both the index and value of each item while iterating over a list by using the `range()` and `len()` functions:
611 |
612 | ```python
613 | supplies = ['pens', 'staplers', 'binders']
614 |
615 | for i in range(len(supplies)):
616 | print ('Index ' + str(i) + ' in supplies is: ' + supplies[i])
617 |
618 | # Index 0 in supplies is: pens
619 | # Index 1 in supplies is: staplers
620 | # Index 2 in supplies is: binders
621 | ```
622 |
623 | - You can use Python's **multiple assignment** feature to unpack a list and assign each item's value to its own variable:
624 |
625 | ```python
626 | cat = ['fat', 'orange', 'loud']
627 |
628 | size, color, disposition = cat
629 |
630 | size # 'fat'
631 | color # 'orange'
632 | disposition # 'loud'
633 | ```
634 |
635 | - **NOTE:** Multiple assignments work not only with lists, but also comma-delimited values outside of a list:
636 |
637 | ```python
638 | size, color, disposition = 'slim', 'gray', 'quiet'
639 | ```
640 |
641 | - **TIP:** Multiple assignments are also useful for quick variable swapping:
642 |
643 | ```python
644 | a = 'AAA'
645 | b = 'BBB'
646 |
647 | a, b = b, a
648 |
649 | a # 'BBB'
650 | b # 'AAA'
651 | ```
652 |
653 | - **Augmented Operators** (`+=`, `-=`, `*=`, `/=`, `%=`):
654 |
655 | ```python
656 | spam = 42
657 |
658 | spam = spam + 1
659 |
660 | spam += 1
661 |
662 | spam # 44
663 | ```
664 |
665 | ### 6.15 - List Methods
666 |
667 | - The `index()` method returns the index of the first occurrence of the specified value:
668 |
669 | ```python
670 | spam = ['hello', 'hi', 'howdy', 'hi']
671 |
672 | spam.index('hi') # 1
673 |
674 | spam.index('hey') # (Raises an exception if value not found)
675 | ```
676 |
677 | - The `append()` method appends an item to the end of the list:
678 |
679 | ```python
680 | spam = ['cat', 'dog', 'bat']
681 |
682 | spam.append('moose')
683 |
684 | spam[3] # 'moose'
685 | ```
686 |
687 | - The `insert()` method inserts the specified value at the specified position:
688 |
689 | ```python
690 | spam = ['cat', 'dog', 'bat']
691 |
692 | spam.insert(1, 'chicken')
693 |
694 | spam # ['cat', 'chicken', 'dog', 'bat']
695 | ```
696 |
697 | - The `remove()` method removes the first occurrence of the item with the specified value:
698 |
699 | ```python
700 | spam = ['cat', 'bat', 'elephant', 'rat']
701 |
702 | spam.remove('elephant')
703 |
704 | spam # ['cat', 'bat', 'rat']
705 |
706 | spam.remove('oat') # (Throws an error)
707 | ```
708 |
709 | - The `sort()` method sorts a list in ascending order by default. The sorting direction can be reversed by using the `reverse` keyword argument:
710 |
711 | ```python
712 | spam = [2, 5, 3.14, 1, -7]
713 |
714 | spam.sort()
715 |
716 | spam # [-7, 1, 2, 3.14, 5]
717 |
718 | spam = ['ants', 'cats', 'badgers']
719 |
720 | spam.sort()
721 |
722 | spam # ['ants', 'badgers', 'cats']
723 |
724 | spam.sort(reverse=True)
725 |
726 | spam # ['cats', 'badgers', 'ants']
727 | ```
728 |
729 | - **NOTE:** You cannot sort a list that contains both number and string types.
730 |
731 | - **ALSO:** When working with strings, `sort()` actually sorts by "**ASCII-betical**" order rather than alphabetical order (resulting in uppercase letters being sorted before lowercase letters, because uppercase letters appear before lowercase letters in ASCII code). However, you can sort by true alphabetical order by using the `key` keyword argument:
732 |
733 | ```python
734 | spam = ['a', 'z', 'A', 'Z']
735 |
736 | spam.sort()
737 |
738 | spam # ['A', 'Z', 'a', 'z']
739 |
740 | # str.lower is a string method that converts a string input to lowercase:
741 |
742 | spam.sort(key=str.lower)
743 |
744 | spam # ['A', 'a', 'Z', 'z']
745 | ```
746 |
747 | ### 6.16 - Similarities Between Lists and Strings
748 |
749 | - A string is essentially a list of single character strings (which is why `list()` can accept a string as an argument). However, they are significantly different in the sense that a list is a **mutable** data type (i.e., it can have values added, moved, or changed), whereas a string is an **immutable** data type (i.e., its value cannot be changed). Because strings are immutable, the proper way to create a new string derived from an existing variable is by using **slices**:
750 |
751 | ```python
752 | name = 'Zophie a cat'
753 |
754 | newName = name[0:7] + 'the' + name[8:12]
755 |
756 | newName # 'Zophie the cat'
757 | ```
758 |
759 | - When a list is assigned to a variable, Python actually stores a **reference** to the list in memory, not the actual list itself. Thus, if a list is referenced in two separate variables, a modification to one variable will affect the value stored in the other variable as well:
760 |
761 | ```python
762 | spam = [0, 1, 2, 3, 4, 5]
763 |
764 | cheese = spam
765 |
766 | cheese[1] = 'Hello'
767 |
768 | cheese # [0, 'Hello', 2, 3, 4, 5]
769 |
770 | spam # [0, 'Hello', 2, 3, 4, 5]
771 | ```
772 |
773 | - If you want to make a true copy of a list (rather than having two or more variables point to the same list by reference), use the `copy` module's `deepcopy()` function:
774 |
775 | ```python
776 | import copy
777 |
778 | spam = ['A', 'B', 'C', 'D']
779 |
780 | # Creates a list with items identical to (but separate from) those in "spam":
781 |
782 | cheese = copy.deepcopy(spam)
783 | ```
784 |
785 | - When working within lists, Python is aware that instances of **line continuation** should not be considered a new block:
786 |
787 | ```python
788 | spam = ['apples',
789 | 'oranges',
790 | 'bananas']
791 | ```
792 |
793 | - **TIP:** You can take advantage of line continuation even without a list by using the line continuation character (`\`):
794 |
795 | ```python
796 | print('Four score and seven ' + \
797 | 'years ago...')
798 |
799 | # 'Four score and seven years ago...'
800 | ```
801 |
802 | [Back to TOC](#id-toc)
803 |
804 |
805 |
806 | ## Section 7: Dictionaries
807 |
808 | ### 7.17 - The Dictionary Data Type
809 |
810 | - A dictionary is a mutable collection of key-value pairs:
811 |
812 | ```python
813 | myCat = {'size': 'large', 'color': 'gray', 'disposition': 'loud'}
814 |
815 | myCat['size'] # 'large'
816 |
817 | myCat['age'] # (Results in a KeyError message)
818 |
819 | # Check if a key exists with the "in" and "not in" operators:
820 |
821 | 'name' in myCat # False
822 |
823 | 'name' not in myCat # True
824 | ```
825 |
826 | - Two dictionaries with identical key-value pairs will be considered equivalent regardless of the order in which those key-value pairs are arranged:
827 |
828 | ```python
829 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8}
830 | ham = {'species': 'cat', 'age': 8, 'name': 'Zophie'}
831 |
832 | eggs == ham # True
833 | ```
834 |
835 | - Three major dictionary iteration methods (`keys()`, `values()`, `items()`):
836 |
837 | ```python
838 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8}
839 |
840 | # Each method returns a list-like data type, so you must convert each result
841 | # with the "list()" function if you want to receive a true list value:
842 |
843 | list(eggs.keys()) # ['name', 'species', 'age']
844 |
845 | list(eggs.values()) # ['Zophie', 'cat', 8]
846 |
847 | # Tuples are like lists, except they use parentheses (not brackets) and are immutable:
848 |
849 | list(eggs.items()) # [('name', 'Zophie'), ('species', 'cat'), ('age', 8)]
850 | ```
851 |
852 | - You can iterate over a dictionary's keys/values with a for loop:
853 |
854 | ```python
855 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8}
856 |
857 | # Prints 'name', 'species', and 'age':
858 |
859 | for k in eggs.keys():
860 | print(k)
861 |
862 | # Prints 'name: Zophie', 'species: cat', and 'age: 8'
863 |
864 | for k, v in eggs.items():
865 | print(k + ': ' + str(v))
866 | ```
867 |
868 | - If you attempt to retrieve a value from a key that does not exist in a dictionary, you will normally receive an error. However, you can avoid such problems by using the `get()` method to specify a default value if the key does not exist:
869 |
870 | ```python
871 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8}
872 |
873 | eggs.get('age', 0) # 8
874 |
875 | eggs.get('color', '') # ''
876 | ```
877 |
878 | - If you want to set a value for a key that does not yet exist in a dictionary, use the `setdefault()` method:
879 |
880 | ```python
881 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8}
882 |
883 | eggs.setdefault('color', 'black') # 'black'
884 |
885 | # No change if the key already exists:
886 |
887 | eggs.setdefault('color', 'orange') # 'black'
888 | ```
889 |
890 | - To obtain a **pretty print** of a dictionary (or list), use the `pprint` module:
891 |
892 | ```python
893 | import pprint
894 |
895 | message = 'All cows eat grass'
896 |
897 | count = {}
898 |
899 | for character in message:
900 | count.setdefault(character.lower(), 0)
901 | count[character.lower()] += 1
902 |
903 | pprint.pprint(count)
904 |
905 | # {' ': 3,
906 | # 'a': 3,
907 | # 'c': 1,
908 | # 'e': 1,
909 | # 'g': 1,
910 | # 'l': 2,
911 | # 'o': 1,
912 | # 'r': 1,
913 | # 's': 3,
914 | # 't': 1,
915 | # 'w': 1}
916 | ```
917 |
918 | - **TIP:** If you want to store your `pprint` object as a **string** (rather than merely print it to the screen), use the `pprint` module's `pformat()` method instead.
919 |
920 | ### 7.18 - Data Structures
921 |
922 | - You can use the `type()` function to determine the data type of any value:
923 |
924 | ```python
925 | type(42) # <class 'int'>
926 |
927 | type('hello') # <class 'str'>
928 |
929 | type({'name': 'Zophie'}) # <class 'dict'>
930 | ```
931 |
932 | [Back to TOC](#id-toc)
933 |
934 |
935 |
936 | ## Section 8: More About Strings
937 |
938 | ### 8.19 - Advanced String Syntax
939 |
940 | - There are multiple ways to type strings, including via double quotes and escape characters:
941 |
942 | ```python
943 | "That is Alice's cat."
944 |
945 | # Prints 'Say hello to Bob's mother.':
946 |
947 | print('Say hello to Bob\'s mother.')
948 |
949 | # Prints each statement on a new line:
950 |
951 | print('Hello.\nHow are you?\nI\'m fine.')
952 | ```
953 |
954 | - Types of escape characters:
955 |
956 | | Escape character | Prints as |
957 | | :--------------: | -------------------- |
958 | | \\' | Single quote |
959 | | \\" | Double quote |
960 | | \t | Tab |
961 | | \n | Newline (line break) |
962 | | \\\ | Backslash |
963 |
964 | - If you have text that contains many backslashes that you do not want to be treated as escape characters, you can use a **raw string**, which is a string that begins with a lowercase "r":
965 |
966 | ```python
967 | # Prints without the letters "t" and "n", and inserts tab and newline characters instead:
968 |
969 | print('C:\temp\new')
970 |
971 | # Prints text as written:
972 |
973 | print(r'C:\temp\new')
974 | ```
975 |
976 | - Although you can use `\n` to add newlines to a string, it is often easier to use **multiline strings** with triple quotes (either single or double quotes). Any quotes, tabs, or newlines within the triple quotes are considered part of the string:
977 |
978 | ```python
979 | spam = """Dear Alice,
980 | Eve's cat is orange.
981 | Sincerely,
982 | Bob"""
983 |
984 | print(spam) # (Prints each line on a new line)
985 |
986 | spam # "Dear Alice,\nEve's cat is orange.\nSincerely,\nBob"
987 | ```
988 |
989 | ### 8.20 - String Methods
990 |
991 | - The `upper()` and `lower()` methods return a string where all characters are in uppercase or lowercase, respectively:
992 |
993 | ```python
994 | spam = 'Hello, world!'
995 |
996 | spam.upper() # 'HELLO, WORLD!'
997 |
998 | spam.lower() # 'hello, world!'
999 | ```
1000 |
1001 | - **NOTE:** Because strings are immutable, string methods do not modify the original string. If you want to actually modify the string value stored to a variable, you must say, e.g.: `spam = spam.lower()`
1002 |
1003 | - The `isupper()` and `islower()` methods return a Boolean value indicating whether all letters in the string are uppercase or lowercase, respectively:
1004 |
1005 | ```python
1006 | spam = 'hello, world!'
1007 |
1008 | spam.isupper() # False
1009 |
1010 | spam.islower() # True
1011 | ```
1012 |
1013 | - Other noteworthy string methods beginning with the word `is`:
1014 |
1015 | ```python
1016 | isalpha() # (Letters only)
1017 |
1018 | isalnum() # (Letters and numbers only)
1019 |
1020 | isdecimal() # (Numbers only)
1021 |
1022 | isspace() # (Whitespace only)
1023 |
1024 | istitle() # (Titlecase only)
1025 | ```
1026 |
1027 | - **NOTE:** Because string methods return a new string, you are able to **chain** method calls:
1028 |
1029 | ```python
1030 | 'hello'.upper().isupper() # True
1031 | ```
1032 |
1033 | - The `startswith()` and `endswith()` methods return a Boolean value indicating whether the string starts with or ends with (respectively) the specified value:
1034 |
1035 | ```python
1036 | spam = 'Hello, world!'
1037 |
1038 | spam.startswith('Hello') # True
1039 |
1040 | spam.endswith('!') # True
1041 |
1042 | spam.endswith('world') # False
1043 | ```
1044 | - The `join()` method takes all items in an iterable and joins them into one string using a specified separator:
1045 |
1046 | ```python
1047 | spam = ['cats', 'rats', 'bats']
1048 |
1049 | ', '.join(spam) # 'cats, rats, bats'
1050 |
1051 | '\n'.join(spam) # (Inserts a newline character between items)
1052 | ```
1053 |
1054 | - The `split()` method splits a string into a list. The method splits a string according to whitespace separation by default. However, you can specify the string to be used as the separator (first parameter) and the number of splits to perform (second parameter):
1055 |
1056 | ```python
1057 | spam = 'My name is Simon'
1058 |
1059 | spam.split() # ['My', 'name', 'is', 'Simon']
1060 |
1061 | spam.split('m') # ['My na', 'e is Si', 'on']
1062 |
1063 | spam.split(None, 1) # ['My', 'name is Simon']
1064 | ```
1065 |
1066 | - The `ljust()` and `rjust()` methods return a "padded" version of a string, with spaces added so that the result reaches the specified total length (first parameter) and the text is left- or right-justified (respectively). An optional second parameter can be used to specify a padding character other than a space. There is also a `center()` method that operates similarly to `ljust()` and `rjust()` but uses padding to center the text, rather than justify left or right:
1067 |
1068 | ```python
1069 | 'Hello'.ljust(10) # 'Hello '
1070 |
1071 | 'Hello'.rjust(10) # ' Hello'
1072 |
1073 | 'Hello'.ljust(10, '.') # 'Hello.....'
1074 |
1075 | 'Hello'.center(15, '-') # '-----Hello-----'
1076 | ```
1077 | - Use the `strip()`, `rstrip()`, and `lstrip()` methods to trim whitespace characters off of a string. You can insert a string as an argument, and any contiguous set of characters in that argument (regardless of order) will be stripped from the end(s) of the string:
1078 |
1079 | ```python
1080 | ' x '.strip() # 'x'
1081 |
1082 | ' x '.lstrip() # 'x '
1083 |
1084 | ' x '.rstrip() # ' x'
1085 |
1086 | 'SpamBaconSpamEggsSpam'.strip('ampS') # 'BaconSpamEggs'
1087 | ```
1088 |
1089 | - The `replace()` method replaces a specified phrase with another specified phrase:
1090 |
1091 | ```python
1092 | 'Hello there!'.replace('e', '3') # 'H3llo th3r3!'
1093 | ```
1094 |
1095 | ### 8.21 - String Formatting
1096 |
1097 | - Rather than concatenating numerous strings with the `+` operator, you can use Python's **string formatting** (a.k.a., string interpolation) by using the `%` operator and the `%s` symbol (one of several types of conversion specifiers):
1098 |
1099 | ```python
1100 | name = 'Alice'
1101 | place = 'Main Street'
1102 | time = '6:00 PM'
1103 | food = 'turnips'
1104 |
1105 | 'Hello, %s. You are invited to a party at %s at %s. Please bring %s.' % (name, place, time, food)
1106 |
1107 | # 'Hello, Alice. You are invited to a party at Main Street at 6:00 PM. Please bring turnips.'
1108 | ```
1109 |
1110 | [Back to TOC](#id-toc)
1111 |
1112 |
1113 |
1114 | ## Section 9: Running Programs from the Command Line
1115 |
1116 | - See [Appendix B](https://automatetheboringstuff.com/appendixb/) re: shebang line (`#! /usr/bin/env python3`) and changing file permissions (`chmod +x pythonScript.py`)
1117 |
1118 | - To use arguments from the command line in your Python script, use the `sys.argv` list:
1119 |
1120 | ```python
1121 | #! /usr/bin/env python3
1122 |
1123 | import sys
1124 |
1125 | print(sys.argv)
1126 | ```
1127 |
1128 | [Back to TOC](#id-toc)
1129 |
1130 |
1131 |
1132 | ## Section 10: Regular Expressions
1133 |
1134 | ### 10.23 - Regular Expression Basics
1135 |
1136 | - Example of using regular expressions with the `re` module:
1137 |
1138 | ```python
1139 | import re
1140 |
1141 | message = 'Call me tomorrow at 415-555-1011, or at 415-555-9999.'
1142 |
1143 | # "compile()" compiles a regex pattern into a regex object that can be used
1144 | # for matching via "match()", "search()", and other methods. "\d" is the
1145 | # regex for a numeric digit character:
1146 |
1147 | phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
1148 |
1149 | # The regex data type has a "search()" method that can be used to search a
1150 | # string for the regex pattern and return a match object containing the first
1151 | # matching string:
1152 |
1153 | matchObject = phoneNumRegex.search(message)
1154 |
1155 | # Match objects have a method called "group()" that will return the text
1156 | # of the matching string:
1157 |
1158 | print(matchObject.group()) # 415-555-1011
1159 | ```
1160 |
1161 | - **NOTE:** If the `search()` method does not find a match, it will return a value of `None`, which will cause an error to result if you call the `group()` method on a nonexistent match object.
1162 |
1163 | ### 10.24 - Regex Groups and the Pipe Character
1164 |
1165 | - Use **parentheses** to mark out groups within a regex, and access groups via the `group()` method:
1166 |
1167 | ```python
1168 | import re
1169 |
1170 | phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
1171 |
1172 | matchObject = phoneNumRegex.search('My number is 415-555-4242.')
1173 |
1174 | matchObject.group(1) # '415'
1175 |
1176 | matchObject.group(2) # '555-4242'
1177 | ```
1178 |
1179 | - **NOTE:** If you want to find literal parentheses (or any other regex special characters) within your text, then you must escape the opening and closing parentheses with a backslash (`\`):
1180 |
1181 | ```python
1182 | phoneNumRegex = re.compile(r'\(\d\d\d\) \d\d\d-\d\d\d\d')
1183 | ```
1184 |
1185 | - Use the **pipe** (`|`) character to match one of many possible groups (based on, e.g., prefix/suffix):
1186 |
1187 | ```python
1188 | batRegex = re.compile(r'Bat(man|mobile|copter)')
1189 |
1190 | matchObject = batRegex.search('Batmobile lost a wheel')
1191 |
1192 | matchObject.group() # 'Batmobile'
1193 |
1194 | matchObject.group(1) # 'mobile'
1195 | ```
1196 |
1197 | ### 10.25 - Repetition in Regex Patterns and Greedy/Nongreedy Matching
1198 |
1199 | - The `?` character matches the preceding expression **0 or 1** time(s) (i.e., the expression can either appear once or not at all for a match to occur):
1200 |
1201 | ```python
1202 | import re
1203 |
1204 | # Matches 'Batman' or 'Batwoman':
1205 |
1206 | batRegex = re.compile(r'Bat(wo)?man')
1207 | ```
1208 |
1209 | - The `*` character matches the preceding expression **0 or more** times:
1210 |
1211 | ```python
1212 | batRegex = re.compile(r'Bat(wo)*man')
1213 |
1214 | matchObject = batRegex.search('The Adventures of Batwowowoman')
1215 |
1216 | matchObject.group() # 'Batwowowoman'
1217 | ```
1218 |
1219 | - The `+` character matches the preceding expression **1 or more** times:
1220 |
1221 | ```python
1222 | # Matches 'Batwoman' or 'Batwowowoman', etc., but not 'Batman':
1223 |
1224 | batRegex = re.compile(r'Bat(wo)+man')
1225 | ```
1226 |
1227 | - The `{`*`n`*`}` character matches **exactly** *n* occurrences of the preceding expression:
1228 |
1229 | ```python
1230 | haRegex = re.compile(r'(ha){3}')
1231 |
1232 | matchObject = haRegex.search('He said, "hahaha"')
1233 |
1234 | matchObject.group() # 'hahaha'
1235 | ```
1236 |
1237 | - The `{`*`n,m`*`}` character matches at least *n* and at most *m* occurrences of the preceding expression (if *n* is omitted, it is treated as 0; if *m* is omitted, it is treated as ∞):
1238 |
1239 | ```python
1240 | haRegex = re.compile(r'(ha){3,5}')
1241 |
1242 | haMatchObject = haRegex.search('He said, "hahahaha"')
1243 |
1244 | haMatchObject.group() # 'hahahaha'
1245 |
1246 | # By default, Python will perform a "greedy" match and return the longest
1247 | # possible match that it finds (in this case, 5 digits rather than 3):
1248 |
1249 | digitRegex = re.compile(r'(\d){3,5}')
1250 |
1251 | digitMatchObject = digitRegex.search('1234567890')
1252 |
1253 | digitMatchObject.group() # '12345'
1254 |
1255 | # To perform a "nongreedy" match, use the "?" character after the curly brace:
1256 |
1257 | digitRegex = re.compile(r'(\d){3,5}?')
1258 |
1259 | digitMatchObject = digitRegex.search('1234567890')
1260 |
1261 | digitMatchObject.group() # '123'
1262 | ```
1263 |
1264 | ### 10.26 - Regex Character Classes and the findall() Method
1265 |
1266 | #### findall() Method
1267 |
1268 | - If you want to return **every** occurrence of a regex pattern (rather than only the first), then use the `findall()` method (instead of `search()`) to return a list containing all matches:
1269 |
1270 | ```python
1271 | import re
1272 |
1273 | message = 'Call me tomorrow at 415-555-1011, or at 415-555-9999.'
1274 |
1275 | phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
1276 |
1277 | print(phoneNumRegex.findall(message)) # ['415-555-1011', '415-555-9999']
1278 | ```
1279 |
1280 | - Be mindful of how **groups** affect the value returned by the `findall()` method:
1281 |
1282 | ```python
1283 | message = 'Call me tomorrow at 415-555-1011, or at 415-555-9999.'
1284 |
1285 | # One group:
1286 |
1287 | single = re.compile(r'(\d\d\d)-\d\d\d-\d\d\d\d')
1288 |
1289 | single.findall(message) # ['415', '415']
1290 |
1291 | # Two groups:
1292 |
1293 | tuples = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
1294 |
1295 | tuples.findall(message) # [('415', '555-1011'), ('415', '555-9999')]
1296 |
1297 | # Two groups nested within one group:
1298 |
1299 | nested = re.compile(r'((\d\d\d)-(\d\d\d-\d\d\d\d))')
1300 |
1301 | nested.findall(message) # [('415-555-1011', '415', '555-1011'),
1302 | # ('415-555-9999', '415', '555-9999')]
1303 | ```
1304 |
1305 | #### Character Classes
1306 |
1307 | - Common Character Classes:
1308 |
1309 | | Shorthand character class | Represents |
1310 | | :-----------------------: | ------------------------------------------------------------------- |
1311 | | \d | Any numeric digit from 0 to 9 |
1312 | | \D | Any character that is *not* a numeric digit from 0 to 9 |
1313 | | \w | Any letter, numeric digit, or underscore (i.e., "word" characters) |
1314 | | \W | Any character that is *not* a letter, numeric digit, or underscore |
1315 | | \s | Any space, tab, or newline character (i.e., "space" characters) |
1316 | | \S | Any character that is *not* a space, tab, or newline |
1317 |
1318 | - Example:
1319 |
1320 | ```python
1321 | lyrics = """12 drummers drumming, 11 pipers piping, 10 lords a leaping,
1322 | 9 ladies dancing, 8 maids a milking, 7 swans a swimming,
1323 | 6 geese a laying, 5 golden rings, 4 calling birds, 3 french hens,
1324 | 2 turtle doves, 1 partridge in a pear tree"""
1325 |
1326 | xmasRegex = re.compile(r'\d+\s\w+')
1327 |
1328 | xmasRegex.findall(lyrics) # ['12 drummers', '11 pipers', '10 lords',
1329 | # '9 ladies', '8 maids', '7 swans',
1330 | # '6 geese', '5 golden', '4 calling',
1331 | # '3 french', '2 turtle', '1 partridge']
1332 | ```
1333 |
1334 | - You can create your own regex **character sets** (e.g., `[xyz]`) and **negated or complemented character sets** (e.g., `[^xyz]`):
1335 |
1336 | ```python
1337 | message = 'Robocop eats baby food.'
1338 |
1339 | # Matches all letters:
1340 |
1341 | alphaRegex = re.compile(r'[a-zA-Z]')
1342 |
1343 | alphaRegex.findall(message) # ['R', 'o', 'b', 'o', 'c', 'o', 'p',
1344 | # 'e', 'a', 't', 's', 'b', 'a', 'b', 'y',
1345 | # 'f', 'o', 'o', 'd']
1346 |
1347 | # Matches all vowels:
1348 |
1349 | vowelRegex = re.compile(r'[aeiouAEIOU]')
1350 |
1351 | vowelRegex.findall(message) # ['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o']
1352 |
1353 | # Matches all vowels appearing in sets of 2:
1354 |
1355 | doubleVowelRegex = re.compile(r'[aeiouAEIOU]{2}')
1356 |
1357 | doubleVowelRegex.findall(message) # ['ea', 'oo']
1358 |
1359 | # Matches anything that is NOT enclosed in the brackets:
1360 |
1361 | nonVowelRegex = re.compile(r'[^aeiouAEIOU]')
1362 |
1363 | nonVowelRegex.findall(message) # ['R', 'b', 'c', 'p', ' ', 't', 's', ' ',
1364 | # 'b', 'b', 'y', ' ', 'f', 'd', '.']
1365 | ```
1366 |
1367 | ### 10.27 - Regex Dot-Star and the Caret/Dollar Characters
1368 |
1369 | - Use the `^` character (not as a first character in a character set) to find a match at the **beginning** of an input, and use the `$` character to find a match at the **end** of an input:
1370 |
1371 | ```python
1372 | import re
1373 |
1374 | # Begins with 'Hello':
1375 |
1376 | beginsWithHelloRegex = re.compile(r'^Hello')
1377 |
1378 | beginsWithHelloRegex.findall('Hello there!') # ['Hello']
1379 |
1380 | beginsWithHelloRegex.findall('He said, "Hello".') # []
1381 |
1382 | # Ends with 'world':
1383 |
1384 | endsWithWorldRegex = re.compile(r'world$')
1385 |
1386 | endsWithWorldRegex.findall('Hello, world') # ['world']
1387 |
1388 | endsWithWorldRegex.findall('Hello, world!') # []
1389 |
1390 | # Only contains one or more numeric digits:
1391 |
1392 | allDigitsRegex = re.compile(r'^\d+$')
1393 |
1394 | allDigitsRegex.findall('1234567890') # ['1234567890']
1395 |
1396 | allDigitsRegex.findall('12345x7890') # []
1397 | ```
1398 |
1399 | - The `.` (dot) character matches **any** character except the newline character:
1400 |
1401 | ```python
1402 | message = 'The cat in the hat sat on the flat mat.'
1403 |
1404 | # Matches a phrase that ends in 'at' preceded by 1-2 non-newline characters:
1405 |
1406 | atRegex = re.compile(r'.{1,2}at')
1407 |
1408 | # Includes spaces:
1409 |
1410 | atRegex.findall(message) # [' cat', ' hat', ' sat', 'flat', ' mat']
1411 | ```
1412 |
1413 | - **NOTE:** To make `.` truly match **every** character (even newlines), pass the `re.DOTALL` variable as the second argument in the `compile()` function:
1414 |
1415 | ```python
1416 | primeDirectives = 'Serve the public trust.\nProtect the innocent.\nUphold the law.'
1417 |
1418 | dotStar = re.compile(r'.*', re.DOTALL)
1419 |
1420 | matchObject = dotStar.search(primeDirectives)
1421 |
1422 | print(matchObject.group()) # Serve the public trust.
1423 | # Protect the innocent.
1424 | # Uphold the law.
1425 | ```
1426 |
1427 | - **ALSO:** If you want to have a **case-insensitive** regex match, use the `re.IGNORECASE` variable:
1428 |
1429 | ```python
1430 | # TIP: You can also use "re.I" as a shorthand for "re.IGNORECASE":
1431 |
1432 | vowelRegex = re.compile(r'[aeiou]', re.IGNORECASE)
1433 |
1434 | vowelRegex.findall('All cows eat grass.') # ['A', 'o', 'e', 'a', 'a']
1435 | ```
1436 |
1437 | - Common way to match anything is the **Dot-Star** pattern:
1438 |
1439 | ```python
1440 | text = 'First Name: Al Last Name: Sweigart'
1441 |
1442 | nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
1443 |
1444 | nameRegex.findall(text) # [('Al', 'Sweigart')]
1445 | ```
1446 |
1447 | - **NOTE:** Dot-Star uses greedy mode by default, so you must use `(.*?)` for nongreedy matching:
1448 |
1449 | ```python
1450 | serve = '<To serve humans> for dinner.>'
1451 |
1452 | # Nongreedy matching:
1453 |
1454 | nongreedy = re.compile(r'<(.*?)>')
1455 |
1456 | nongreedy.findall(serve) # ['To serve humans']
1457 |
1458 | # Greedy matching:
1459 |
1460 | greedy = re.compile(r'<(.*)>')
1461 |
1462 | greedy.findall(serve) # ['To serve humans> for dinner.']
1463 | ```
1464 |
1465 | ### 10.28 - Regex sub() Method and Verbose Mode
1466 |
1467 | #### sub() Method
1468 |
1469 | - The `sub()` method allows you to find matching text and replace it with new text:
1470 |
1471 | ```python
1472 | import re
1473 |
1474 | message = 'Agent Alice gave documents to Agent Bob.'
1475 |
1476 | namesRegex = re.compile(r'Agent \w+')
1477 |
1478 | # The first argument is the replacement string, and
1479 | # the second argument is the string to be searched:
1480 |
1481 | namesRegex.sub('REDACTED', message) # 'REDACTED gave documents to REDACTED.'
1482 | ```
1483 |
1484 | - You can retain portions of the original text by using the **Slash-Number** syntax (e.g., `\1`, `\2`, etc.), in which the number represents a group in the regex pattern:
1485 |
1486 | ```python
1487 | message = 'Agent Alice gave documents to Agent Bob.'
1488 |
1489 | # The group will contain the first letter of an agent's name:
1490 |
1491 | namesRegex = re.compile(r'Agent (\w)\w*')
1492 |
1493 | namesRegex.findall(message) # ['A', 'B']
1494 |
1495 | # Use the text from "Group 1" for the substituted match:
1496 |
1497 | namesRegex.sub(r'Agent \1', message) # 'Agent A gave documents to Agent B.'
1498 | ```
1499 |
1500 | #### Verbose Mode
1501 |
1502 | - The `re.VERBOSE` flag allows you to write regular expressions that look nicer and are more readable by allowing you to visually separate logical sections of the pattern and add comments. Whitespace within the pattern is generally ignored:
1503 |
1504 | ```python
1505 | message = 'Call me tomorrow at 415-555-1011, or at (415) 555-9999.'
1506 |
1507 | # TIP: You can combine "compile()" options by using the bitwise "|" operator:
1508 |
1509 | phoneRegex = re.compile(r'''
1510 | (((\d\d\d-)| # Area code (without parentheses; with dash)
1511 | (\(\d\d\d\)\s)) # -OR- Area code (with parentheses; without dash)
1512 | \d\d\d # First 3 digits
1513 | - # Second dash
1514 | \d\d\d\d) # Last 4 digits
1515 | ''', re.VERBOSE | re.I | re.DOTALL) # Added extra options for demonstration
1516 |
1517 | phoneRegex.findall(message)[0][0] # '415-555-1011'
1518 |
1519 | phoneRegex.findall(message)[1][0] # '(415) 555-9999'
1520 | ```
1521 |
1522 | [Back to TOC](#id-toc)
1523 |
1524 |
1525 |
1526 | ## Section 11: Files
1527 |
1528 | ### 11.30 - Filenames and Absolute/Relative File Paths
1529 |
1530 | - File paths are handled differently with regard to slashes/backslashes on Windows (e.g., `C:\temp\new`) and Mac/Linux (e.g., `/temp/new`). To enforce consistency when creating a file path string in Python, use the `os` module's `path.join()` method:
1531 |
1532 | ```python
1533 | import os
1534 |
1535 | # Returns 'folder1\\folder2\\file.png' if run on Windows, and
1536 | # returns 'folder1/folder2/file.png' if run on Mac/Linux:
1537 |
1538 | os.path.join('folder1', 'folder2', 'file.png')
1539 | ```
1540 |
1541 | - To retrieve the string value of the file's **current working directory** (CWD), use the `os.getcwd()` method. You can manually change what Python considers the CWD to be by using `os.chdir()`:
1542 |
1543 | ```python
1544 | os.getcwd() # '/Users/Guest/Desktop'
1545 |
1546 | os.chdir('/Users/Guest/Documents')
1547 |
1548 | os.getcwd() # '/Users/Guest/Documents'
1549 | ```
1550 |
1551 | - Being able to modify the CWD is important for handling **relative file paths**. A file referenced by name only (e.g., `file.png`) will be considered to be within the CWD, whereas a file name that is part of an **absolute file path** (e.g., `/Users/Guest/Documents/file.png`) is known to be within the path specified.
1552 |
1553 | - **NOTE:** Relative file paths can also contain references to folders, not just file names.
1554 |
1555 | - To return an absolute file path derived from a non-absolute pathname, use `os.path.abspath()`:
1556 |
1557 | ```python
1558 | # Essentially calls "os.getcwd()" and appends the string argument:
1559 |
1560 | os.path.abspath('spam.png') # '/Users/Guest/Documents/spam.png'
1561 |
1562 | # You can use the ".." symbol to move to a higher folder above the CWD:
1563 |
1564 | os.path.abspath('../spam.png') # '/Users/Guest/spam.png'
1565 | ```
1566 |
1567 | - **TIP:** You can determine whether a path is relative or absolute by using `os.path.isabs()`, which returns a Boolean value:
1568 |
1569 | ```python
1570 | os.path.isabs('../spam.png') # False
1571 |
1572 | os.path.isabs('/Users/Guest/Documents') # True
1573 | ```
1574 |
1575 | - To find the relative path between two paths, use `os.path.relpath()`. The first argument is the destination path, and the second (optional) argument is the starting path (which defaults to the current directory if not specified):
1576 |
1577 | ```python
1578 | os.path.relpath('/Users/Guest/spam.png', '/Users') # 'Guest/spam.png'
1579 | ```
1580 |
1581 | - Use `os.path.dirname()` to retrieve only the directory in which a file is located, and use `os.path.basename()` to retrieve only the endpoint of a path:
1582 |
1583 | ```python
1584 | os.path.dirname('/Users/Guest/spam.png') # '/Users/Guest'
1585 |
1586 | os.path.basename('/Users/Guest/spam.png') # 'spam.png'
1587 |
1588 | os.path.basename('/Users/Guest') # 'Guest'
1589 | ```
1590 |
1591 | - To determine whether a file or path exists, use `os.path.exists()`, which returns a Boolean value:
1592 |
1593 | ```python
1594 | os.path.exists('/Users/Guest') # True
1595 | ```
1596 |
1597 | - **ALSO:** Use `os.path.isfile()` and `os.path.isdir()` to determine whether a path is referencing a file or directory, respectively (returns a Boolean value).
1598 |
1599 | - Other useful functions for **examining/modifying** directories include `os.path.getsize()`, `os.listdir()`, and `os.makedirs()`:
1600 |
1601 | ```python
1602 | # Returns a directory's or file's size in bytes (as an integer):
1603 |
1604 | os.path.getsize('/Users/Guest') # 384
1605 |
1606 | # Returns the contents of a directory:
1607 |
1608 | os.listdir('/Users/Guest')
1609 |
1610 | # Creates a new folder (accepts either absolute or relative file paths):
1611 |
1612 | os.makedirs('/Users/Guest/Delicious/Waffles')
1613 | ```
1614 |
1615 | ### 11.31 - Reading and Writing Plaintext Files
1616 |
1617 | - Three steps to **reading** plaintext files:
1618 |
1619 | ```python
1620 | # The `open()` function opens a plaintext file in "read mode" (default)
1621 | # and returns a file object:
1622 |
1623 | helloFile = open('/Users/Guest/hello.txt')
1624 |
1625 | # The file object includes the "read()" method that returns a string
1626 | # containing the file's contents:
1627 |
1628 | content = helloFile.read()
1629 |
1630 | # Close the file:
1631 |
1632 | helloFile.close()
1633 | ```
1634 |
1635 | - **NOTE:** Instead of `read()`, you can use the `readlines()` method to return all lines as strings inside of a list. For example, if the file `hello.txt` contained the following text...
1636 |
1637 | ```
1638 | Hello, world!
1639 | How are you?
1640 | ```
1641 |
1642 | ...then `read()` and `readlines()` will process the text accordingly:
1643 |
1644 | ```python
1645 | helloFile.read() # 'Hello, world!\nHow are you?'
1646 |
1647 | helloFile.readlines() # ['Hello, world!\n', 'How are you?']
1648 | ```
1649 |
1650 | - To **write** to a plaintext file (i.e., overwrite its contents), pass the string `'w'` as the second argument to the `open()` function. To **append** new text to a file (i.e., add to the end of the file, rather than overwrite its contents), pass the `'a'` string. In either case, if the file does not already exist, then Python will create a new `txt` file for you to write to:
1651 |
1652 | ```python
1653 | helloFile = open('/Users/Guest/hello2.txt', 'w')
1654 |
1655 | # Use the "write()" method to write:
1656 |
1657 | helloFile.write('Hello!!!\n') # NOTE: Will return the number of characters written
1658 |
1659 | helloFile.close()
1660 | ```
1661 |
1662 | - **NOTE:** Python will *not* automatically add newline characters when writing/appending text content. So newlines must be added manually if desired.
1663 |
1664 | - If you need to store **complex data** such as lists/dictionaries (rather than just plaintext) to your storage device, use the `shelve` module to create a **binary shelf file**:
1665 |
1666 | ```python
1667 | import shelve
1668 |
1669 | # Returns a "shelf" data object that will be saved to your storage device
1670 | # as a shelf file named "mydata" in the current working directory:
1671 |
1672 | shelfFile = shelve.open('mydata')
1673 |
1674 | # Make changes to the shelf file in the same manner as a dictionary:
1675 |
1676 | shelfFile['cats'] = ['Kiwi', 'Penny', 'Clover']
1677 |
1678 | shelfFile['dogs'] = ['Bambi', 'Buzz', 'Elway']
1679 |
1680 | # Close the file:
1681 |
1682 | shelfFile.close()
1683 | ```
1684 |
1685 | - **NOTE:** On Mac OS X, the shelf file will be saved with the `.db` extension. Its contents can be accessed from a Python program as follows:
1686 |
1687 | ```python
1688 | # NOTE: The "shelve.open()" method opens a shelf file in read-write mode:
1689 |
1690 | shelfFile = shelve.open('mydata')
1691 |
1692 | shelfFile['cats'] # ['Kiwi', 'Penny', 'Clover']
1693 |
1694 | # List all keys in a shelf file:
1695 |
1696 | list(shelfFile.keys()) # ['cats', 'dogs']
1697 |
1698 | # List all values in a shelf file:
1699 |
1700 | list(shelfFile.values()) # [['Kiwi', 'Penny', 'Clover'],
1701 | # ['Bambi', 'Buzz', 'Elway']]
1702 | ```
1703 |
1704 | ### 11.32 - Copying and Moving Files and Folders
1705 |
1706 | - The `shutil` (Shell Utilities) module allows you to copy and move files and folders:
1707 |
1708 | ```python
1709 | import shutil
1710 |
1711 | # COPY a file (first argument) to a new folder (second argument):
1712 |
1713 | shutil.copy('/Users/Guest/hello.txt', '/Users/Guest/Delicious')
1714 |
1715 | # COPY a file to a new folder and RENAME the copy ('/Delicious/spam.txt'):
1716 |
1717 | shutil.copy('/Users/Guest/hello.txt', '/Users/Guest/Delicious/spam.txt')
1718 |
1719 | # COPY an entire FOLDER:
1720 |
1721 | shutil.copytree('/Users/Guest/Delicious', '/Users/Guest/Delicious_Backup')
1722 |
1723 | # MOVE a file to a new location:
1724 |
1725 | shutil.move('/Users/Guest/Delicious/spam.txt', '/Users/Guest/Waffles')
1726 |
1727 | # MOVE and RENAME a file to a new location:
1728 |
1729 | shutil.move('/Users/Guest/Delicious/spam.txt', '/Users/Guest/hello.txt')
1730 | ```
1731 |
1732 | - **NOTE:** `shutil` does not have a method dedicated to renaming a file without copying/moving the file; however, you can accomplish the same result by using the `move()` method and setting the destination path to the same folder as the original file, but with the new filename:
1733 |
1734 | ```python
1735 | shutil.move('/Users/Guest/hello.txt', '/Users/Guest/eggs.txt')
1736 | ```
1737 |
1738 | ### 11.33 - Deleting Files
1739 |
1740 | - The `os` module has an `unlink()` method that can be used for permanently deleting a **single file**, and an `rmdir()` method for permanently deleting an **empty folder**:
1741 |
1742 | ```python
1743 | import os
1744 |
1745 | # Deletes a file:
1746 |
1747 | os.unlink('/Users/Guest/Delicious/eggs.txt')
1748 |
1749 | # Deletes an empty folder:
1750 |
1751 | os.rmdir('/Users/Guest/Delicious')
1752 | ```
1753 |
1754 | - To permanently remove a folder and all of its contents, use the `shutil.rmtree()` method:
1755 |
1756 | ```python
1757 | import shutil
1758 |
1759 | shutil.rmtree('/Users/Guest/Waffles')
1760 | ```
1761 |
1762 | - A better practice is to send a file/folder to your OS's **trash** or **recycling bin** (rather than permanently deleting the file/folder) by using the [send2trash](https://pypi.org/project/Send2Trash/) third-party module:
1763 |
1764 | ```python
1765 | import send2trash
1766 |
1767 | send2trash.send2trash('/Users/Guest/Delicious/eggs.txt')
1768 | ```
1769 |
1770 | ### 11.34 - Walking a Directory Tree
1771 |
1772 | - The `os.walk()` method allows you to iterate through and execute code upon all of the files or folders within a specified folder:
1773 |
1774 | ```python
1775 | import os
1776 |
1777 | for folderName, subfolders, filenames in os.walk('/Users/Guest'):
1778 | # Delete subfolders containing the string 'fish' in the subfolder name:
1779 | for subfolder in subfolders:
1780 | if 'fish' in subfolder:
1781 | os.rmdir(os.path.join(folderName, subfolder))
1782 |
1783 | # Copy all ".py" files to ".backup" files:
1784 | for file in filenames:
1785 | if file.endswith('.py'):
1786 | shutil.copy(
1787 | os.path.join(folderName, file),
1788 | os.path.join(folderName, file + '.backup')
1789 | )
1790 | ```
1791 |
1792 | [Back to TOC](#id-toc)
1793 |
1794 |
1795 |
1796 | ## Section 12: Debugging
1797 |
1798 | ### 12.35 - The raise and assert Statements
1799 |
1800 | - Python automatically raises one of its built-in **exceptions** whenever it tries to run invalid code; however, you can also raise your own exceptions with a `raise` statement. A **traceback** will be logged upon raising the exception, which allows you to see the specific line of code that triggered the exception:
1801 |
1802 | ```python
1803 | raise Exception('This is the error message.')
1804 | ```
1805 |
1806 | - **TIP:** To save a running log of cleanly formatted error messages (as strings), use the `traceback.format_exc()` function:
1807 |
1808 | ```python
1809 | import traceback
1810 |
1811 | try:
1812 | raise Exception('This is the error message.')
1813 | except:
1814 | errorFile = open('error-log.txt', 'a')
1815 | errorFile.write(traceback.format_exc())
1816 | errorFile.close()
1817 | print('The traceback info was written to error-log.txt')
1818 | ```
1819 |
1820 | - An **assertion** can be used to perform a "sanity check". They are intended to address programmer errors rather than user errors. See the following example of a traffic light simulator:
1821 |
1822 | ```python
1823 | mainStreet = {'ns': 'green', 'ew': 'red'}
1824 |
1825 |
1826 | def switchLights(intersection):
1827 | for key in intersection.keys():
1828 | if intersection[key] == 'green':
1829 | intersection[key] = 'yellow'
1830 | elif intersection[key] == 'yellow':
1831 | intersection[key] = 'red'
1832 | elif intersection[key] == 'red':
1833 | intersection[key] = 'green'
1834 | # This program will raise an AssertionError once the lights are switched,
1835 | # because the assertion's condition evaluates to 'False': the N/S light will
1836 | # be 'yellow' and the E/W light will be 'green'. As traffic should only be
1837 | # flowing when one light on the intersection is 'red', the assert statement
1838 | # allows you to immediately detect the problem and take corrective action:
1839 | assert 'red' in intersection.values(), 'Neither light is red!' + str(intersection)
1840 |
1841 |
1842 | switchLights(mainStreet)
1843 | ```
1844 |
1845 | ### 12.36 - Logging
1846 |
1847 | - Python's `logging` module allows you to create a record of custom messages. Use the `logging.basicConfig()` method to display log messages on your screen while the program runs:
1848 |
1849 | ```python
1850 | import logging
1851 |
1852 | # The following line should appear at or near the top of your program:
1853 |
1854 | logging.basicConfig(
1855 | level=logging.DEBUG,
1856 | format='%(asctime)s - %(levelname)s - %(message)s'
1857 | )
1858 |
1859 | # Each of the following "debug()" function calls work like "print()" but
1860 | # provide additional information (i.e., timestamp, log level, and custom message):
1861 |
1862 | logging.debug('Start of program')
1863 |
1864 | def factorial(n):
1865 | logging.debug('Start of factorial(%s)' % (n))
1866 | total = 1
1867 | for i in range(n + 1):
1868 | total *= i
1869 | logging.debug('i is %s, total is %s' % (i, total))
1870 | logging.debug('Return value is %s' % (total))
1871 | return total
1872 |
1873 | print(factorial(5)) # (Returns 0, which is incorrect)
1874 |
1875 | logging.debug('End of program')
1876 |
1877 | # In this example, the running log will show that "i" is set to 0 on the first
1878 | # iteration, which results in "total" being set to 0 because any amount
1879 | # times 0 is always equal to 0. Therefore, by reviewing the log, it becomes
1880 | # apparent that the "range()" value should be set to start at 1 rather than 0.
1881 | ```
1882 |
1883 | - To **disable** logging messages that are present in your code, use the `logging.disable()` method at the top of your code:
1884 |
1885 | ```python
1886 | # Disables logging calls of the given severity level (or lower):
1887 |
1888 | logging.disable(logging.CRITICAL)
1889 | ```
1890 |
1891 | - **NOTE:** Python recognizes the following 5 [logging levels](https://docs.python.org/3/library/logging.html#levels) (in descending order of severity). Log messages can be created at a specific log level by using the corresponding logging method:
1892 |
1893 | | Log level | Logging method |
1894 | | --------- | -------------------- |
1895 | | CRITICAL | `logging.critical()` |
1896 | | ERROR | `logging.error()` |
1897 | | WARNING | `logging.warning()` |
1898 | | INFO | `logging.info()` |
1899 | | DEBUG | `logging.debug()` |
1900 |
1901 | - To log messages to a **plaintext file** rather than the screen, use the `filename` keyword argument in the `logging.basicConfig()` method:
1902 |
1903 | ```python
1904 | logging.basicConfig(
1905 | filename='myProgramLog.txt', # (Relative pathname)
1906 | level=logging.DEBUG,
1907 | format='%(asctime)s - %(levelname)s - %(message)s'
1908 | )
1909 | ```
1910 |
1911 | ### 12.37 - Using the Debugger
1912 |
1913 | - The debugger is a feature in IDLE that allows you to run your program one line at a time. To activate the debugger:
1914 |
1915 | **1**\. Go to `Debug > Debugger` in the IDLE menu bar.
1916 |
1917 | **2**\. Ensure that the `Stack`, `Source`, `Locals`, and `Globals` checkboxes are all checked (to show the most information).
1918 |
1919 | **3**\. Run your program with the debugger enabled. The execution should pause on the first line.
1920 |
1921 | - Use the following controls to navigate through your code with the debugger:
1922 |
1923 | - `Over` ("Step Over") executes the line of code that appears highlighted in the shell, and then proceeds to execute the next line (basically allows you to execute a single line of code at a time).
1924 |
1925 | - **NOTE:** During this process, any variables that are set or modified will be displayed in the `Locals` and `Globals` boxes.
1926 |
1927 | - `Go` runs the program normally and disables the debugger until reaching (1) the end of the program or (2) a **breakpoint**.
1928 |
1929 | - To set a breakpoint in IDLE's file editor, right click the line at which you want to set a breakpoint and click `Set Breakpoint`.
1930 |
1931 | - `Step` ("Step Into") moves the debugger inside of a function call (if a function is about to be executed).
1932 |
1933 | - `Out` ("Step Out") will keep executing lines within the current function until the function returns.
1934 |
1935 | [Back to TOC](#id-toc)
1936 |
1937 |
1938 |
1939 | ## Section 13: Web Scraping
1940 |
1941 | ### 13.38 - The webbrowser Module
1942 |
1943 | - The `webbrowser` module's `open()` function launches a new browser to a specified URL:
1944 |
1945 | ```python
1946 | import webbrowser
1947 |
1948 | webbrowser.open('https://automatetheboringstuff.com')
1949 | ```
1950 |
1951 | ### 13.39 - Downloading from the Web with the requests Module
1952 |
1953 | - The [requests](https://requests.readthedocs.io/en/master/) module is a third-party module that allows you to send HTTP/1.1 requests.
1954 |
1955 | ```python
1956 | import requests
1957 |
1958 | # "get()" returns a response object received from the server:
1959 |
1960 | res = requests.get('http://nunit.org/nuget/nunit3-license.txt')
1961 |
1962 | res.status_code # (Displays the response status code, e.g., 200)
1963 |
1964 | res.text # (Displays the body of the text content)
1965 |
1966 | # "raise_for_status()" will raise an exception if a download error occurred:
1967 |
1968 | res.raise_for_status()
1969 |
1970 | # To save the file to your storage device, use the "open()" function in
1971 | # Write-Binary mode by passing "wb" as the second argument. (NOTE: Even if the
1972 | # downloaded page is in plaintext, you must still write binary data--rather
1973 | # than plaintext data--in order to maintain the Unicode encoding of the text):
1974 |
1975 | licenseFile = open('license.txt', 'wb')
1976 |
1977 | # Write the file by using a for loop with the "iter_content()" method. Files
1978 | # are written in "chunks" (of the "bytes" data type), and you can specify the
1979 | # size of each chunk via the "chunk_size" keyword argument (first parameter).
1980 | # (NOTE: Per the "requests" documentation, 128 is the recommended size when
1981 | # streaming a download; however, this value may be modified as necessary):
1982 |
1983 | for chunk in res.iter_content(128):
1984 | licenseFile.write(chunk) # (Will return an integer of bytes written)
1985 |
1986 | licenseFile.close()
1987 | ```
1988 |
1989 | - **NOTE:** See [here](https://nedbatchelder.com/text/unipain.html) for more information on Python and Unicode.
1990 |
1991 | ### 13.40 - Parsing HTML with the Beautiful Soup Module
1992 |
1993 | - To locate specific HTML elements within an HTML file, you can parse the HTML by using the Beautiful Soup ([beautifulsoup4](https://pypi.org/project/beautifulsoup4/)) third-party module:
1994 |
1995 | ```python
1996 | import bs4
1997 | import requests
1998 |
1999 | # Request an HTML page:
2000 |
2001 | res = requests.get('https://www.amazon.com/dp/1593275994/')
2002 |
2003 | # "BeautifulSoup()" will return a "beautifulsoup" object. The first argument
2004 | # is the content to be parsed, and the second argument is the type of parser
2005 | # you want to use (in this case, HTML):
2006 |
2007 | soup = bs4.BeautifulSoup(res.text, 'html.parser')
2008 |
2009 | # "select()" takes in a string containing the CSS selector you are seeking,
2010 | # and it will return a list of all matching elements. In this case, there
2011 | # will be only one matching element, so it will return a list containing a
2012 | # single tag for the "header-price" from the requested Amazon page:
2013 |
2014 | elements = soup.select(
2015 | """#newOfferAccordionRow > div > div.a-accordion-row-a11y > a > h5 >
2016 | div > div.a-column.a-span4.a-text-right.a-span-last >
2017 | span.a-size-medium.a-color-price.header-price"""
2018 | )
2019 |
2020 | # Access a matching element's internal text content (e.g., just the contents
2021 | # of a <span>, not the opening/closing tags) via the "text" variable:
2022 |
2023 | elements[0].text # (Includes the price and newline/whitespace characters)
2024 |
2025 | elements[0].text.strip() # (Includes only the price)
2026 | ```
2027 |
2028 | ### 13.41 - Controlling the Browser with the Selenium Module
2029 |
2030 | - If you need to parse information from a website that requires you to log in or requires some user interaction with JavaScript, then using Beautiful Soup alone will not be sufficient (as you will have to do more than just download an HTML page). To solve such problems, the [Selenium](https://www.seleniumhq.org/) third-party module can be used to launch a browser that can be programmatically controlled by Python:
2031 |
2032 | ```python
2033 | # Unique way to import Selenium:
2034 |
2035 | from selenium import webdriver
2036 |
2037 | # Set the path of your Chrome driver (http://chromedriver.chromium.org/):
2038 |
2039 | chromeDriverPath = '/Users/bronson/Selenium Drivers/chromedriver'
2040 |
2041 | # Open a new Chrome browser that will be controlled by the automated process:
2042 |
2043 | browser = webdriver.Chrome(chromeDriverPath)
2044 |
2045 | # Direct the automated browser to fetch the requested URL:
2046 |
2047 | browser.get('https://automatetheboringstuff.com')
2048 |
2049 | # Target a SINGLE element containing a hyperlink to be clicked:
2050 |
2051 | element = browser.find_element_by_css_selector(
2052 | """body > div.main > div:nth-child(1) >
2053 | ul:nth-child(18) > li:nth-child(1) > a"""
2054 | )
2055 |
2056 | # "click()" method automates the process of clicking a hyperlink:
2057 |
2058 | element.click()
2059 | ```
2060 |
2061 | - **NOTE:** Use `find_elements_by_css_selector()` (plural) to fetch a list of **all** matching elements. Other elements that can be targeted with the `find_element_by_` syntax include: `class_name`, `id`, `link_text` (complete match), `partial_link_text` (partial match), `name`, and `tag_name`.
2062 |
2063 | - **ALSO:** Other browser **navigation** methods include: `back()`, `forward()`, `refresh()`, and `quit()`.
2064 |
2065 | - Use the `send_keys()` and `submit()` methods to enter text and **submit input**, and use an element's `text` variable to **read** the content of an HTML element:
2066 |
2067 | ```python
2068 | browser.get('https://www.google.com/')
2069 |
2070 | # Target Google's search bar:
2071 |
2072 | searchInput = browser.find_element_by_css_selector(
2073 | '#tsf > div:nth-child(2) > div > div.RNNXgb > div > div.a4bIc > input'
2074 | )
2075 |
2076 | # Enter the given string argument into the search input:
2077 |
2078 | searchInput.send_keys('python')
2079 |
2080 | # Fire the submit action associated with the search input:
2081 |
2082 | searchInput.submit()
2083 |
2084 | # Target a specific element on the web page (i.e., the first result):
2085 |
2086 | snippet = browser.find_element_by_css_selector(
2087 | '#rso > div:nth-child(1) > div > div > div > div > div.s > div > span'
2088 | )
2089 |
2090 | # Display the targeted element's inner text content:
2091 |
2092 | snippet.text # 'The official home of the Python Programming Language.'
2093 | ```
2094 |
2095 | [Back to TOC](#id-toc)
2096 |
2097 |
2098 |
2099 | ## Section 14: Excel, Word, and PDF Documents
2100 |
2101 | ### 14.42 - Reading Excel Spreadsheets
2102 |
2103 | - Python can read and write Excel files via the [openpyxl](https://openpyxl.readthedocs.io/en/stable/) third-party module:
2104 |
2105 | ```python
2106 | # NOTE: Ensure that the CWD is the directory containing your Excel file.
2107 |
2108 | import openpyxl
2109 |
2110 | # Open the Excel file (stored as a 'Workbook' object):
2111 |
2112 | workbook = openpyxl.load_workbook('example.xlsx') # (Located in: ./14-42)
2113 |
2114 | # List the names of all sheets in the workbook:
2115 |
2116 | workbook.sheetnames # ['Sheet1', 'Sheet2', 'Sheet3']
2117 |
2118 | # Access a specific sheet in the workbook (stored as a 'Worksheet' object):
2119 |
2120 | sheet = workbook['Sheet1']
2121 |
2122 | # Access the value of a specific cell within a sheet by row/column NAME:
2123 |
2124 | sheet['A1'].value # datetime.datetime(2015, 4, 5, 13, 34, 2)
2125 | str(sheet['A1'].value) # '2015-04-05 13:34:02'
2126 |
2127 | # Access a cell by row/column NUMBER (useful when iterating with a loop):
2128 |
2129 | sheet.cell(row=1, column=2).value # 'Apples'
2130 | ```
2131 |
2132 | ### 14.43 - Editing Excel Spreadsheets
2133 |
2134 | - Example:
2135 |
2136 | ```python
2137 | import openpyxl
2138 |
2139 | # Create a new 'Workbook' object:
2140 |
2141 | workbook = openpyxl.Workbook()
2142 |
2143 | # Access the workbook's 'Sheet' object
2144 |
2145 | workbook.sheetnames # ['Sheet']
2146 | sheet = workbook['Sheet']
2147 |
2148 | # Assign values to sheet cells:
2149 |
2150 | sheet['A1'] = 42
2151 | sheet['A2'] = 'Hello'
2152 |
2153 | # Add a new worksheet to the workbook:
2154 |
2155 | newSheet = workbook.create_sheet()
2156 | workbook.sheetnames # ['Sheet', 'Sheet1']
2157 |
2158 | # Change the worksheet's title:
2159 |
2160 | newSheet.title = 'My New Sheet'
2161 | workbook.sheetnames # ['Sheet', 'My New Sheet']
2162 |
2163 | # Specify a new worksheet's order and title upon creation:
2164 |
2165 | otherSheet = workbook.create_sheet(index=0, title='My Other Sheet')
2166 | workbook.sheetnames # ['My Other Sheet', 'Sheet', 'My New Sheet']
2167 |
2168 | # Save the workbook to your storage device:
2169 |
2170 | workbook.save('test.xlsx')
2171 | ```
2172 |
2173 | ### 14.44 - Reading and Editing PDFs
2174 |
2175 | - The [PyPDF2](https://pythonhosted.org/PyPDF2/) third-party module can extract data from PDF files, or manipulate existing PDFs to produce a new file. Note, however, that there may be some PDF files that PyPDF2 will be unable to process. PyPDF2 cannot extract images, charts, or other media, but it can extract text and return it as a string:
2176 |
2177 | ```python
2178 | import PyPDF2
2179 | import os
2180 |
2181 | os.chdir('/Users/bronson/Udemy/automate-the-boring-stuff-with-python/14-44')
2182 |
2183 | # Open in Read-Binary ('rb') mode because PDFs are binary files:
2184 |
2185 | pdfFile = open('meetingminutes1.pdf', 'rb')
2186 |
2187 | # Pass the 'File' object to PyPDF2's "PdfFileReader()",
2188 | # which will return a 'PDF Reader' object:
2189 |
2190 | reader = PyPDF2.PdfFileReader(pdfFile)
2191 |
2192 | # View the number of pages within the PDF file:
2193 |
2194 | reader.numPages # 19
2195 |
2196 | # "getPage()" returns a 'Page' object (numbering starts at 0):
2197 |
2198 | page = reader.getPage(0)
2199 |
2200 | # "extractText()" returns a string of all text extracted from the page:
2201 |
2202 | page.extractText()
2203 |
2204 | # Print out the text of each page in the PDF file:
2205 |
2206 | for pageNum in range(reader.numPages):
2207 | print(reader.getPage(pageNum).extractText())
2208 | ```
2209 |
2210 | - PyPDF2 cannot edit the text of a PDF file, but it can modify a PDF on the **page level** (i.e., you can add, remove, and reorder pages, but you cannot change a specific line of text on a particular page):
2211 |
2212 | ```python
2213 | # Open two PDF files to be combined into a single file:
2214 |
2215 | pdf1File = open('meetingminutes1.pdf', 'rb')
2216 | pdf2File = open('meetingminutes2.pdf', 'rb')
2217 |
2218 | reader1 = PyPDF2.PdfFileReader(pdf1File)
2219 | reader2 = PyPDF2.PdfFileReader(pdf2File)
2220 |
2221 | # Create a new 'Writer' object that will create a new PDF file:
2222 |
2223 | writer = PyPDF2.PdfFileWriter()
2224 |
2225 | # "addPage()" allows you to append pages to a 'Writer' object:
2226 |
2227 | for pageNum in range(reader1.numPages):
2228 | page = reader1.getPage(pageNum)
2229 | writer.addPage(page)
2230 |
2231 | for pageNum in range(reader2.numPages):
2232 | page = reader2.getPage(pageNum)
2233 | writer.addPage(page)
2234 |
2235 | # Open a new 'File' object in Write-Binary mode (will become the new PDF):
2236 |
2237 | outputFile = open('combinedminutes.pdf', 'wb')
2238 |
2239 | # Save the PDF with the 'Writer' object's "write()" method:
2240 |
2241 | writer.write(outputFile)
2242 |
2243 | # Close all files:
2244 |
2245 | outputFile.close()
2246 | pdf1File.close()
2247 | pdf2File.close()
2248 | ```
2249 |
2250 | ### 14.45 - Reading and Editing Word Documents
2251 |
2252 | - Use the [python-docx](https://python-docx.readthedocs.io/en/latest/) third-party module to create and modify Word documents. `python-docx` divides a Word document into three different data structures: a 'Document' object, which contains a list of 'Paragraph' objects, which each contain a list of one or more 'Run' objects (a new run occurs in a paragraph whenever there is a change to the style, e.g., bold, italics, etc.):
2253 |
2254 | ```python
2255 | # Import with 'docx' despite the fact that the module is named 'python-docx':
2256 |
2257 | import docx
2258 |
2259 | filePath = '/Users/bronson/Udemy/automate-the-boring-stuff-with-python/14-45/'
2260 |
2261 | # Create a 'Document' object from the Word document file:
2262 |
2263 | documentObject = docx.Document(filePath + 'demo.docx')
2264 |
2265 | # View the text of a 'Paragraph' object:
2266 |
2267 | documentObject.paragraphs # (Returns a list of all 'Paragraph' objects)
2268 |
2269 | paragraph = documentObject.paragraphs[1]
2270 |
2271 | paragraph.text # 'A plain paragraph having some bold and some italic.'
2272 |
2273 | # Modify a paragraph's "style", as defined within Word:
2274 |
2275 | paragraph.style # 'Normal'
2276 |
2277 | paragraph.style = 'Title'
2278 |
2279 | # View the text of a 'Run' object (split up based on changes to text style):
2280 |
2281 | paragraph.runs # (Returns a list of all 'Run' objects)
2282 |
2283 | run = paragraph.runs[1]
2284 |
2285 | run.text # 'bold'
2286 |
2287 | # Check if a 'Run' is bold, italic, or underline (returns a Boolean):
2288 |
2289 | run.bold # True
2290 |
2291 | run.italic # False
2292 |
2293 | run.underline # False
2294 |
2295 | # Modify a 'Run' object's bold, italic, or underline status:
2296 |
2297 | run.underline = True
2298 |
2299 | # Modify a 'Run' object's text:
2300 |
2301 | run.text = 'bold and underline'
2302 |
2303 | # Add a new paragraph to the end of the document:
2304 |
2305 | newParagraph = documentObject.add_paragraph('New paragraph. ')
2306 |
2307 | # Add additional text content to the new paragraph via "add_run()":
2308 |
2309 | newParagraph.add_run('New run.')
2310 |
2311 | # Save the Word document:
2312 |
2313 | documentObject.save('demo2.docx')
2314 | ```
2315 |
2316 | - **NOTE:** The `add_paragraph()` and `add_run()` methods can only add content to the end of a file. If you want to insert additional content in the middle of a file, then you will have to create a new 'Document' object that will have its contents be copied from the source document, and you can add new content in the midst of this copying process.
2317 |
2318 | [Back to TOC](#id-toc)
2319 |
2320 |
2321 |
2322 | ## Section 15: Email
2323 |
2324 | ### 15.46 - Sending Emails
2325 |
2326 | - Simple Mail Transfer Protocol (SMTP) is an Internet standard for email transmission. Python implements SMTP via its built-in `smtplib` module:
2327 |
2328 | ```python
2329 | import smtplib
2330 |
2331 | # Create a "Connection" object that will be used to connect to the specified
2332 | # SMTP server (i.e., the domain name of your email server). The port number
2333 | # for an SMTP server is 587 (via TLS) or 465 (via SSL):
2334 |
2335 | conn = smtplib.SMTP('smtp.gmail.com', 587)
2336 |
2337 | # Establish the connection with the SMTP server (allowing Internet traffic
2338 | # from your Python program). If the connection is successful, you should
2339 | # receive a 2XX SMTP reply code (e.g., 250):
2340 |
2341 | conn.ehlo()
2342 |
2343 | # Start TLS encryption to encrypt your email login password:
2344 |
2345 | conn.starttls()
2346 |
2347 | # Log in to your account (first argument is username; second is password).
2348 | # For Gmail, you must generate an "App password":
2349 |
2350 | conn.login('sender@gmail.com', 'yourAppPassword')
2351 |
2352 | # Send email. The first argument is the "From" address, and the second is
2353 | # the "To" address. The third argument is the email content, including
2354 | # header information and the body of the email's message. You must include
2355 | # two newline characters to separate the header and body. "sendmail()" will
2356 | # return a dictionary object containing any emails that it FAILED to send:
2357 |
2358 | conn.sendmail(
2359 | 'sender@gmail.com',
2360 | 'recipient@example.com',
2361 | 'Subject: Straw Dogs\n\nToday the good life means making full use of science and technology...it means seeking peace...it means cherishing freedom.'
2362 | )
2363 |
2364 | # Close the SMTP connection:
2365 |
2366 | conn.quit()
2367 | ```
2368 |
2369 | ### 15.47 - Checking Your Email Inbox
2370 |
2371 | - The Internet Message Access Protocol (IMAP) is an Internet standard protocol used by email clients to retrieve email messages from a mail server over TCP/IP. Python implements IMAP via its built-in `imaplib` module. However, [imapclient](https://imapclient.readthedocs.io/en/2.1.0/) and [pyzmail](http://www.magiksys.net/pyzmail/) are two third-party modules that may make using IMAP more user-friendly:
2372 |
2373 | ```python
2374 | import imapclient
2375 | import pyzmail
2376 |
2377 | # Create a "Connection" object to be used with the specified host:
2378 |
2379 | conn = imapclient.IMAPClient('imap.gmail.com', port=993, ssl=True)
2380 |
2381 | # Log in:
2382 |
2383 | conn.login('doe@gmail.com', 'yourAppPassword')
2384 |
2385 | # View all email folders:
2386 |
2387 | conn.list_folders()
2388 |
2389 | # Select an email folder (e.g., inbox) as the first argument. The second
2390 | # argument can be used to toggle "Read Only" mode (if you want to prevent
2391 | # emails from being deleted):
2392 |
2393 | conn.select_folder('INBOX', readonly=True)
2394 |
2395 | # Find an email via the "search()" method. The first argument is a list
2396 | # containing strings formatted according to the imapclient syntax. The
2397 | # method will return a list of unique IDs (UIDs), each referencing an email:
2398 |
2399 | UIDs = conn.search(['SINCE 20-Aug-2018'])
2400 |
2401 | # Translate a UID into an actual email via the "fetch()" method. The first
2402 | # argument is a list containing the desired UID, and the second argument
2403 | # specifies which parts of an email to retrieve:
2404 |
2405 | rawMessage = conn.fetch([29068], ['BODY[]', 'FLAGS'])
2406 |
2407 | # Parse the body of the raw email message and store it as a "Message" object:
2408 |
2409 | message = pyzmail.PyzMessage.factory(rawMessage[29068][b'BODY[]'])
2410 |
2411 | # View subject line:
2412 |
2413 | message.get_subject()
2414 |
2415 | # View sender/recipient:
2416 |
2417 | message.get_addresses('from')
2418 | message.get_addresses('to')
2419 | message.get_addresses('bcc')
2420 |
2421 | # The body of a message can be plaintext, HTML, or a combination of the two.
2422 | # The following can be used to view the length of plaintext and HTML portions.
2423 | # If the specified content does not exist, then "None" will be the value:
2424 |
2425 | message.text_part
2426 | message.html_part
2427 |
2428 | # Retrieve and decode the text content of the email message (usually UTF-8):
2429 |
2430 | message.text_part.get_payload().decode('UTF-8')
2431 |
2432 | # If you have "Read Only" mode disabled, you can delete messages via the
2433 | # "delete_messages()" method that accepts a list of all UIDs to be deleted.
2434 | # (NOTE: This is a PERMANENT deletion. The email is NOT moved to "Trash"):
2435 |
2436 | conn.delete_messages([29068])
2437 |
2438 | # Log out:
2439 |
2440 | conn.logout()
2441 | ```
2442 |
2443 | - **IMPORTANT:** If you are receiving an SSLCertVerificationError while using `imapclient`, you may need to [downgrade to version 0.13](https://stackoverflow.com/questions/34714342/imapclient-error-on-windows). If you are unable to install `pyzmail`, you may need to install [pyzmail36](https://stackoverflow.com/questions/40924672/pip-install-pyzmail-error-message) instead.
2444 |
2445 | [Back to TOC](#id-toc)
2446 |
2447 |
2448 |
2449 | ## Section 16: GUI Automation
2450 |
2451 | ### 16.48 - Controlling the Mouse from Python
2452 |
2453 | - [PyAutoGUI](https://pyautogui.readthedocs.io/en/latest/) is a third-party Python module for programmatically controlling the mouse and keyboard:
2454 |
2455 | ```python
2456 | import pyautogui
2457 |
2458 | # Obtain the resolution of your screen (width, height):
2459 |
2460 | width, height = pyautogui.size()
2461 |
2462 | # Obtain the current coordinates of the mouse cursor (width, height).
2463 | # The "width" value indicates the number of pixels from the LEFT of the
2464 | # screen, and the "height" value indicates the number from the TOP.
2465 | # (NOTE: Because the starting position is (0, 0), that means the max position
2466 | # will be one pixel less than the max screen width/height):
2467 |
2468 | pyautogui.position()
2469 |
2470 | # Move the mouse cursor to an ABSOLUTE position by specifying the width
2471 | # coordinate (first argument), the height coordinate (second argument),
2472 | # and the duration in seconds for the movement to occur (third argument):
2473 |
2474 | pyautogui.moveTo(840, 525, duration=0.5)
2475 |
2476 | # Move the mouse cursor to a RELATIVE position (in relation to the current
2477 | # position of the mouse) via the "moveRel()" method following the same
2478 | # procedure explained above:
2479 |
2480 | pyautogui.moveRel(-10, 0, duration=0.25)
2481 |
2482 | # Left click on an element at the specified position. If no coordinates are
2483 | # given, then the mouse will simply be clicked at its current position:
2484 |
2485 | pyautogui.click(450, 10)
2486 | ```
2487 |
2488 | - **NOTE:** The "click" functionality also includes the following methods: `doubleClick()`, `rightClick()`, and `middleClick()`. Additionally, you can perform **click-and-drag** operations in the same manner as `moveTo()` and `moveRel()` but with the left mouse button treated as being held down by using `dragTo()` and `dragRel()`.
2489 |
2490 | - **TIP:** If your program ever results in the loss of control over your mouse cursor, force the cursor to the top left corner of the screen (0, 0) to kill the process by triggering PyAutoGUI's **failsafe exception**.
2491 |
2492 | - Run the following code from the terminal (not IDLE) to see your current mouse cursor position in real-time. This is useful for planning out all of the locations that you want your program to click:
2493 |
2494 | ```python
2495 | import pyautogui, sys
2496 |
2497 | print('Press Ctrl-C to quit.')
2498 |
2499 | try:
2500 | while True:
2501 | x, y = pyautogui.position()
2502 | positionStr = 'X: ' + str(x).rjust(4) + ' Y: ' + str(y).rjust(4)
2503 | print(positionStr, end='')
2504 | print('\b' * len(positionStr), end='', flush=True)
2505 | except KeyboardInterrupt:
2506 | print('\n')
2507 | ```
2508 |
2509 | - Alternatively, PyAutoGUI has a method called `displayMousePosition()` that operates in a similar manner.
2510 |
2511 | ### 16.49 - Controlling the Keyboard from Python
2512 |
2513 | - Example:
2514 |
2515 | ```python
2516 | # "typewrite()" sends virtual keypresses to the computer. It can be used
2517 | # in conjunction with "click()" to first click on a text input field. You
2518 | # can specify an "interval" to add a delay (in seconds) between each keypress:
2519 |
2520 | pyautogui.click(1200, 400) # ( Also accepts tuple: click((1200, 400)) )
2521 |
2522 | pyautogui.typewrite('Hello, world!', interval=0.2)
2523 |
2524 | # To use non-character keys (e.g., left arrow), you must specify the input
2525 | # as strings in a list:
2526 |
2527 | pyautogui.typewrite(['a', 'b', 'left', 'left', 'X', 'Y']) # 'XYab'
2528 |
2529 | # Press a single key:
2530 |
2531 | pyautogui.press('F1')
2532 |
2533 | # Trigger a keyboard shortcut:
2534 |
2535 | pyautogui.hotkey('command', 'f') # (Opens search dialog)
2536 | ```
2537 |
2538 | - **NOTE:** You can view a list of all possible keys that can be accessed by `typewrite()` by accessing `pyautogui.KEYBOARD_KEYS`
2539 |
2540 | ### 16.50 - Screenshots and Image Recognition
2541 |
2542 | - With PyAutoGUI, you can save a screenshot to an absolute or relative path:
2543 |
2544 | ```python
2545 | pyautogui.screenshot('example.png')
2546 | ```
2547 |
2548 | - If you have a cropped image of an element that is presently displayed on your screen, you can locate the coordinates of the element by using `locateOnScreen()` or `locateCenterOnScreen()`, which is useful for targeting a specific element to be clicked:
2549 |
2550 | ```python
2551 | # Displays the coordinates of the element's top left corner, along with
2552 | # width and height of the found element:
2553 |
2554 | pyautogui.locateOnScreen('crop.png') # (1690, 516, 64, 64)
2555 |
2556 | # Displays the coordinates of the element's center point on screen:
2557 |
2558 | pyautogui.locateCenterOnScreen('crop.png') # (1722, 548)
2559 | ```
2560 |
2561 | - **NOTE:** These image recognition methods are computationally expensive and take time to complete (and therefore will not work on moving content). Additionally, the element on screen must be a **pixel-perfect** match of the reference image.
2562 |
2563 | [Back to TOC](#id-toc)
2564 |
--------------------------------------------------------------------------------