├── 05-12 └── guess-the-number.py ├── 07-17 └── character-count.py ├── 10-29 └── phone-and-email.py ├── 11-30 └── file-sizes.py ├── 12-35 └── box-print.py ├── 13-38 └── map-it.py ├── 13-40 └── get-amazon-price.py ├── 14-42 └── example.xlsx ├── 14-44 ├── combinedminutes.pdf ├── meetingminutes1.pdf └── meetingminutes2.pdf ├── 14-45 ├── demo.docx ├── demo2.docx └── get-text.py └── README.md /05-12/guess-the-number.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | print('Hello, what is your name?') 4 | 5 | name = str(input()) 6 | answer = random.randint(1, 20) 7 | 8 | print('Well, ' + name + ', I am thinking of a number from 1 to 20.') 9 | 10 | guessesTaken = 0 11 | guessedCorrectAnswer = False 12 | 13 | while guessesTaken < 6: 14 | print('Take a guess.') 15 | 16 | guess = input() 17 | guessesTaken = guessesTaken + 1 18 | 19 | try: 20 | if int(guess) == answer: 21 | guessedCorrectAnswer = True 22 | print('Good job, ' + name + 23 | '! You guessed my number in ' + str(guessesTaken) + 24 | (' guess!' if guessesTaken == 1 else ' guesses!')) 25 | break 26 | elif int(guess) < answer: 27 | print('Your guess is too low.') 28 | else: 29 | print('Your guess is too high.') 30 | except ValueError: 31 | guessesTaken = guessesTaken - 1 32 | print('You must enter a number.') 33 | 34 | if not guessedCorrectAnswer: 35 | print('Nope. 
The number I was thinking of was ' + str(answer) + '.') 36 | -------------------------------------------------------------------------------- /07-17/character-count.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | message = 'All cows eat grass' 4 | 5 | count = {} 6 | 7 | for character in message: 8 | count.setdefault(character.lower(), 0) 9 | count[character.lower()] += 1 10 | 11 | pprint.pprint(count) -------------------------------------------------------------------------------- /10-29/phone-and-email.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import re 4 | import pyperclip 5 | 6 | # Create regex for phone numbers: 7 | 8 | phoneRegex = re.compile(r''' 9 | # Valid phone number formats: 10 | # 415-555-0000, 555-0000, (415) 555-0000, 555-0000 ext 12345, ext. 12345, x12345 11 | 12 | ( 13 | ((\d\d\d) | (\(\d\d\d\)))? # Area code (optional) 14 | (\s|-) # First separator 15 | \d\d\d # First 3 digits 16 | - # Separator 17 | \d\d\d\d # Last 4 digits 18 | ((ext(\.)?\s|x) # Extension word (optional) 19 | (\d{2,5}))? 
# Extension number (optional) 20 | ) 21 | ''', re.VERBOSE) 22 | 23 | # Create regex for email addresses: 24 | 25 | emailRegex = re.compile(r''' 26 | [a-zA-Z0-9_.+]+ # Name part 27 | @ # @ symbol 28 | [a-zA-Z0-9_.+]+ # Domain name part 29 | ''', re.VERBOSE) 30 | 31 | # Get text from clipboard: 32 | 33 | text = pyperclip.paste() 34 | 35 | # Extract phone numbers and email addresses from text: 36 | 37 | extractedPhone = phoneRegex.findall(text) 38 | extractedEmail = emailRegex.findall(text) 39 | 40 | allPhoneNumbers = [] 41 | for phoneNumber in extractedPhone: 42 | allPhoneNumbers.append(phoneNumber[0]) 43 | 44 | # Copy extracted phone numbers and email addresses to clipboard: 45 | 46 | results = '\n'.join(allPhoneNumbers) + '\n' + '\n'.join(extractedEmail) 47 | pyperclip.copy(results) 48 | -------------------------------------------------------------------------------- /11-30/file-sizes.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import os 4 | 5 | pathname = '/Users/bronson/Downloads' 6 | totalSize = 0 7 | 8 | for filename in os.listdir(pathname): 9 | # Skip to next filename if the current item is not a file: 10 | if not os.path.isfile(os.path.join(pathname, filename)): 11 | continue 12 | # Add to totalSize if the the current item is a file: 13 | totalSize += os.path.getsize(os.path.join(pathname, filename)) 14 | 15 | print(totalSize) 16 | -------------------------------------------------------------------------------- /12-35/box-print.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | """ 4 | 5 | *************** 6 | * * 7 | * * 8 | * * 9 | *************** 10 | 11 | """ 12 | 13 | import sys 14 | 15 | # Stop program if user enters fewer than 3 arguments: 16 | if len(sys.argv) < 4: 17 | raise Exception('Use the following syntax: ./box-print.py "*" 15 5') 18 | 19 | 20 | def boxPrint(symbol, width, height): 21 | if len(symbol) != 1: 22 | raise Exception('"symbol" must be a string of length 1.') 23 | if (int(width) < 2) or (int(height) < 2): 24 | raise Exception('"width" and "height" must be 2 or greater."') 25 | 26 | print(symbol * int(width)) 27 | 28 | for i in range(int(height) - 2): 29 | print(symbol + (' ' * (int(width) - 2)) + symbol) 30 | 31 | print(symbol * int(width)) 32 | 33 | 34 | boxPrint(sys.argv[1], sys.argv[2], sys.argv[3]) 35 | -------------------------------------------------------------------------------- /13-38/map-it.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import webbrowser 4 | import sys 5 | import pyperclip 6 | 7 | # Check if command line arguments were passed: 8 | if len(sys.argv) > 1: 9 | # Concatenate arguments to form a valid street address, e.g., 10 | # ['mapit.py', '123', 'Main', 'St.'] -> '123 Main St.' 11 | address = ' '.join(sys.argv[1:]) 12 | else: 13 | # Get address from the user's clipboard if no arguments provided: 14 | address = pyperclip.paste() 15 | 16 | # https://www.google.com/maps/place/
17 | webbrowser.open('https://www.google.com/maps/place/' + address) 18 | -------------------------------------------------------------------------------- /13-40/get-amazon-price.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import sys 4 | import pyperclip 5 | import bs4 6 | import requests 7 | 8 | 9 | def getAmazonPrice(productUrl): 10 | res = requests.get(productUrl) 11 | res.raise_for_status() 12 | 13 | soup = bs4.BeautifulSoup(res.text, 'html.parser') 14 | # Only works on products with a '#newOfferAccordionRow' element: 15 | elements = soup.select( 16 | """#newOfferAccordionRow > div > div.a-accordion-row-a11y > a > h5 > 17 | div > div.a-column.a-span4.a-text-right.a-span-last > 18 | span.a-size-medium.a-color-price.header-price""" 19 | ) 20 | return elements[0].text.strip() 21 | 22 | 23 | if len(sys.argv) > 1: 24 | # Set product URL if included as an argument: 25 | productUrl = sys.argv[1] 26 | else: 27 | # Get product URL from the user's clipboard if no argument provided: 28 | productUrl = pyperclip.paste() 29 | 30 | 31 | price = getAmazonPrice(productUrl) 32 | 33 | print('The price is ' + price) 34 | -------------------------------------------------------------------------------- /14-42/example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-42/example.xlsx -------------------------------------------------------------------------------- /14-44/combinedminutes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-44/combinedminutes.pdf -------------------------------------------------------------------------------- /14-44/meetingminutes1.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-44/meetingminutes1.pdf -------------------------------------------------------------------------------- /14-44/meetingminutes2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-44/meetingminutes2.pdf -------------------------------------------------------------------------------- /14-45/demo.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-45/demo.docx -------------------------------------------------------------------------------- /14-45/demo2.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bronsonavila/automate-boring-stuff-python-notes/4de654f4da0db8a12a4abb3c7856b57b454b7b58/14-45/demo2.docx -------------------------------------------------------------------------------- /14-45/get-text.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | import docx 4 | 5 | # Returns a single string value of all text in a Word document: 6 | def getText(filename): 7 | documentObject = docx.Document(filename) 8 | fullText = [] 9 | for paragraph in documentObject.paragraphs: 10 | fullText.append(paragraph.text) 11 | return '\n'.join(fullText) 12 | 13 | 14 | print(getText( 15 | '/Users/bronson/Udemy/automate-the-boring-stuff-with-python/14-45/demo.docx' 16 | )) 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Notes on "Automate the Boring Stuff with Python Programming" 2 | 3 | ## About 4 | 5 | This repository is derived from the lectures covered in [Automate the Boring Stuff with Python Programming](https://www.udemy.com/automate/) by Al Sweigart. This repository is intended to serve as a personal quick reference guide and not a full-fledged tutorial. For more in-depth coverage, please consult the cited Udemy course, or review the free e-book available [here](http://automatetheboringstuff.com/). 6 | 7 |
8 | 9 | ## Table of Contents 10 | 11 | - Section 1: [Python Basics](#id-section1) 12 | - Section 2: [Flow Control](#id-section2) 13 | - Section 3: [Functions](#id-section3) 14 | - Section 4: [Handling Errors with Try/Except](#id-section4) 15 | - Section 6: [Lists](#id-section6) 16 | - Section 7: [Dictionaries](#id-section7) 17 | - Section 8: [More About Strings](#id-section8) 18 | - Section 9: [Running Programs from the Command Line](#id-section9) 19 | - Section 10: [Regular Expressions](#id-section10) 20 | - Section 11: [Files](#id-section11) 21 | - Section 12: [Debugging](#id-section12) 22 | - Section 13: [Web Scraping](#id-section13) 23 | - Section 14: [Excel, Word, and PDF Documents](#id-section14) 24 | - Section 15: [Email](#id-section15) 25 | - Section 16: [GUI Automation](#id-section16) 26 | 27 |
28 | 29 | ## Section 1: Python Basics 30 | 31 | ### 1.2 - Basic Terminology and Using IDLE 32 | 33 | #### Expressions 34 | 35 | - Expressions consist of values and operators that reduce down to a single value (including combinations of numbers and strings): 36 | 37 | ```python 38 | 2 + 2 # 4 39 | 40 | 'Alice' + 'Bob' # 'AliceBob' 41 | 42 | 'Hello' + '!' * 10 # 'Hello!!!!!!!!!!' 43 | ``` 44 | 45 | #### Variables 46 | 47 | - Declaring a variable: 48 | 49 | ```python 50 | spam = 'Hello' 51 | 52 | spam + ' World' # Hello World 53 | ``` 54 | 55 | ### 1.3 - Writing Our First Program 56 | 57 | - Create a file named `file.py` containing the following code: 58 | 59 | ```python 60 | # This program says hello and asks for your name: 61 | 62 | print('What is your name?') 63 | myName = input() 64 | print('Nice to meet you, ' + myName) 65 | print('The length of your name is:') 66 | print(len(myName)) 67 | print('What is your age?') 68 | myAge = input() 69 | print('You will be ' + str(int(myAge) + 1) + ' in a year.') 70 | ``` 71 | 72 | - `print()` displays the contents (arguments) within its parentheses on the screen. 73 | 74 | - `input()` accepts the value of the user's keyboard input and returns a **string** value. 75 | 76 | - **NOTE:** The program will wait until the input is entered before continuing to execute the remaining code. 
77 | 78 | - `len()` takes a string argument and evaluates to the integer value of the string's length: 79 | 80 | ```python 81 | len('Al') # 2 82 | ``` 83 | 84 | - `str()` takes an argument and converts it into a string data type: 85 | 86 | ```python 87 | str(42) # '42' 88 | ``` 89 | 90 | - `int()` takes an argument and converts it into an integer data type: 91 | 92 | ```python 93 | int('42') # 42 94 | ``` 95 | 96 | - **NOTE:** If you want to convert to a **floating point** number (i.e., a number with a decimal point) rather than an integer (i.e., a whole number), use `float()`: 97 | 98 | ```python 99 | float('3.14') # 3.14 100 | ``` 101 | 102 | - **NOTE:** On OS X, you may need to run `python3` rather than `python` to run the current version of Python. 103 | 104 | [Back to TOC](#id-toc) 105 | 106 |
107 | 108 | ## Section 2: Flow Control 109 | 110 | ### 2.4 - Flow Charts and Basic Flow Control Concepts 111 | 112 | #### Booleans 113 | 114 | - Booleans have two values: `True` and `False` (which **must** be capitalized). 115 | 116 | #### Comparison Operators 117 | 118 | - Overview: 119 | 120 | | Operator | Meaning | 121 | | :------: | ------------------------ | 122 | | == | Equal to | 123 | | != | Not equal to | 124 | | < | Less than | 125 | | > | Greater than | 126 | | <= | Less than or equal to | 127 | | >= | Greater than or equal to | 128 | 129 | - Expressions with comparison operators evaluate to a Boolean value: 130 | 131 | ```python 132 | 42 == 42 # True 133 | 134 | 42 >= 100 # False 135 | 136 | # Integers and strings will never be equal to each other: 137 | 138 | 42 == '42' # False 139 | 140 | # However, floats and integers can be equal to each other: 141 | 142 | 42.0 == 42 # True 143 | ``` 144 | 145 | #### Boolean Operators 146 | 147 | - Overview: 148 | 149 | ```python 150 | # The "and" operator returns true when all values are true: 151 | 152 | True and True # True 153 | 154 | True and False # False 155 | 156 | # The "or" operator returns true when at least one value is true: 157 | 158 | True or False # True 159 | 160 | # The "not" operator evaluates to the opposite Boolean value: 161 | 162 | not True # False 163 | ``` 164 | 165 | - Example: 166 | 167 | ```python 168 | myAge = 26 169 | 170 | myPet = 'cat' 171 | 172 | myAge > 20 and myPet == 'cat' # True 173 | ``` 174 | 175 | ### 2.5 - If, Else, and Elif Statements 176 | 177 | - Example: 178 | 179 | ```python 180 | # If the condition after the "if" statement is true, then the indented line 181 | # below the conditional statement will run, and the "else" block is skipped: 182 | 183 | if answer < 42: 184 | print('Too low') 185 | 186 | # If the preceding "if" statement (or "elif" statement) is false, then the 187 | # subsequent "elif" statement will be evaluated: 188 | 189 | elif answer > 42: 190 | 
print('Too high') 191 | 192 | # If all prior conditional statements are false, the "else" block will run: 193 | 194 | else: 195 | print('Correct') 196 | ``` 197 | 198 | - **NOTE:** New "blocks" are designated by increasing indentation and begin only after statements that end with a colon (`:`). 199 | 200 | - Python allows for "truthy" and "falsey" evaluations: 201 | 202 | ```python 203 | print('Enter a name.') 204 | 205 | name = input() 206 | 207 | if name: 208 | print('Thank you for entering a name.') 209 | else: 210 | print('You did not enter a name.') 211 | ``` 212 | 213 | - **TIP:** If you want to evaluate the truthiness of a value, execute the `bool()` function with the value passed in as an argument: 214 | 215 | ```python 216 | bool(42) # True 217 | 218 | bool(0) # False 219 | 220 | bool('Hello') # True 221 | 222 | bool('') # False 223 | ``` 224 | 225 | ### 2.6 - While Loops 226 | 227 | - Examples: 228 | 229 | ```python 230 | # Prints "Hello, world." to the console five times: 231 | 232 | spam = 0 233 | 234 | while spam < 5: 235 | print('Hello, world.') 236 | spam = spam + 1 237 | 238 | # Requests input until user enters required string: 239 | 240 | name = '' 241 | 242 | while name != 'your name': 243 | print('Please type your name.') 244 | name = input() 245 | 246 | print('Thank you.') 247 | ``` 248 | 249 | - The `break` statement is used to break out of a loop (including an infinite loop): 250 | 251 | ```python 252 | name = '' 253 | 254 | while True: 255 | print('Please type your name.') 256 | name = input() 257 | if name == 'your name': 258 | break 259 | 260 | print('Thank you.') 261 | ``` 262 | 263 | - The `continue` statement is used to return to the start of the loop and reevaluate the loop's condition: 264 | 265 | ```python 266 | # Prints 1, 2, 4, and 5. 
Number 3 is not printed due to "continue": 267 | 268 | spam = 0 269 | 270 | while spam < 5: 271 | spam = spam + 1 272 | if spam == 3: 273 | continue 274 | print(spam) 275 | ``` 276 | 277 | ### 2.7 - For Loops 278 | 279 | - Example: 280 | 281 | ```python 282 | # The variable "i" is set to 0 on the first iteration, and its value is 283 | # printed to the console on each iteration. The value of "i" increases 284 | # by 1 on each iteration up to (but not including) 5. The iteration 285 | # process terminates once the value of "i" is set to 5: 286 | 287 | for i in range(5): 288 | print(i) 289 | ``` 290 | 291 | - **NOTE:** If `range()` is given only **one** argument, then Python will generate a sequence of numbers starting at 0 (as a range object data type, which is a list-like value known as a "sequence"), and the stopping point will be the value of the argument (which must be an integer). However, `range()` can accept up to three arguments (all of which must be integers): 292 | 293 | ```python 294 | # range([start], stop[, step]) 295 | ``` 296 | 297 | - `start`: Starting number of the sequence. 298 | - `stop`: Generate numbers up to (but not including) this number. 299 | - `step`: Difference between each number in the sequence. 300 | 301 | - For loops are able to use `break` and `continue` statements in the same manner as while loops. 302 | 303 | [Back to TOC](#id-toc) 304 | 305 |
306 | 307 | ## Section 3: Functions 308 | 309 | ### 3.8 - Python's Built-In Functions 310 | 311 | #### Standard Library 312 | 313 | - Python comes with a set of modules called the **[Standard Library](https://docs.python.org/3/library/)**. Each module is a Python program that contains a related group of functions you can use in your programs (e.g., numeric and mathematical modules). Before you can use the functions in a module, you must **import** the module with an `import` statement: 314 | 315 | ```python 316 | # Returns a random integer from 1 to 10: 317 | 318 | import random 319 | 320 | random.randint(1, 10) 321 | ``` 322 | 323 | - In the example above, `randint()` is a function within the `random` module. You specify which function you want to use in a module by using dot notation. 324 | 325 | - You can specify **multiple** modules for import by separating their names with **commas**: 326 | 327 | ```python 328 | import random, sys, os, math 329 | ``` 330 | 331 | - It is generally considered best to use the syntax outlined above when using a function in a Standard Library module. 
However, if you want to import and call a function **directly** without needing to reference the module name each time, use the `from` form of an import statement: 332 | 333 | ```python 334 | # Imports all functions from the "random" module, not the module itself: 335 | 336 | from random import * 337 | 338 | randint(1, 10) 339 | ``` 340 | 341 | - **TIP:** To terminate a program early, use the `exit()` function of the `sys` module: 342 | 343 | ```python 344 | # Terminates after printing "Hello": 345 | 346 | import sys 347 | 348 | print('Hello') 349 | sys.exit() 350 | print('Goodbye') 351 | ``` 352 | 353 | #### Third-Party Modules 354 | 355 | - Modules can be installed by using the `pip` (or `pip3`) tool from the terminal: 356 | 357 | ``` 358 | $ pip install ${MODULE_NAME} 359 | ``` 360 | 361 | - **NOTE:** See [here](http://automatetheboringstuff.com/appendixa/) for more information on installing third-party modules. 362 | 363 | - One noteworthy module is **[pyperclip](https://pypi.org/project/pyperclip/)** which allows you to copy and paste text to and from the clipboard: 364 | 365 | ```python 366 | import pyperclip 367 | 368 | pyperclip.copy('The text to be copied to the clipboard.') 369 | pyperclip.paste() # 'The text to be copied to the clipboard.' 370 | ``` 371 | 372 | ### 3.9 - Writing Your Own Functions 373 | 374 | - Define a function by using the `def` keyword: 375 | 376 | ```python 377 | # Define a function called "hello()" that accepts a "name" parameter: 378 | 379 | def hello(name): 380 | print('Hello, ' + name) 381 | 382 | hello('Alice') # "Hello, Alice" 383 | ``` 384 | 385 | - All function calls return a value. 
You can specify what value should be returned by the function by using a `return` statement: 386 | 387 | ```python 388 | def plusOne(number): 389 | return number + 1 390 | 391 | newNumber = plusOne(5) 392 | 393 | print(newNumber) # 6 394 | ``` 395 | 396 | - **NOTE:** If a `return` statement is used without a value (or if the return statement is omitted entirely), the function still returns a value called `None` (i.e., a value that represents a lack of a value). The interactive shell does not display a `None` result, although `print()` will show it as `None`. 397 | 398 | - Some functions accept **keyword arguments**, which are used as optional arguments to pass to a function call. For example, the `print()` function adds a newline character by default to the end of the string it prints. However, this behavior can be modified by changing the value of the `end` keyword argument: 399 | 400 | ```python 401 | # Prints "Hello" and "World" on two separate lines: 402 | 403 | print('Hello') 404 | print('World') 405 | 406 | # Prints "Hello World" on one line: 407 | 408 | print('Hello', end=' ') 409 | print('World') 410 | ``` 411 | 412 | - **NOTE:** The `print()` function also contains a `sep` keyword argument that specifies what string should be used to separate multiple arguments (a single space by default): 413 | 414 | ```python 415 | # Prints 'cat dog mouse': 416 | 417 | print('cat', 'dog', 'mouse') 418 | 419 | # Prints 'cat, dog, mouse': 420 | 421 | print('cat', 'dog', 'mouse', sep=', ') 422 | ``` 423 | 424 | ### 3.10 - Global and Local Scopes 425 | 426 | - Variables inside of a function can have the same name as variables outside of the function, but they are considered two separate variables due to scope. 
Variables defined in a function belong to that function's **local scope**, whereas all variables defined outside of functions belong to the application's **global scope**: 427 | 428 | ```python 429 | spam = 42 # Global variable 430 | 431 | def eggs(): 432 | spam = 42 # Local variable 433 | ``` 434 | 435 | - Key Points: 436 | 437 | **1**\. Code in the global scope cannot use any local variables. 438 | 439 | **2**\. Code in a local scope can access global variables. 440 | 441 | **3**\. Code in one function's local scope cannot use variables in another local scope. 442 | 443 | **4**\. You can use the same name for different variables if they are in different scopes. 444 | 445 | - If you want to reassign the value of a global variable (e.g. `eggs = 42`) from within a local scope, you cannot simply say `eggs = 'Hello'`, as this will merely create a local variable named "eggs" within the local scope. Rather, you must use a `global` statement: 446 | 447 | ```python 448 | eggs = 42 449 | 450 | def spam(): 451 | global eggs 452 | eggs = 'Hello' # Overwrites 42 in global "eggs" variable 453 | print(eggs) # Prints 'Hello' 454 | 455 | spam() 456 | 457 | print(eggs) # Prints 'Hello' 458 | ``` 459 | 460 | [Back to TOC](#id-toc) 461 | 462 |
463 | 464 | ## Section 4: Handling Errors with Try/Except 465 | 466 | ### 4.11 - Try and Except Statements 467 | 468 | - By default, a Python application will crash if an error occurs while executing code: 469 | 470 | ```python 471 | def div42by(divideBy): 472 | return 42 / divideBy 473 | 474 | print(div42by(2)) # 21.0 475 | print(div42by(0)) # (Will crash the application) 476 | print(div42by(21)) # (Will not be printed) 477 | ``` 478 | 479 | - In order to detect and handle errors while still allowing the program to run, you must use `try`/`except` statements: 480 | 481 | ```python 482 | def div42by(divideBy): 483 | try: 484 | return 42 / divideBy 485 | except ZeroDivisionError: 486 | print('Error: You tried to divide by zero.') 487 | 488 | print(div42by(2)) # 21.0 489 | print(div42by(0)) # 'Error: You tried to divide by zero.', then 'None' 490 | print(div42by(21)) # 2.0 491 | ``` 492 | 493 | - **NOTE:** `ZeroDivisionError` is one of Python's [Built-in Exceptions](https://docs.python.org/3/library/exceptions.html). You can omit the exception type if you want Python to handle all errors via the code in the `except` block. 494 | 495 | [Back to TOC](#id-toc) 496 | 497 |
498 | 499 | ## Section 6: Lists 500 | 501 | ### 6.13 - The List Data Type 502 | 503 | - A **list** is a value containing sequential, comma-delimited items within square brackets. To access items in a list, use an integer index for the item's position in the list (starting with 0): 504 | 505 | ```python 506 | spam = [['cat', 'bat', 'rat'], 'elephant'] 507 | 508 | spam[0] # ['cat', 'bat', 'rat'] 509 | 510 | spam[1] # 'elephant' 511 | 512 | spam[0][1] # 'bat' 513 | 514 | # You can also access items in reverse order by using a negative integer, 515 | # with -1 starting as the last item in the list: 516 | 517 | spam[0][-1] # 'rat' 518 | 519 | # An item's value can be reassigned by accessing the index: 520 | 521 | spam[0] = 'mouse' 522 | 523 | spam # ['mouse', 'elephant'] 524 | ``` 525 | 526 | - To view the **length** of a list, use the `len()` function: 527 | 528 | ```python 529 | spam = ['cat', 'bat', 'rat'] 530 | 531 | len(spam) # 3 532 | ``` 533 | 534 | - A **slice** can access (not mutate) multiple items in a list by specifying the index at which the slice begins and the index at which the slice ends (non-inclusive): 535 | 536 | ```python 537 | spam = ['cat', 'bat', 'rat'] 538 | 539 | spam[0:2] # ['cat', 'bat'] 540 | 541 | # You can redefine multiple items in a list by using a slice: 542 | 543 | spam[1:3] = ['dog', 'fish'] 544 | 545 | spam # ['cat', 'dog', 'fish'] 546 | ``` 547 | 548 | - **TIP:** You can omit either number on each side of the colon. If you omit the number to the left of the colon, the slice will start at index 0 and end at the number to the right. 
If you omit the number to the right, the slice will start from the number on the left and end at the number that is the length of the list (allowing the slice to include the last item in the list): 549 | 550 | ```python 551 | spam = ['cat', 'bat', 'rat'] 552 | 553 | spam[:2] # ['cat', 'bat'] 554 | ``` 555 | 556 | - To **delete** items from a list, use the `del` statement: 557 | 558 | ```python 559 | spam = ['cat', 'bat', 'elephant', 'rat'] 560 | 561 | del spam[2] 562 | 563 | spam # ['cat', 'bat', 'rat'] 564 | ``` 565 | 566 | - To **concatenate** lists, use the `+` operator; to **replicate** a list, use the `*` operator: 567 | 568 | ```python 569 | [1, 2, 3] + [4, 5, 6] # [1, 2, 3, 4, 5, 6] 570 | 571 | [1, 2, 3] * 3 # [1, 2, 3, 1, 2, 3, 1, 2, 3] 572 | ``` 573 | 574 | - To **convert** another iterable data type (e.g., a string) into a list, use the `list()` function: 575 | 576 | ```python 577 | list('hello') # ['h', 'e', 'l', 'l', 'o'] 578 | ``` 579 | 580 | - To determine whether an item is **contained** in a list, you can use the `in` and `not in` operators: 581 | 582 | ```python 583 | 'elephant' in ['cat', 'bat', 'rat'] # False 584 | 585 | 'elephant' not in ['cat', 'bat', 'rat'] # True 586 | ``` 587 | 588 | ### 6.14 - For Loops with Lists, Multiple Assignment, and Augmented Operators 589 | 590 | - A list can be iterated over in a for loop in the same manner as a `range` object: 591 | 592 | ```python 593 | # Both loops produce the same output: 594 | 595 | for i in range(4): 596 | print(i) 597 | 598 | for i in [0, 1, 2, 3]: 599 | print(i) 600 | ``` 601 | 602 | - **TIP:** The ability to transform a `range` object into a list allows you to take advantage of the `range()` function's step parameter: 603 | 604 | ```python 605 | # Produces a list of all even numbers from 0 to 98: 606 | 607 | list(range(0, 100, 2)) 608 | ``` 609 | 610 | - **TIP:** You can access both the index and value of each item while iterating over a list by using the `range()` and `len()` functions: 611 | 612 | ```python 613 | supplies = ['pens', 
'staplers', 'binders'] 614 | 615 | for i in range(len(supplies)): 616 | print ('Index ' + str(i) + ' in supplies is: ' + supplies[i]) 617 | 618 | # Index 0 in supplies is: pens 619 | # Index 1 in supplies is: staplers 620 | # Index 2 in supplies is: binders 621 | ``` 622 | 623 | - You can use Python's **multiple assignments** feature to iterate over a list and assign each item's value to a variable: 624 | 625 | ```python 626 | cat = ['fat', 'orange', 'loud'] 627 | 628 | size, color, disposition = cat 629 | 630 | size # 'fat' 631 | color # 'orange' 632 | disposition # 'loud' 633 | ``` 634 | 635 | - **NOTE:** Multiple assignments work not only with lists, but also comma-delimited values outside of a list: 636 | 637 | ```python 638 | size, color, disposition = 'slim', 'gray', 'quiet' 639 | ``` 640 | 641 | - **TIP:** Multiple assignments are also useful for quick variable swapping: 642 | 643 | ```python 644 | a = 'AAA' 645 | b = 'BBB' 646 | 647 | a, b = b, a 648 | 649 | a # 'BBB' 650 | b # 'AAA' 651 | ``` 652 | 653 | - **Augmented Operators** (`+=`, `-=`, `*=`, `/=`, `%=`): 654 | 655 | ```python 656 | spam = 42 657 | 658 | spam = spam + 1 659 | 660 | spam += 1 661 | 662 | spam # 44 663 | ``` 664 | 665 | ### 6.15 - List Methods 666 | 667 | - The `index()` method returns the index of the first occurrence of the specified value: 668 | 669 | ```python 670 | spam = ['hello', 'hi', 'howdy', 'hi'] 671 | 672 | spam.index('hi') # 1 673 | 674 | spam.index('hey') # (Raises an exception if value not found) 675 | ``` 676 | 677 | - The `append()` method appends an item to the end of the list: 678 | 679 | ```python 680 | spam = ['cat', 'dog', 'bat'] 681 | 682 | spam.append('moose') 683 | 684 | spam[3] # 'moose' 685 | ``` 686 | 687 | - The `insert()` method inserts the specified value at the specified position: 688 | 689 | ```python 690 | spam = ['cat', 'dog', 'bat'] 691 | 692 | spam.insert(1, 'chicken') 693 | 694 | spam # ['cat', 'chicken', 'dog', 'bat'] 695 | ``` 696 | 697 | - The 
`remove()` method removes the first occurrence of the item with the specified value: 698 | 699 | ```python 700 | spam = ['cat', 'bat', 'elephant', 'rat'] 701 | 702 | spam.remove('elephant') 703 | 704 | spam # ['cat', 'bat', 'rat'] 705 | 706 | spam.remove('oat') # (Raises a ValueError) 707 | ``` 708 | 709 | - The `sort()` method sorts a list in ascending order by default. The sorting direction can be reversed by using the `reverse` keyword argument: 710 | 711 | ```python 712 | spam = [2, 5, 3.14, 1, -7] 713 | 714 | spam.sort() 715 | 716 | spam # [-7, 1, 2, 3.14, 5] 717 | 718 | spam = ['ants', 'cats', 'badgers'] 719 | 720 | spam.sort() 721 | 722 | spam # ['ants', 'badgers', 'cats'] 723 | 724 | spam.sort(reverse=True) 725 | 726 | spam # ['cats', 'badgers', 'ants'] 727 | ``` 728 | 729 | - **NOTE:** You cannot sort a list that contains both number and string types. 730 | 731 | - **ALSO:** When working with strings, `sort()` actually sorts by "**ASCII-betical**" order rather than alphabetical order (resulting in uppercase letters being sorted before lowercase letters, because uppercase letters appear before lowercase letters in ASCII code). However, you can sort by true alphabetical order by using the `key` keyword argument: 732 | 733 | ```python 734 | spam = ['a', 'z', 'A', 'Z'] 735 | 736 | spam.sort() 737 | 738 | spam # ['A', 'Z', 'a', 'z'] 739 | 740 | # str.lower is a string method that converts a string input to lowercase: 741 | 742 | spam.sort(key=str.lower) 743 | 744 | spam # ['A', 'a', 'Z', 'z'] 745 | ``` 746 | 747 | ### 6.16 - Similarities Between Lists and Strings 748 | 749 | - A string is essentially a list of single character strings (which is why `list()` can accept a string as an argument). However, they are significantly different in the sense that a list is a **mutable** data type (i.e., it can have values added, removed, or changed), whereas a string is an **immutable** data type (i.e., its value cannot be changed). 
Because strings are immutable, the proper way to create a new string derived from an existing variable is by using **slices**: 750 | 751 | ```python 752 | name = 'Zophie a cat' 753 | 754 | newName = name[0:7] + 'the' + name[8:12] 755 | 756 | newName # 'Zophie the cat' 757 | ``` 758 | 759 | - When a list is assigned to a variable, Python actually stores a **reference** to the list in memory, not the actual list itself. Thus, if a list is referenced in two separate variables, a modification to one variable will affect the value stored in the other variable as well: 760 | 761 | ```python 762 | spam = [0, 1, 2, 3, 4, 5] 763 | 764 | cheese = spam 765 | 766 | cheese[1] = 'Hello' 767 | 768 | cheese # [0, 'Hello', 2, 3, 4, 5] 769 | 770 | spam # [0, 'Hello', 2, 3, 4, 5] 771 | ``` 772 | 773 | - If you want to make a true copy of a list (rather than having two or more variables point to the same list by reference), use the `copy` module's `deepcopy()` function: 774 | 775 | ```python 776 | import copy 777 | 778 | spam = ['A', 'B', 'C', 'D'] 779 | 780 | # Creates a list with items identical to (but separate from) those in "spam": 781 | 782 | cheese = copy.deepcopy(spam) 783 | ``` 784 | 785 | - When working within lists, Python is aware that instances of **line continuation** should not be considered a new block: 786 | 787 | ```python 788 | spam = ['apples', 789 | 'oranges', 790 | 'bananas'] 791 | ``` 792 | 793 | - **TIP:** You can take advantage of line continuation even without a list by using the line continuation character (`\`): 794 | 795 | ```python 796 | print('Four score and seven ' + \ 797 | 'years ago...') 798 | 799 | # 'Four score and seven years ago...' 800 | ``` 801 | 802 | [Back to TOC](#id-toc) 803 | 804 |
805 | 806 | ## Section 7: Dictionaries 807 | 808 | ### 7.17 - The Dictionary Data Type 809 | 810 | - A dictionary is a mutable collection of key-value pairs: 811 | 812 | ```python 813 | myCat = {'size': 'large', 'color': 'gray', 'disposition': 'loud'} 814 | 815 | myCat['size'] # 'large' 816 | 817 | myCat['age'] # (Results in a KeyError message) 818 | 819 | # Check if a key exists with the "in" and "not in" operators: 820 | 821 | 'name' in myCat # False 822 | 823 | 'name' not in myCat # True 824 | ``` 825 | 826 | - Two dictionaries with identical key-value pairs will be considered equivalent regardless of the order in which those key-value pairs are arranged: 827 | 828 | ```python 829 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8} 830 | ham = {'species': 'cat', 'age': 8, 'name': 'Zophie'} 831 | 832 | eggs == ham # True 833 | ``` 834 | 835 | - Three major dictionary iteration methods (`keys()`, `values()`, `items()`): 836 | 837 | ```python 838 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8} 839 | 840 | # Each method returns a list-like data type, so you must convert each result 841 | # with the "list()" function if you want to receive a true list value: 842 | 843 | list(eggs.keys()) # ['name', 'species', 'age'] 844 | 845 | list(eggs.values()) # ['Zophie', 'cat', 8] 846 | 847 | # Tuples are similar to lists, except they are immutable and use parentheses (not brackets): 848 | 849 | list(eggs.items()) # [('name', 'Zophie'), ('species', 'cat'), ('age', 8)] 850 | ``` 851 | 852 | - You can iterate over a dictionary's keys/values with a for loop: 853 | 854 | ```python 855 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8} 856 | 857 | # Prints 'name', 'species', and 'age': 858 | 859 | for k in eggs.keys(): 860 | print(k) 861 | 862 | # Prints 'name: Zophie', 'species: cat', and 'age: 8' 863 | 864 | for k, v in eggs.items(): 865 | print(k + ': ' + str(v)) 866 | ``` 867 | 868 | - If you attempt to retrieve a value from a key that does not exist in a dictionary, you will 
normally receive an error. However, you can avoid such problems by using the `get()` method to specify a default value if the key does not exist: 869 | 870 | ```python 871 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8} 872 | 873 | eggs.get('age', 0) # 8 874 | 875 | eggs.get('color', '') # '' 876 | ``` 877 | 878 | - If you want to set a value for a key that does not yet exist in a dictionary, use the `setdefault()` method: 879 | 880 | ```python 881 | eggs = {'name': 'Zophie', 'species': 'cat', 'age': 8} 882 | 883 | eggs.setdefault('color', 'black') # 'black' 884 | 885 | # No change if the key already exists: 886 | 887 | eggs.setdefault('color', 'orange') # 'black' 888 | ``` 889 | 890 | - To obtain a **pretty print** of a dictionary (or list), use the `pprint` module: 891 | 892 | ```python 893 | import pprint 894 | 895 | message = 'All cows eat grass' 896 | 897 | count = {} 898 | 899 | for character in message: 900 | count.setdefault(character.lower(), 0) 901 | count[character.lower()] += 1 902 | 903 | pprint.pprint(count) 904 | 905 | # {' ': 3, 906 | # 'a': 3, 907 | # 'c': 1, 908 | # 'e': 1, 909 | # 'g': 1, 910 | # 'l': 2, 911 | # 'o': 1, 912 | # 'r': 1, 913 | # 's': 3, 914 | # 't': 1, 915 | # 'w': 1} 916 | ``` 917 | 918 | - **TIP:** If you want to store your `pprint` object as a **string** (rather than merely print it to the screen), use the `pprint` module's `pformat()` method instead. 919 | 920 | ### 7.18 - Data Structures 921 | 922 | - You can use the `type()` function to determine the data type of any value: 923 | 924 | ```python 925 | type(42) # <class 'int'> 926 | 927 | type('hello') # <class 'str'> 928 | 929 | type({'name': 'Zophie'}) # <class 'dict'> 930 | ``` 931 | 932 | [Back to TOC](#id-toc) 933 | 934 |
935 | 936 | ## Section 8: More About Strings 937 | 938 | ### 8.19 - Advanced String Syntax 939 | 940 | - There are multiple ways to type strings, including via double quotes and escape characters: 941 | 942 | ```python 943 | "That is Alice's cat." 944 | 945 | # Prints 'Say hello to Bob's mother.': 946 | 947 | print('Say hello to Bob\'s mother.') 948 | 949 | # Prints each statement on a new line: 950 | 951 | print('Hello.\nHow are you?\nI\'m fine.') 952 | ``` 953 | 954 | - Types of escape characters: 955 | 956 | | Escape character | Prints as | 957 | | :--------------: | -------------------- | 958 | | \\' | Single quote | 959 | | \\" | Double quote | 960 | | \t | Tab | 961 | | \n | Newline (line break) | 962 | | \\\ | Backslash | 963 | 964 | - If you have text that contains many backslashes that you do not want to be treated as escape characters, you can use a **raw string**, which is a string that begins with a lowercase "r": 965 | 966 | ```python 967 | # Prints without the letters "t" and "n", and inserts tab and newline characters instead: 968 | 969 | print('C:\temp\new') 970 | 971 | # Prints text as written: 972 | 973 | print(r'C:\temp\new') 974 | ``` 975 | 976 | - Although you can use `\n` to add newlines to a string, it is often easier to use **multiline strings** with triple quotes (either single or double quotes). Any quotes, tabs, or newlines within the triple quotes are considered part of the string: 977 | 978 | ```python 979 | spam = """Dear Alice, 980 | Eve's cat is orange. 981 | Sincerely, 982 | Bob""" 983 | 984 | print(spam) # (Prints each line on a new line) 985 | 986 | spam # "Dear Alice,\nEve's cat is orange.\nSincerely,\nBob" 987 | ``` 988 | 989 | ### 8.20 - String Methods 990 | 991 | - The `upper()` and `lower()` methods return a string where all characters are in uppercase or lowercase, respectively: 992 | 993 | ```python 994 | spam = 'Hello, world!' 995 | 996 | spam.upper() # 'HELLO, WORLD!' 997 | 998 | spam.lower() # 'hello, world!' 
999 | ``` 1000 | 1001 | - **NOTE:** Because strings are immutable, string methods do not modify the original string. If you want to actually modify the string value stored to a variable, you must say, e.g.: `spam = spam.lower()` 1002 | 1003 | - The `isupper()` and `islower()` methods return a Boolean value indicating whether all letters in the string are uppercase or lowercase, respectively: 1004 | 1005 | ```python 1006 | spam = 'hello, world!' 1007 | 1008 | spam.isupper() # False 1009 | 1010 | spam.islower() # True 1011 | ``` 1012 | 1013 | - Other noteworthy string methods beginning with the word `is`: 1014 | 1015 | ```python 1016 | isalpha() # (Letters only) 1017 | 1018 | isalnum() # (Letters and numbers only) 1019 | 1020 | isdecimal() # (Numbers only) 1021 | 1022 | isspace() # (Whitespace only) 1023 | 1024 | istitle() # (Titlecase only) 1025 | ``` 1026 | 1027 | - **NOTE:** Because string methods return a new string, you are able to **chain** method calls: 1028 | 1029 | ```python 1030 | 'hello'.upper().isupper() # True 1031 | ``` 1032 | 1033 | - The `startswith()` and `endswith()` methods return a Boolean value indicating whether the string starts with or ends with (respectively) the specified value: 1034 | 1035 | ```python 1036 | spam = 'Hello, world!' 1037 | 1038 | spam.startswith('Hello') # True 1039 | 1040 | spam.endswith('!') # True 1041 | 1042 | spam.endswith('world') # False 1043 | ``` 1044 | - The `join()` method takes all items in an iterable and joins them into one string using a specified separator: 1045 | 1046 | ```python 1047 | spam = ['cats', 'rats', 'bats'] 1048 | 1049 | ', '.join(spam) # 'cats, rats, bats' 1050 | 1051 | '\n'.join(spam) # (Inserts a newline character between each item) 1052 | ``` 1053 | 1054 | - The `split()` method splits a string into a list. The method splits a string according to whitespace separation by default. 
However, you can specify the string to be used as the separator (first parameter) and the number of splits to perform (second parameter): 1055 | 1056 | ```python 1057 | spam = 'My name is Simon' 1058 | 1059 | spam.split() # ['My', 'name', 'is', 'Simon'] 1060 | 1061 | spam.split('m') # ['My na', 'e is Si', 'on'] 1062 | 1063 | spam.split(None, 1) # ['My', 'name is Simon'] 1064 | ``` 1065 | 1066 | - The `ljust()` and `rjust()` methods return a "padded" version of a string that is left or right justified (respectively) within a field whose total width is given by the first parameter; spaces are used as padding by default. An optional second parameter can be used to specify a padding character other than a space. There is also a `center()` method that operates similarly to `ljust()` and `rjust()` but uses padding to center the text, rather than justify left or right: 1067 | 1068 | ```python 1069 | 'Hello'.ljust(10) # 'Hello     ' 1070 | 1071 | 'Hello'.rjust(10) # '     Hello' 1072 | 1073 | 'Hello'.ljust(10, '.') # 'Hello.....' 1074 | 1075 | 'Hello'.center(15, '-') # '-----Hello-----' 1076 | ``` 1077 | - Use the `strip()`, `rstrip()`, and `lstrip()` methods to trim whitespace characters off of a string. You can insert a string as an argument, and any contiguous set of characters in that argument (regardless of order) will be stripped from the end(s) of the string: 1078 | 1079 | ```python 1080 | ' x '.strip() # 'x' 1081 | 1082 | ' x '.lstrip() # 'x ' 1083 | 1084 | ' x '.rstrip() # ' x' 1085 | 1086 | 'SpamBaconSpamEggsSpam'.strip('ampS') # 'BaconSpamEggs' 1087 | ``` 1088 | 1089 | - The `replace()` method replaces a specified phrase with another specified phrase: 1090 | 1091 | ```python 1092 | 'Hello there!'.replace('e', '3') # 'H3llo th3r3!' 
1093 | ``` 1094 | 1095 | ### 8.21 - String Formatting 1096 | 1097 | - Rather than concatenating numerous strings with the `+` operator, you can use Python's **string formatting** (a.k.a., string interpolation) by using the `%` operator and the `%s` symbol (one of several types of conversion specifiers): 1098 | 1099 | ```python 1100 | name = 'Alice' 1101 | place = 'Main Street' 1102 | time = '6:00 PM' 1103 | food = 'turnips' 1104 | 1105 | 'Hello, %s. You are invited to a party at %s at %s. Please bring %s.' % (name, place, time, food) 1106 | 1107 | # 'Hello, Alice. You are invited to a party at Main Street at 6:00 PM. Please bring turnips.' 1108 | ``` 1109 | 1110 | [Back to TOC](#id-toc) 1111 | 1112 |
1113 | 1114 | ## Section 9: Running Programs from the Command Line 1115 | 1116 | - See [Appendix B](https://automatetheboringstuff.com/appendixb/) re: shebang line (`#! /usr/bin/env python3`) and changing file permissions (`chmod +x pythonScript.py`) 1117 | 1118 | - To use arguments from the command line in your Python script, use the `sys.argv` list: 1119 | 1120 | ```python 1121 | #! /usr/bin/env python3 1122 | 1123 | import sys 1124 | 1125 | print(sys.argv) 1126 | ``` 1127 | 1128 | [Back to TOC](#id-toc) 1129 | 1130 |
1131 | 1132 | ## Section 10: Regular Expressions 1133 | 1134 | ### 10.23 - Regular Expression Basics 1135 | 1136 | - Example of using regular expressions with the `re` module: 1137 | 1138 | ```python 1139 | import re 1140 | 1141 | message = 'Call me tomorrow at 415-555-1011, or at 415-555-9999.' 1142 | 1143 | # "compile()" compiles a regex pattern into a regex object that can be used 1144 | # for matching via "match()", "search()", and other methods. "\d" is the 1145 | # regex for a numeric digit character: 1146 | 1147 | phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') 1148 | 1149 | # The regex data type has a "search()" method that can be used to search a 1150 | # string for the regex pattern and return a match object containing the first 1151 | # matching string: 1152 | 1153 | matchObject = phoneNumRegex.search(message) 1154 | 1155 | # Match objects have a method called "group()" that will return the text 1156 | # of the matching string: 1157 | 1158 | print(matchObject.group()) # 415-555-1011 1159 | ``` 1160 | 1161 | - **NOTE:** If the `search()` method does not find a match, it will return a value of `None`, which will cause an error to result if you call the `group()` method on a nonexistent match object. 
1162 | 1163 | ### 10.24 - Regex Groups and the Pipe Character 1164 | 1165 | - Use **parentheses** to mark out groups within a regex, and access groups via the `group()` method: 1166 | 1167 | ```python 1168 | import re 1169 | 1170 | phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)') 1171 | 1172 | matchObject = phoneNumRegex.search('My number is 415-555-4242.') 1173 | 1174 | matchObject.group(1) # '415' 1175 | 1176 | matchObject.group(2) # '555-4242' 1177 | ``` 1178 | 1179 | - **NOTE:** If you want to find literal parentheses (or any other regex special characters) within your text, then you must escape the opening and closing parentheses with a backslash (`\`): 1180 | 1181 | ```python 1182 | phoneNumRegex = re.compile(r'\(\d\d\d\) \d\d\d-\d\d\d\d') 1183 | ``` 1184 | 1185 | - Use the **pipe** (`|`) character to match one of many possible groups (based on, e.g., prefix/suffix): 1186 | 1187 | ```python 1188 | batRegex = re.compile(r'Bat(man|mobile|copter)') 1189 | 1190 | matchObject = batRegex.search('Batmobile lost a wheel') 1191 | 1192 | matchObject.group() # 'Batmobile' 1193 | 1194 | matchObject.group(1) # 'mobile' 1195 | ``` 1196 | 1197 | ### 10.25 - Repetition in Regex Patterns and Greedy/Nongreedy Matching 1198 | 1199 | - The `?` character matches the preceding expression **0 or 1** time(s) (i.e., the expression can either appear once or not at all for a match to occur): 1200 | 1201 | ```python 1202 | import re 1203 | 1204 | # Matches 'Batman' or 'Batwoman': 1205 | 1206 | batRegex = re.compile(r'Bat(wo)?man') 1207 | ``` 1208 | 1209 | - The `*` character matches the preceding expression **0 or more** times: 1210 | 1211 | ```python 1212 | batRegex = re.compile(r'Bat(wo)*man') 1213 | 1214 | matchObject = batRegex.search('The Adventures of Batwowowoman') 1215 | 1216 | matchObject.group() # 'Batwowowoman' 1217 | ``` 1218 | 1219 | - The `+` character matches the preceding expression **1 or more** times: 1220 | 1221 | ```python 1222 | # Matches 'Batwoman' or 
'Batwowowoman', etc., but not 'Batman': 1223 | 1224 | batRegex = re.compile(r'Bat(wo)+man') 1225 | ``` 1226 | 1227 | - The `{`*`n`*`}` character matches **exactly** *n* occurrences of the preceding expression: 1228 | 1229 | ```python 1230 | haRegex = re.compile(r'(ha){3}') 1231 | 1232 | matchObject = haRegex.search('He said, "hahaha"') 1233 | 1234 | matchObject.group() # 'hahaha' 1235 | ``` 1236 | 1237 | - The `{`*`n,m`*`}` character matches at least *n* and at most *m* occurrences of the preceding expression (if *n* is omitted, it is treated as 0; if *m* is omitted, it is treated as ∞): 1238 | 1239 | ```python 1240 | haRegex = re.compile(r'(ha){3,5}') 1241 | 1242 | haMatchObject = haRegex.search('He said, "hahahaha"') 1243 | 1244 | haMatchObject.group() # 'hahahaha' 1245 | 1246 | # By default, Python will perform a "greedy" match and return the longest 1247 | # possible match that it finds (in this case, 5 digits rather than 3): 1248 | 1249 | digitRegex = re.compile(r'(\d){3,5}') 1250 | 1251 | digitMatchObject = digitRegex.search('1234567890') 1252 | 1253 | digitMatchObject.group() # '12345' 1254 | 1255 | # To perform a "nongreedy" match, use the "?" character after the curly brace: 1256 | 1257 | digitRegex = re.compile(r'(\d){3,5}?') 1258 | 1259 | digitMatchObject = digitRegex.search('1234567890') 1260 | 1261 | digitMatchObject.group() # '123' 1262 | ``` 1263 | 1264 | ### 10.26 - Regex Character Classes and the findall() Method 1265 | 1266 | #### findall() Method 1267 | 1268 | - If you want to return **every** occurrence of a regex pattern (rather than only the first), then use the `findall()` method (instead of `search()`) to return a list containing all matches: 1269 | 1270 | ```python 1271 | import re 1272 | 1273 | message = 'Call me tomorrow at 415-555-1011, or at 415-555-9999.' 
1274 | 1275 | phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') 1276 | 1277 | print(phoneNumRegex.findall(message)) # ['415-555-1011', '415-555-9999'] 1278 | ``` 1279 | 1280 | - Be mindful of how **groups** affect the value returned by the `findall()` method: 1281 | 1282 | ```python 1283 | message = 'Call me tomorrow at 415-555-1011, or at 415-555-9999.' 1284 | 1285 | # One group: 1286 | 1287 | single = re.compile(r'(\d\d\d)-\d\d\d-\d\d\d\d') 1288 | 1289 | single.findall(message) # ['415', '415'] 1290 | 1291 | # Two groups: 1292 | 1293 | tuples = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)') 1294 | 1295 | tuples.findall(message) # [('415', '555-1011'), ('415', '555-9999')] 1296 | 1297 | # Two groups nested within one group: 1298 | 1299 | nested = re.compile(r'((\d\d\d)-(\d\d\d-\d\d\d\d))') 1300 | 1301 | nested.findall(message) # [('415-555-1011', '415', '555-1011'), 1302 | # ('415-555-9999', '415', '555-9999')] 1303 | ``` 1304 | 1305 | #### Character Classes 1306 | 1307 | - Common Character Classes: 1308 | 1309 | | Shorthand character class | Represents | 1310 | | :-----------------------: | ------------------------------------------------------------------- | 1311 | | \d | Any numeric digit from 0 to 9 | 1312 | | \D | Any character that is *not* a numeric digit from 0 to 9 | 1313 | | \w | Any letter, numeric digit, or underscore (i.e., "word" characters) | 1314 | | \W | Any character that is *not* a letter, numeric digit, or underscore | 1315 | | \s | Any space, tab, or newline character (i.e., "space" characters) | 1316 | | \S | Any character that is *not* a space, tab, or newline | 1317 | 1318 | - Example: 1319 | 1320 | ```python 1321 | lyrics = """12 drummers drumming, 11 pipers piping, 10 lords a leaping, 1322 | 9 ladies dancing, 8 maids a milking, 7 swans a swimming, 1323 | 6 geese a laying, 5 golden rings, 4 calling birds, 3 french hens, 1324 | 2 turtle doves, 1 partridge in a pear tree""" 1325 | 1326 | xmasRegex = re.compile(r'\d+\s\w+') 1327 | 1328 | 
xmasRegex.findall(lyrics) # ['12 drummers', '11 pipers', '10 lords', 1329 | # '9 ladies', '8 maids', '7 swans', 1330 | # '6 geese', '5 golden', '4 calling', 1331 | # '3 french', '2 turtle', '1 partridge'] 1332 | ``` 1333 | 1334 | - You can create your own regex **character sets** (e.g., `[xyz]`) and **negated or complemented character sets** (e.g., `[^xyz]`): 1335 | 1336 | ```python 1337 | message = 'Robocop eats baby food.' 1338 | 1339 | # Matches all letters: 1340 | 1341 | alphaRegex = re.compile(r'[a-zA-Z]') 1342 | 1343 | alphaRegex.findall(message) # ['R', 'o', 'b', 'o', 'c', 'o', 'p', 1344 | # 'e', 'a', 't', 's', 'b', 'a', 'b', 'y', 1345 | # 'f', 'o', 'o', 'd'] 1346 | 1347 | # Matches all vowels: 1348 | 1349 | vowelRegex = re.compile(r'[aeiouAEIOU]') 1350 | 1351 | vowelRegex.findall(message) # ['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o'] 1352 | 1353 | # Matches all vowels appearing in sets of 2: 1354 | 1355 | doubleVowelRegex = re.compile(r'[aeiouAEIOU]{2}') 1356 | 1357 | doubleVowelRegex.findall(message) # ['ea', 'oo'] 1358 | 1359 | # Matches anything that is NOT enclosed in the brackets: 1360 | 1361 | nonVowelRegex = re.compile(r'[^aeiouAEIOU]') 1362 | 1363 | nonVowelRegex.findall(message) # ['R', 'b', 'c', 'p', ' ', 't', 's', ' ', 1364 | # 'b', 'b', 'y', ' ', 'f', 'd', '.'] 1365 | ``` 1366 | 1367 | ### 10.27 - Regex Dot-Star and the Caret/Dollar Characters 1368 | 1369 | - Use the `^` character (not as a first character in a character set) to find a match at the **beginning** of an input, and use the `$` character to find a match at the **end** of an input: 1370 | 1371 | ```python 1372 | import re 1373 | 1374 | # Begins with 'Hello': 1375 | 1376 | beginsWithHelloRegex = re.compile(r'^Hello') 1377 | 1378 | beginsWithHelloRegex.findall('Hello there!') # ['Hello'] 1379 | 1380 | beginsWithHelloRegex.findall('He said, "Hello".') # [] 1381 | 1382 | # Ends with 'world': 1383 | 1384 | endsWithWorldRegex = re.compile(r'world$') 1385 | 1386 | 
endsWithWorldRegex.findall('Hello, world') # ['world'] 1387 | 1388 | endsWithWorldRegex.findall('Hello, world!') # [] 1389 | 1390 | # Only contains one or more numeric digits: 1391 | 1392 | allDigitsRegex = re.compile(r'^\d+$') 1393 | 1394 | allDigitsRegex.findall('1234567890') # ['1234567890'] 1395 | 1396 | allDigitsRegex.findall('12345x7890') # [] 1397 | ``` 1398 | 1399 | - The `.` (dot) character matches **any** character except the newline character: 1400 | 1401 | ```python 1402 | message = 'The cat in the hat sat on the flat mat.' 1403 | 1404 | # Matches a phrase that ends in 'at' preceded by 1-2 non-newline characters: 1405 | 1406 | atRegex = re.compile(r'.{1,2}at') 1407 | 1408 | # Includes spaces: 1409 | 1410 | atRegex.findall(message) # [' cat', ' hat', ' sat', 'flat', ' mat'] 1411 | ``` 1412 | 1413 | - **NOTE:** To make `.` truly match **every** character (even newlines), pass the `re.DOTALL` variable as the second argument in the `compile()` function: 1414 | 1415 | ```python 1416 | primeDirectives = 'Serve the public trust.\nProtect the innocent.\nUphold the law.' 1417 | 1418 | dotStar = re.compile(r'.*', re.DOTALL) 1419 | 1420 | matchObject = dotStar.search(primeDirectives) 1421 | 1422 | print(matchObject.group()) # Serve the public trust. 1423 | # Protect the innocent. 1424 | # Uphold the law. 
1425 | ``` 1426 | 1427 | - **ALSO:** If you want to have a **case-insensitive** regex match, use the `re.IGNORECASE` variable: 1428 | 1429 | ```python 1430 | # TIP: You can also use "re.I" as a shorthand for "re.IGNORECASE": 1431 | 1432 | vowelRegex = re.compile(r'[aeiou]', re.IGNORECASE) 1433 | 1434 | vowelRegex.findall('All cows eat grass.') # ['A', 'o', 'e', 'a', 'a'] 1435 | ``` 1436 | 1437 | - A common way to match anything is the **Dot-Star** pattern: 1438 | 1439 | ```python 1440 | text = 'First Name: Al Last Name: Sweigart' 1441 | 1442 | nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)') 1443 | 1444 | nameRegex.findall(text) # [('Al', 'Sweigart')] 1445 | ``` 1446 | 1447 | - **NOTE:** Dot-Star uses greedy mode by default, so you must use `(.*?)` for nongreedy matching: 1448 | 1449 | ```python 1450 | serve = '<To serve humans> for dinner.>' 1451 | 1452 | # Nongreedy matching: 1453 | 1454 | nongreedy = re.compile(r'<(.*?)>') 1455 | 1456 | nongreedy.findall(serve) # ['To serve humans'] 1457 | 1458 | # Greedy matching: 1459 | 1460 | greedy = re.compile(r'<(.*)>') 1461 | 1462 | greedy.findall(serve) # ['To serve humans> for dinner.'] 1463 | ``` 1464 | 1465 | ### 10.28 - Regex sub() Method and Verbose Mode 1466 | 1467 | #### sub() Method 1468 | 1469 | - The `sub()` method allows you to find matching text and replace it with new text: 1470 | 1471 | ```python 1472 | import re 1473 | 1474 | message = 'Agent Alice gave documents to Agent Bob.' 1475 | 1476 | namesRegex = re.compile(r'Agent \w+') 1477 | 1478 | # The first argument is the replacement string, and 1479 | # the second argument is the string to be searched: 1480 | 1481 | namesRegex.sub('REDACTED', message) # 'REDACTED gave documents to REDACTED.' 
1482 | ``` 1483 | 1484 | - You can retain portions of the original text by using the **Slash-Number** syntax (e.g., `\1`, `\2`, etc.), in which the number represents a group in the regex pattern: 1485 | 1486 | ```python 1487 | message = 'Agent Alice gave documents to Agent Bob.' 1488 | 1489 | # The group will contain the first letter of an agent's name: 1490 | 1491 | namesRegex = re.compile(r'Agent (\w)\w*') 1492 | 1493 | namesRegex.findall(message) # ['A', 'B'] 1494 | 1495 | # Use the text from "Group 1" for the substituted match: 1496 | 1497 | namesRegex.sub(r'Agent \1', message) # 'Agent A gave documents to Agent B.' 1498 | ``` 1499 | 1500 | #### Verbose Mode 1501 | 1502 | - The `re.VERBOSE` flag allows you to write regular expressions that look nicer and are more readable by allowing you to visually separate logical sections of the pattern and add comments. Whitespace within the pattern is generally ignored: 1503 | 1504 | ```python 1505 | message = 'Call me tomorrow at 415-555-1011, or at (415) 555-9999.' 1506 | 1507 | # TIP: You can combine "compile()" options by using the bitwise "|" operator: 1508 | 1509 | phoneRegex = re.compile(r''' 1510 | (((\d\d\d-)| # Area code (without parentheses; with dash) 1511 | (\(\d\d\d\)\s)) # -OR- Area code (with parentheses; without dash) 1512 | \d\d\d # First 3 digits 1513 | - # Second dash 1514 | \d\d\d\d) # Last 4 digits 1515 | ''', re.VERBOSE | re.I | re.DOTALL) # Added extra options for demonstration 1516 | 1517 | phoneRegex.findall(message)[0][0] # '415-555-1011' 1518 | 1519 | phoneRegex.findall(message)[1][0] # '(415) 555-9999' 1520 | ``` 1521 | 1522 | [Back to TOC](#id-toc) 1523 | 1524 |
1525 | 1526 | ## Section 11: Files 1527 | 1528 | ### 11.30 - Filenames and Absolute/Relative File Paths 1529 | 1530 | - File paths are handled differently with regard to slashes/backslashes on Windows (e.g., `C:\temp\new`) and Mac/Linux (e.g., `/temp/new`). To enforce consistency when creating a file path string in Python, use the `os` module's `path.join()` method: 1531 | 1532 | ```python 1533 | import os 1534 | 1535 | # Returns 'folder1\\folder2\\file.png' if run on Windows, and 1536 | # returns 'folder1/folder2/file.png' if run on Mac/Linux: 1537 | 1538 | os.path.join('folder1', 'folder2', 'file.png') 1539 | ``` 1540 | 1541 | - To retrieve the string value of the file's **current working directory** (CWD), use the `os.getcwd()` method. You can manually change what Python considers the CWD to be by using `os.chdir()`: 1542 | 1543 | ```python 1544 | os.getcwd() # '/Users/Guest/Desktop' 1545 | 1546 | os.chdir('/Users/Guest/Documents') 1547 | 1548 | os.getcwd() # '/Users/Guest/Documents' 1549 | ``` 1550 | 1551 | - Being able to modify the CWD is important for handling **relative file paths**. A file referenced by name only (e.g., `file.png`) will be considered to be within the CWD, whereas a file name that is part of an **absolute file path** (e.g., `/Users/Guest/Documents/file.png`) is known to be within the path specified. 1552 | 1553 | - **NOTE:** Relative file paths can also contain references to folders, not just file names. 1554 | 1555 | - To return an absolute file path derived from a non-absolute pathname, use `os.path.abspath()`: 1556 | 1557 | ```python 1558 | # Essentially calls "os.getcwd()" and appends the string argument: 1559 | 1560 | os.path.abspath('spam.png') # '/Users/Guest/Documents/spam.png' 1561 | 1562 | # You can use the ".." 
symbol to move to a higher folder above the CWD: 1563 | 1564 | os.path.abspath('../spam.png') # '/Users/Guest/spam.png' 1565 | ``` 1566 | 1567 | - **TIP:** You can determine whether a path is relative or absolute by using `os.path.isabs()`, which returns a Boolean value: 1568 | 1569 | ```python 1570 | os.path.isabs('../spam.png') # False 1571 | 1572 | os.path.isabs('/Users/Guest/Documents') # True 1573 | ``` 1574 | 1575 | - To find the relative path between two paths, use `os.path.relpath()`. The first argument is the destination path, and the second (optional) argument is the starting path (which defaults to the current directory if not specified): 1576 | 1577 | ```python 1578 | os.path.relpath('/Users/Guest/spam.png', '/Users') # 'Guest/spam.png' 1579 | ``` 1580 | 1581 | - Use `os.path.dirname()` to retrieve only the directory in which a file is located, and use `os.path.basename()` to retrieve only the endpoint of a path: 1582 | 1583 | ```python 1584 | os.path.dirname('/Users/Guest/spam.png') # '/Users/Guest' 1585 | 1586 | os.path.basename('/Users/Guest/spam.png') # 'spam.png' 1587 | 1588 | os.path.basename('/Users/Guest') # 'Guest' 1589 | ``` 1590 | 1591 | - To determine whether a file or path exists, use `os.path.exists()`, which returns a Boolean value: 1592 | 1593 | ```python 1594 | os.path.exists('/Users/Guest') # True 1595 | ``` 1596 | 1597 | - **ALSO:** Use `os.path.isfile()` and `os.path.isdir()` to determine whether a path is referencing a file or directory, respectively (returns a Boolean value). 
1598 | 1599 | - Other useful functions for **examining/modifying** directories include `os.path.getsize()`, `os.listdir()`, and `os.makedirs()`: 1600 | 1601 | ```python 1602 | # Returns a directory's or file's size in bytes (as an integer): 1603 | 1604 | os.path.getsize('/Users/Guest') # 384 1605 | 1606 | # Returns the contents of a directory: 1607 | 1608 | os.listdir('/Users/Guest') 1609 | 1610 | # Creates a new folder (accepts either absolute or relative file paths): 1611 | 1612 | os.makedirs('/Users/Guest/Delicious/Waffles') 1613 | ``` 1614 | 1615 | ### 11.31 - Reading and Writing Plaintext Files 1616 | 1617 | - Three steps to **reading** plaintext files: 1618 | 1619 | ```python 1620 | # The `open()` function opens a plaintext file in "read mode" (default) 1621 | # and returns a file object: 1622 | 1623 | helloFile = open('/Users/Guest/hello.txt') 1624 | 1625 | # The file object includes the "read()" method that returns a string 1626 | # containing the file's contents: 1627 | 1628 | content = helloFile.read() 1629 | 1630 | # Close the file: 1631 | 1632 | helloFile.close() 1633 | ``` 1634 | 1635 | - **NOTE:** Instead of `read()`, you can use the `readlines()` method to return all lines as strings inside of a list. For example, if the file `hello.txt` contained the following text... 1636 | 1637 | ``` 1638 | Hello, world! 1639 | How are you? 1640 | ``` 1641 | 1642 | ...then `read()` and `readlines()` will process the text accordingly: 1643 | 1644 | ```python 1645 | helloFile.read() # 'Hello, world!\nHow are you?' 1646 | 1647 | helloFile.readlines() # ['Hello, world!\n', 'How are you?'] 1648 | ``` 1649 | 1650 | - To **write** to a plaintext file (i.e., overwrite its contents), pass the string `'w'` as the second argument to the `open()` function. To **append** new text to a file (i.e., add to the end of the file, rather than overwrite its contents), pass the `'a'` string. 
In either case, if the file does not already exist, then Python will create a new `txt` file for you to write to: 1651 | 1652 | ```python 1653 | helloFile = open('/Users/Guest/hello2.txt', 'w') 1654 | 1655 | # Use the "write()" method to write: 1656 | 1657 | helloFile.write('Hello!!!\n') # NOTE: Will return the number of bytes written 1658 | 1659 | helloFile.close() 1660 | ``` 1661 | 1662 | - **NOTE:** Python will *not* automatically add newline characters when writing/appending text content. So newlines must be added manually if desired. 1663 | 1664 | - If you need to store **complex data** such as lists/dictionaries (rather than just plaintext) to your storage device, use the `shelve` module to create a **binary shelf file**: 1665 | 1666 | ```python 1667 | import shelve 1668 | 1669 | # Returns a "shelf" data object that will be saved to your storage device 1670 | # as a shelf file named "mydata" in the current working directory: 1671 | 1672 | shelfFile = shelve.open('mydata') 1673 | 1674 | # Make changes to the shelf file in the same manner as a dictionary: 1675 | 1676 | shelfFile['cats'] = ['Kiwi', 'Penny', 'Clover'] 1677 | 1678 | shelfFile['dogs'] = ['Bambi', 'Buzz', 'Elway'] 1679 | 1680 | # Close the file: 1681 | 1682 | shelfFile.close() 1683 | ``` 1684 | 1685 | - **NOTE:** On Mac OS X, the shelf file will be saved with the `.db` extension. 
Its contents can be accessed from a Python program as follows: 1686 | 1687 | ```python 1688 | # NOTE: The "shelve.open()" method opens a shelf file in read-write mode: 1689 | 1690 | shelfFile = shelve.open('mydata') 1691 | 1692 | shelfFile['cats'] # ['Kiwi', 'Penny', 'Clover'] 1693 | 1694 | # List all keys in a shelf file: 1695 | 1696 | list(shelfFile.keys()) # ['cats', 'dogs'] 1697 | 1698 | # List all values in a shelf file: 1699 | 1700 | list(shelfFile.values()) # [['Kiwi', 'Penny', 'Clover'], 1701 | # ['Bambi', 'Buzz', 'Elway']] 1702 | ``` 1703 | 1704 | ### 11.32 - Copying and Moving Files and Folders 1705 | 1706 | - The `shutil` (Shell Utilities) module allows you to copy and move files and folders: 1707 | 1708 | ```python 1709 | import shutil 1710 | 1711 | # COPY a file (first argument) to a new folder (second argument): 1712 | 1713 | shutil.copy('/Users/Guest/hello.txt', '/Users/Guest/Delicious') 1714 | 1715 | # COPY and RENAME a file to a new folder ('/Delicious.txt'): 1716 | 1717 | shutil.copy('/Users/Guest/hello.txt', '/Users/Guest/Delicious/spam.txt') 1718 | 1719 | # COPY an entire FOLDER: 1720 | 1721 | shutil.copytree('/Users/Guest/Delicious', '/Users/Guest/Delicious_Backup') 1722 | 1723 | # MOVE a file to a new location: 1724 | 1725 | shutil.move('/Users/Guest/Delicious/spam.txt', '/Users/Guest/Waffles') 1726 | 1727 | # MOVE and RENAME a file to a new location: 1728 | 1729 | shutil.move('/Users/Guest/Delicious/spam.txt', '/Users/Guest/hello.txt') 1730 | ``` 1731 | 1732 | - **NOTE:** `shutil` does not have a method dedicated to renaming a file without copying/moving the file; however, you can accomplish the same result by using the `move()` method and setting the destination path to be the same as the original filepath: 1733 | 1734 | ```python 1735 | shutil.move('/Users/Guest/hello.txt', '/Users/Guest/eggs.txt') 1736 | ``` 1737 | 1738 | ### 11.33 - Deleting Files 1739 | 1740 | - The `os` module has an `unlink()` method that can be used for permanently 
deleting a **single file**, and a `rmdir()` for permanently deleting an **empty folder**: 1741 | 1742 | ```python 1743 | import os 1744 | 1745 | # Deletes a file: 1746 | 1747 | os.unlink('/Users/Guest/Delicious/eggs.txt') 1748 | 1749 | # Deletes an empty folder: 1750 | 1751 | os.rmdir('/Users/Guest/Delicious') 1752 | ``` 1753 | 1754 | - To permanently remove a folder and all of its contents, use the `shutil.rmtree()` method: 1755 | 1756 | ```python 1757 | import shutil 1758 | 1759 | shutil.rmtree('/Users/Guest/Waffles') 1760 | ``` 1761 | 1762 | - A better practice is to send a file/folder to your OS's **trash** or **recycling bin** (rather than permanently deleting the file/folder) by using the [send2trash](https://pypi.org/project/Send2Trash/) third-party module: 1763 | 1764 | ```python 1765 | import send2trash 1766 | 1767 | send2trash.send2trash('/Users/Guest/Delicious/eggs.txt') 1768 | ``` 1769 | 1770 | ### 11.34 - Walking a Directory Tree 1771 | 1772 | - The `os.walk()` method allows you to iterate through and execute code upon all of the files or folders within a specified folder: 1773 | 1774 | ```python 1775 | import os 1776 | 1777 | for folderName, subfolders, filenames in os.walk('/Users/Guest'): 1778 | # Delete subfolders containing the string 'fish' in the subfolder name: 1779 | for subfolder in subfolders: 1780 | if 'fish' in subfolder: 1781 | os.rmdir(os.path.join(folderName, subfolder)) 1782 | 1783 | # Copy all ".py" files to ".backup" files: 1784 | for file in filenames: 1785 | if file.endswith('.py'): 1786 | shutil.copy( 1787 | os.path.join(folderName, file), 1788 | os.path.join(folderName, file + '.backup') 1789 | ) 1790 | ``` 1791 | 1792 | [Back to TOC](#id-toc) 1793 | 1794 |
1795 | 1796 | ## Section 12: Debugging 1797 | 1798 | ### 12.35 - The raise and assert Statements 1799 | 1800 | - Python automatically raises one of its built-in **exceptions** whenever it tries to run invalid code; however, you can also raise your own exceptions with a `raise` statement. A **traceback** will be logged upon raising the exception, which allows you to see the specific line of code that triggered the exception: 1801 | 1802 | ```python 1803 | raise Exception('This is the error message.') 1804 | ``` 1805 | 1806 | - **TIP:** To save a running log of cleanly formatted error messages (as strings), use the `traceback.format_exc()` function: 1807 | 1808 | ```python 1809 | import traceback 1810 | 1811 | try: 1812 | raise Exception('This is the error message.') 1813 | except: 1814 | errorFile = open('error-log.txt', 'a') 1815 | errorFile.write(traceback.format_exc()) 1816 | errorFile.close() 1817 | print('The traceback info was written to error-log.txt') 1818 | ``` 1819 | 1820 | - An **assertion** can be used to perform a "sanity check". They are intended to address programmer errors rather than user errors. See the following example of a traffic light simulator: 1821 | 1822 | ```python 1823 | mainStreet = {'ns': 'green', 'ew': 'red'} 1824 | 1825 | 1826 | def switchLights(intersection): 1827 | for key in intersection.keys(): 1828 | if intersection[key] == 'green': 1829 | intersection[key] = 'yellow' 1830 | elif intersection[key] == 'yellow': 1831 | intersection[key] = 'red' 1832 | elif intersection[key] == 'red': 1833 | intersection[key] = 'green' 1834 | # This program will raise an exception when the assertion fails by returning 1835 | # 'False' on the first call, in which the N/S light will be 'yellow' and the 1836 | # E/W light will be 'green'.
As traffic should only be flowing when one 1837 | # light on the intersection is 'red', the assert statement allows you to 1838 | # immediately detect the problem and take corrective action: 1839 | assert 'red' in intersection.values(), 'Neither light is red!' + str(intersection) 1840 | 1841 | 1842 | switchLights(mainStreet) 1843 | ``` 1844 | 1845 | ### 12.36 - Logging 1846 | 1847 | - Python's `logging` module allows you to create a record of custom messages. Use the `logging.basicConfig()` method to display log messages on your screen while the program runs: 1848 | 1849 | ```python 1850 | import logging 1851 | 1852 | # The following line should appear at or near the top of your program: 1853 | 1854 | logging.basicConfig( 1855 | level=logging.DEBUG, 1856 | format='%(asctime)s - %(levelname)s - %(message)s' 1857 | ) 1858 | 1859 | # Each of the following "debug()" function calls work like "print()" but 1860 | # provide additional information (i.e., timestamp, log level, and custom message): 1861 | 1862 | logging.debug('Start of program') 1863 | 1864 | def factorial(n): 1865 | logging.debug('Start of factorial(%s)' % (n)) 1866 | total = 1 1867 | for i in range(n + 1): 1868 | total *= i 1869 | logging.debug('i is %s, total is %s' % (i, total)) 1870 | logging.debug('Return value is %s' % (total)) 1871 | return total 1872 | 1873 | print(factorial(5)) # (Returns 0, which is incorrect) 1874 | 1875 | logging.debug('End of program') 1876 | 1877 | # In this example, the running log will show that "i" is set to 0 on the first 1878 | # iteration, which results in "total" being set to 0 because any amount 1879 | # times 0 is always equal to 0. Therefore, by reviewing the log, it becomes 1880 | # apparent that the "range()" value should be set to start at 1 rather than 0. 
1881 | ``` 1882 | 1883 | - To **disable** logging messages that are present in your code, use the `logging.disable()` method at the top of your code: 1884 | 1885 | ```python 1886 | # Disables logging calls of the given severity level (or lower): 1887 | 1888 | logging.disable(logging.CRITICAL) 1889 | ``` 1890 | 1891 | - **NOTE:** Python recognizes the following 5 [logging levels](https://docs.python.org/3/library/logging.html#levels) (in descending order of severity). Log messages can be created at a specific log level by using the corresponding logging method: 1892 | 1893 | | Log level | Logging method | 1894 | | --------- | -------------------- | 1895 | | CRITICAL | `logging.critical()` | 1896 | | ERROR | `logging.error()` | 1897 | | WARNING | `logging.warning()` | 1898 | | INFO | `logging.info()` | 1899 | | DEBUG | `logging.debug()` | 1900 | 1901 | - To log messages to a **plaintext file** rather than the screen, use the `filename` keyword argument in the `logging.basicConfig()` method: 1902 | 1903 | ```python 1904 | logging.basicConfig( 1905 | filename='myProgramLog.txt', # (Relative pathname) 1906 | level=logging.DEBUG, 1907 | format='%(asctime)s - %(levelname)s - %(message)s' 1908 | ) 1909 | ``` 1910 | 1911 | ### 12.37 - Using the Debugger 1912 | 1913 | - The debugger is a feature in IDLE that allows you to run your program one line at a time. To activate the debugger: 1914 | 1915 | **1**\. Go to `Debug > Debugger` in the IDLE menu bar. 1916 | 1917 | **2**\. Ensure that the `Stack`, `Source`, `Locals`, and `Globals` checkboxes are all checked (to show the most information). 1918 | 1919 | **3**\. Run your program with the debugger enabled. The execution should pause on the first line.
1920 | 1921 | - Use the following controls to navigate through your code with the debugger: 1922 | 1923 | - `Over` ("Step Over") executes the line of code that appears highlighted in the shell, and then proceeds to execute the next line (basically allows you to execute a single line of code at a time). 1924 | 1925 | - **NOTE:** During this process, any variables that are set or modified will be displayed in the `Locals` and `Globals` boxes. 1926 | 1927 | - `Go` runs the program normally and disables the debugger until reaching (1) the end of the program or (2) a **breakpoint**. 1928 | 1929 | - To set a breakpoint in IDLE's file editor, right click the line at which you want to set a breakpoint and click `Set Breakpoint`. 1930 | 1931 | - `Step` ("Step Into") moves the debugger inside of a function call (if a function is about to be executed). 1932 | 1933 | - `Out` ("Step Out") will keep executing lines within the current function until the function returns. 1934 | 1935 | [Back to TOC](#id-toc) 1936 | 1937 |
1938 | 1939 | ## Section 13: Web Scraping 1940 | 1941 | ### 13.38 - The webbrowser Module 1942 | 1943 | - The `webbrowser` module's `open()` function launches a new browser to a specified URL: 1944 | 1945 | ```python 1946 | import webbrowser 1947 | 1948 | webbrowser.open('https://automatetheboringstuff.com') 1949 | ``` 1950 | 1951 | ### 13.39 - Downloading from the Web with the requests Module 1952 | 1953 | - The [requests](https://requests.readthedocs.io/en/master/) module is a third-party module that allows you to send HTTP/1.1 requests. 1954 | 1955 | ```python 1956 | import requests 1957 | 1958 | # "get()" returns a response object received from the server: 1959 | 1960 | res = requests.get('http://nunit.org/nuget/nunit3-license.txt') 1961 | 1962 | res.status_code # (Displays the response status code, e.g., 200) 1963 | 1964 | res.text # (Displays the body of the text content) 1965 | 1966 | # "raise_for_status()" will raise an exception if a download error occurred: 1967 | 1968 | res.raise_for_status() 1969 | 1970 | # To save the file to your storage device, use the "open()" function in 1971 | # Write-Binary mode by passing "wb" as the second argument. (NOTE: Even if the 1972 | # downloaded page is in plaintext, you must still write binary data--rather 1973 | # than plaintext data--in order to maintain the Unicode encoding of the text): 1974 | 1975 | licenseFile = open('license.txt', 'wb') 1976 | 1977 | # Write the file by using a for loop with the "iter_content()" method. Files 1978 | # are written in "chunks" (of the "bytes" data type), and you can specify the 1979 | # size of each chunk via the "chunk_size" keyword argument (first parameter).
1980 | # (NOTE: Per the "requests" documentation, 128 is the recommended size when 1981 | # streaming a download; however, this value may be modified as necessary): 1982 | 1983 | for chunk in res.iter_content(128): 1984 | licenseFile.write(chunk) # (Will return an integer of bytes written) 1985 | 1986 | licenseFile.close() 1987 | ``` 1988 | 1989 | - **NOTE:** See [here](https://nedbatchelder.com/text/unipain.html) for more information on Python and Unicode. 1990 | 1991 | ### 13.40 - Parsing HTML with the Beautiful Soup Module 1992 | 1993 | - To locate specific HTML elements within an HTML file, you can parse the HTML by using the Beautiful Soup ([beautifulsoup4](https://pypi.org/project/beautifulsoup4/)) third-party module: 1994 | 1995 | ```python 1996 | import bs4 1997 | import requests 1998 | 1999 | # Request an HTML page: 2000 | 2001 | res = requests.get('https://www.amazon.com/dp/1593275994/') 2002 | 2003 | # "BeautifulSoup()" will return a "beautifulsoup" object. The first argument 2004 | # is the content to be parsed, and the second argument is the type of parser 2005 | # you want to use (in this case, HTML): 2006 | 2007 | soup = bs4.BeautifulSoup(res.text, 'html.parser') 2008 | 2009 | # "select()" takes in a string containing the CSS selector you are seeking, 2010 | # and it will return a list of all matching elements. 
In this case, there 2011 | # will be only one matching element, so it will return a list containing a 2012 | # single tag for the "header-price" from the requested Amazon page: 2013 | 2014 | elements = soup.select( 2015 | """#newOfferAccordionRow > div > div.a-accordion-row-a11y > a > h5 > 2016 | div > div.a-column.a-span4.a-text-right.a-span-last > 2017 | span.a-size-medium.a-color-price.header-price""" 2018 | ) 2019 | 2020 | # Access a matching element's internal text content (e.g., just the contents 2021 | # of a , not the opening/closing tags) via the "text" variable: 2022 | 2023 | elements[0].text # (Includes the price and newline/whitespace characters) 2024 | 2025 | elements[0].text.strip() # (Includes only the price) 2026 | ``` 2027 | 2028 | ### 13.41 - Controlling the Browser with the Selenium Module 2029 | 2030 | - If you need to parse information from a website that requires you to log in or requires some user interaction with JavaScript, then using Beautiful Soup alone will not be sufficient (as you will have to do more than just download an HTML page). 
To solve such problems, the [Selenium](https://www.seleniumhq.org/) third-party module can be used to launch a browser that can be programmatically controlled by Python: 2031 | 2032 | ```python 2033 | # Unique way to import Selenium: 2034 | 2035 | from selenium import webdriver 2036 | 2037 | # Set the path of your Chrome driver (http://chromedriver.chromium.org/): 2038 | 2039 | chromeDriverPath = '/Users/bronson/Selenium Drivers/chromedriver' 2040 | 2041 | # Open a new Chrome browser that will be controlled by the automated process: 2042 | 2043 | browser = webdriver.Chrome(chromeDriverPath) 2044 | 2045 | # Direct the automated browser to fetch the requested URL: 2046 | 2047 | browser.get('https://automatetheboringstuff.com') 2048 | 2049 | # Target a SINGLE element containing a hyperlink to be clicked: 2050 | 2051 | element = browser.find_element_by_css_selector( 2052 | """body > div.main > div:nth-child(1) > 2053 | ul:nth-child(18) > li:nth-child(1) > a""" 2054 | ) 2055 | 2056 | # "click()" method automates the process of clicking a hyperlink: 2057 | 2058 | element.click() 2059 | ``` 2060 | 2061 | - **NOTE:** Use `find_elements_by_css_selector()` (plural) to fetch a list of **all** matching elements. Other elements that can be targeted with the `find_element_by_` syntax include: `class_name`, `id`, `link_text` (complete match), `partial_link_text` (partial match), `name`, and `tag_name`. 2062 | 2063 | - **ALSO:** Other browser **navigation** methods include: `back()`, `forward()`, `refresh()`, and `quit()`.
2064 | 2065 | - Use the `send_keys()` and `submit()` methods to enter text and **submit input**, and use an element's `text` variable to **read** the content of an HTML element: 2066 | 2067 | ```python 2068 | browser.get('https://www.google.com/') 2069 | 2070 | # Target Google's search bar: 2071 | 2072 | searchInput = browser.find_element_by_css_selector( 2073 | '#tsf > div:nth-child(2) > div > div.RNNXgb > div > div.a4bIc > input' 2074 | ) 2075 | 2076 | # Enter the given string argument into the search input: 2077 | 2078 | searchInput.send_keys('python') 2079 | 2080 | # Fire the submit action associated with the search input: 2081 | 2082 | searchInput.submit() 2083 | 2084 | # Target a specific element on the web page (i.e., the first result): 2085 | 2086 | snippet = browser.find_element_by_css_selector( 2087 | '#rso > div:nth-child(1) > div > div > div > div > div.s > div > span' 2088 | ) 2089 | 2090 | # Display the targeted element's inner text content: 2091 | 2092 | snippet.text # 'The official home of the Python Programming Language.' 2093 | ``` 2094 | 2095 | [Back to TOC](#id-toc) 2096 | 2097 |
2098 | 2099 | ## Section 14: Excel, Word, and PDF Documents 2100 | 2101 | ### 14.42 - Reading Excel Spreadsheets 2102 | 2103 | - Python can read and write Excel files via the [openpyxl](https://openpyxl.readthedocs.io/en/stable/) third-party module: 2104 | 2105 | ```python 2106 | # NOTE: Ensure that the CWD is the directory containing your Excel file. 2107 | 2108 | import openpyxl 2109 | 2110 | # Open the Excel file (stored as a 'Workbook' object): 2111 | 2112 | workbook = openpyxl.load_workbook('example.xlsx') # (Located in: ./14-42) 2113 | 2114 | # List the names of all sheets in the workbook: 2115 | 2116 | workbook.sheetnames # ['Sheet1', 'Sheet2', 'Sheet3'] 2117 | 2118 | # Access a specific sheet in the workbook (stored as a 'Worksheet' object): 2119 | 2120 | sheet = workbook['Sheet1'] 2121 | 2122 | # Access the value of a specific cell within a sheet by row/column NAME: 2123 | 2124 | sheet['A1'].value # datetime.datetime(2015, 4, 5, 13, 34, 2) 2125 | str(sheet['A1'].value) # '2015-04-05 13:34:02' 2126 | 2127 | # Access a cell by row/column NUMBER (useful when iterating with a loop): 2128 | 2129 | sheet.cell(row=1, column=2).value # 'Apples' 2130 | ``` 2131 | 2132 | ### 14.43 - Editing Excel Spreadsheets 2133 | 2134 | - Example: 2135 | 2136 | ```python 2137 | import openpyxl 2138 | 2139 | # Create a new 'Workbook' object: 2140 | 2141 | workbook = openpyxl.Workbook() 2142 | 2143 | # Access the workbook's 'Sheet' object 2144 | 2145 | workbook.sheetnames # ['Sheet'] 2146 | sheet = workbook['Sheet'] 2147 | 2148 | # Assign values to sheet cells: 2149 | 2150 | sheet['A1'] = 42 2151 | sheet['A2'] = 'Hello' 2152 | 2153 | # Add a new worksheet to the workbook: 2154 | 2155 | newSheet = workbook.create_sheet() 2156 | workbook.sheetnames # ['Sheet', 'Sheet1'] 2157 | 2158 | # Change the worksheet's title: 2159 | 2160 | newSheet.title = 'My New Sheet' 2161 | workbook.sheetnames # ['Sheet', 'My New Sheet'] 2162 | 2163 | # Specify a new worksheet's order and title upon creation:
2164 | 2165 | otherSheet = workbook.create_sheet(index=0, title='My Other Sheet') 2166 | workbook.sheetnames # ['My Other Sheet', 'Sheet', 'My New Sheet'] 2167 | 2168 | # Save the workbook to your storage device: 2169 | 2170 | workbook.save('test.xlsx') 2171 | ``` 2172 | 2173 | ### 14.44 - Reading and Editing PDFs 2174 | 2175 | - The [PyPDF2](https://pythonhosted.org/PyPDF2/) third-party module can extract data from PDF files, or manipulate existing PDFs to produce a new file. Note, however, that there may be some PDF files that PyPDF2 will be unable to process. PyPDF2 cannot extract images, charts, or other media, but it can extract text and return it as a string: 2176 | 2177 | ```python 2178 | import PyPDF2 2179 | import os 2180 | 2181 | os.chdir('/Users/bronson/Udemy/automate-the-boring-stuff-with-python/14-44') 2182 | 2183 | # Open in Read-Binary ('rb') mode because PDFs are binary files: 2184 | 2185 | pdfFile = open('meetingminutes1.pdf', 'rb') 2186 | 2187 | # Pass the 'File' object to PyPDF2's "PdfFileReader()", 2188 | # which will return a 'PDF Reader' object: 2189 | 2190 | reader = PyPDF2.PdfFileReader(pdfFile) 2191 | 2192 | # View the number of pages within the PDF file: 2193 | 2194 | reader.numPages # 19 2195 | 2196 | # "getPage()" returns a 'Page' object (numbering starts at 0): 2197 | 2198 | page = reader.getPage(0) 2199 | 2200 | # "extractText()" returns a string of all text extracted from the page: 2201 | 2202 | page.extractText() 2203 | 2204 | # Print out the text of each page in the PDF file: 2205 | 2206 | for pageNum in range(reader.numPages): 2207 | print(reader.getPage(pageNum).extractText()) 2208 | ``` 2209 | 2210 | - PyPDF2 cannot edit the text of a PDF file, but it can modify a PDF on the **page level** (i.e., you can add, remove, and reorder pages, but you cannot change a specific line of text on a particular page): 2211 | 2212 | ```python 2213 | # Open two PDF files to be combined into a single file: 2214 | 2215 | pdf1File = 
open('meetingminutes1.pdf', 'rb') 2216 | pdf2File = open('meetingminutes2.pdf', 'rb') 2217 | 2218 | reader1 = PyPDF2.PdfFileReader(pdf1File) 2219 | reader2 = PyPDF2.PdfFileReader(pdf2File) 2220 | 2221 | # Create a new 'Writer' object that will create a new PDF file: 2222 | 2223 | writer = PyPDF2.PdfFileWriter() 2224 | 2225 | # "addPage()" allows you to append pages to a 'Writer' object: 2226 | 2227 | for pageNum in range(reader1.numPages): 2228 | page = reader1.getPage(pageNum) 2229 | writer.addPage(page) 2230 | 2231 | for pageNum in range(reader2.numPages): 2232 | page = reader2.getPage(pageNum) 2233 | writer.addPage(page) 2234 | 2235 | # Open a new 'File' object in Write-Binary mode (will become the new PDF): 2236 | 2237 | outputFile = open('combinedminutes.pdf', 'wb') 2238 | 2239 | # Save the PDF with the 'Writer' object's "write()" method: 2240 | 2241 | writer.write(outputFile) 2242 | 2243 | # Close all files: 2244 | 2245 | outputFile.close() 2246 | pdf1File.close() 2247 | pdf2File.close() 2248 | ``` 2249 | 2250 | ### 14.45 - Reading and Editing Word Documents 2251 | 2252 | - Use the [python-docx](https://python-docx.readthedocs.io/en/latest/) third-party module to create and modify Word documents. 
`python-docx` divides a Word document into three different data structures: a 'Document' object, which contains a list of 'Paragraph' objects, which each contain a list of one or more 'Run' objects (a new run occurs in a paragraph whenever there is a change to the style, e.g., bold, italics, etc.): 2253 | 2254 | ```python 2255 | # Import with 'docx' despite the fact that the module is named 'python-docx': 2256 | 2257 | import docx 2258 | 2259 | filePath = '/Users/bronson/Udemy/automate-the-boring-stuff-with-python/14-45/' 2260 | 2261 | # Create a 'Document' object from the Word document file: 2262 | 2263 | documentObject = docx.Document(filePath + 'demo.docx') 2264 | 2265 | # View the text of a 'Paragraph' object: 2266 | 2267 | documentObject.paragraphs # (Returns a list of all 'Paragraph' objects) 2268 | 2269 | paragraph = documentObject.paragraphs[1] 2270 | 2271 | paragraph.text # 'A plain paragraph having some bold and some italic.' 2272 | 2273 | # Modify a paragraph's "style", as defined within Word: 2274 | 2275 | paragraph.style # 'Normal' 2276 | 2277 | paragraph.style = 'Title' 2278 | 2279 | # View the text of a 'Run' object (split up based on changes to text style): 2280 | 2281 | paragraph.runs # (Returns a list of all 'Run' objects) 2282 | 2283 | run = paragraph.runs[1] 2284 | 2285 | run.text # 'bold' 2286 | 2287 | # Check if a 'Run' is bold, italic, or underline (returns a Boolean): 2288 | 2289 | run.bold # True 2290 | 2291 | run.italic # False 2292 | 2293 | run.underline # False 2294 | 2295 | # Modify a 'Run' object's bold, italic, or underline status: 2296 | 2297 | run.underline = True 2298 | 2299 | # Modify a 'Run' object's text: 2300 | 2301 | run.text = 'bold and underline' 2302 | 2303 | # Add a new paragraph to the end of the document: 2304 | 2305 | newParagraph = documentObject.add_paragraph('New paragraph. 
') 2306 | 2307 | # Add additional text content to the new paragraph via "add_run()": 2308 | 2309 | newParagraph.add_run('New run.') 2310 | 2311 | # Save the Word document: 2312 | 2313 | documentObject.save('demo2.docx') 2314 | ``` 2315 | 2316 | - **NOTE:** The `add_paragraph()` and `add_run()` methods can only add content to the end of a file. If you want to insert additional content in the middle of a file, then you will have to create a new 'Document' object that will have its contents be copied from the source document, and you can add new content in the midst of this copying process. 2317 | 2318 | [Back to TOC](#id-toc) 2319 | 2320 |
2321 | 2322 | ## Section 15: Email 2323 | 2324 | ### 15.46 - Sending Emails 2325 | 2326 | - Simple Mail Transfer Protocol (SMTP) is an Internet standard for email transmission. Python implements SMTP via its built-in `smtplib` module: 2327 | 2328 | ```python 2329 | import smtplib 2330 | 2331 | # Create a "Connection" object that will be used to connect to the specified 2332 | # SMTP server (i.e., the domain name of your email server). The port number 2333 | # for an SMTP server is 587 (via TLS) or 465 (via SSL): 2334 | 2335 | conn = smtplib.SMTP('smtp.gmail.com', 587) 2336 | 2337 | # Establish the connection with the SMTP server (allowing Internet traffic 2338 | # from your Python program). If the connection is successful, you should 2339 | # receive a 2XX SMTP reply code: 2340 | 2341 | conn.ehlo() 2342 | 2343 | # Start TLS encryption to encrypt your email login password: 2344 | 2345 | conn.starttls() 2346 | 2347 | # Log in to your account (first argument is username; second is password). 2348 | # For Gmail, you must generate an "App password": 2349 | 2350 | conn.login('sender@gmail.com', 'yourAppPassword') 2351 | 2352 | # Send email. The first argument is the "From" address, and the second is 2353 | # the "To" address. The third argument is the email content, including 2354 | # header information and the body of the email's message. You must include 2355 | # two newline characters to separate the header and body. "sendmail()" will 2356 | # return a dictionary object containing any emails that it FAILED to send: 2357 | 2358 | conn.sendmail( 2359 | 'sender@gmail.com', 2360 | 'recipient@example.com', 2361 | 'Subject: Straw Dogs\n\nToday the good life means making full use of science and technology...it means seeking peace...it means cherishing freedom.'
2362 | ) 2363 | 2364 | # Close the SMTP connection: 2365 | 2366 | conn.quit() 2367 | ``` 2368 | 2369 | ### 15.47 - Checking Your Email Inbox 2370 | 2371 | - The Internet Message Access Protocol (IMAP) is an Internet standard protocol used by email clients to retrieve email messages from a mail server over TCP/IP. Python implements IMAP via its built-in `imaplib` module. However, [imapclient](https://imapclient.readthedocs.io/en/2.1.0/) and [pyzmail](http://www.magiksys.net/pyzmail/) are two third-party modules that may make using IMAP more user-friendly: 2372 | 2373 | ```python 2374 | import imapclient 2375 | import pyzmail 2376 | 2377 | # Create a "Connection" object to be used with the specified host: 2378 | 2379 | conn = imapclient.IMAPClient('imap.gmail.com', port=993, ssl=True) 2380 | 2381 | # Log in: 2382 | 2383 | conn.login('doe@gmail.com', 'yourAppPassword') 2384 | 2385 | # View all email folders: 2386 | 2387 | conn.list_folders() 2388 | 2389 | # Select an email folder (e.g., inbox) as the first argument. The second 2390 | # argument can be used to toggle "Read Only" mode (if you want to prevent 2391 | # emails from being deleted): 2392 | 2393 | conn.select_folder('INBOX', readonly=True) 2394 | 2395 | # Find an email via the "search()" method. The first argument is a list 2396 | # containing strings formatted according to the imapclient syntax. The 2397 | # method will return a list of unique IDs, each referencing a particular email: 2398 | 2399 | UIDs = conn.search(['SINCE 20-Aug-2018']) 2400 | 2401 | # Translate a UID into an actual email via the "fetch()" method.
The first 2402 | # argument is a list containing the desired UID, and the second argument 2403 | # specifies which parts of an email to retrieve: 2404 | 2405 | rawMessage = conn.fetch([29068], ['BODY[]', 'FLAGS']) 2406 | 2407 | # Parse the body of the raw email message and store it as a "Message" object: 2408 | 2409 | message = pyzmail.PyzMessage.factory(rawMessage[29068][b'BODY[]']) 2410 | 2411 | # View subject line: 2412 | 2413 | message.get_subject() 2414 | 2415 | # View sender/recipient: 2416 | 2417 | message.get_addresses('from') 2418 | message.get_addresses('to') 2419 | message.get_addresses('bcc') 2420 | 2421 | # The body of a message can be plaintext, HTML, or a combination of the two. 2422 | # The following can be used to view the length of plaintext and HTML portions. 2423 | # If the specified content does not exist, then "None" will be the value: 2424 | 2425 | message.text_part 2426 | message.html_part 2427 | 2428 | # Retrieve and decode the text content of the email message (usually UTF-8): 2429 | 2430 | message.text_part.get_payload().decode('UTF-8') 2431 | 2432 | # If you have "Read Only" mode disabled, you can delete messages via the 2433 | # "delete_messages()" method that accepts a list of all UIDs to be deleted. 2434 | # (NOTE: This is a PERMANENT deletion. The email is NOT moved to "Trash"): 2435 | 2436 | conn.delete_messages([29068]) 2437 | 2438 | # Log out: 2439 | 2440 | conn.logout() 2441 | ``` 2442 | 2443 | - **IMPORTANT:** If you are receiving an SSLCertVerificationError while using `imapclient`, you may need to [downgrade to version 0.13](https://stackoverflow.com/questions/34714342/imapclient-error-on-windows). If you are unable to install `pyzmail`, you may need to install [pyzmail36](https://stackoverflow.com/questions/40924672/pip-install-pyzmail-error-message) instead. 2444 | 2445 | [Back to TOC](#id-toc) 2446 | 2447 |
2448 | 2449 | ## Section 16: GUI Automation 2450 | 2451 | ### 16.48 - Controlling the Mouse from Python 2452 | 2453 | - [PyAutoGUI](https://pyautogui.readthedocs.io/en/latest/) is a third-party Python module for programmatically controlling the mouse and keyboard: 2454 | 2455 | ```python 2456 | import pyautogui 2457 | 2458 | # Obtain the resolution of your screen (width, height): 2459 | 2460 | width, height = pyautogui.size() 2461 | 2462 | # Obtain the current coordinates of the mouse cursor (width, height). 2463 | # The "width" value indicates the number of pixels from the LEFT of the 2464 | # screen, and the "height" value indicates the number from the TOP. 2465 | # (NOTE: Because the starting position is (0, 0), that means the max position 2466 | # will be one pixel less than the max screen width/height): 2467 | 2468 | pyautogui.position() 2469 | 2470 | # Move the mouse cursor to an ABSOLUTE position by specifying the width 2471 | # coordinate (first argument), the height coordinate (second argument), 2472 | # and the duration in seconds for the movement to occur (third argument): 2473 | 2474 | pyautogui.moveTo(840, 525, duration=0.5) 2475 | 2476 | # Move the mouse cursor to a RELATIVE position (in relation to the current 2477 | # position of the mouse) via the "moveRel()" method following the same 2478 | # procedure explained above: 2479 | 2480 | pyautogui.moveRel(-10, 0, duration=0.25) 2481 | 2482 | # Left click on an element at the specified position. If no coordinates are 2483 | # given, then the mouse will simply be clicked at its current position: 2484 | 2485 | pyautogui.click(450, 10) 2486 | ``` 2487 | 2488 | - **NOTE:** The "click" functionality also includes the following methods: `doubleClick()`, `rightClick()`, and `middleClick()`.
Additionally, you can perform **click-and-drag** operations in the same manner as `moveTo()` and `moveRel()` but with the left mouse button treated as being held down by using `dragTo()` and `dragRel()` 2489 | 2490 | - **TIP:** If your program ever results in the loss of control over your mouse cursor, force the cursor to the top left corner of the screen (0, 0) to kill the process by triggering PyAutoGUI's **failsafe exception**. 2491 | 2492 | - Run the following code from the terminal (not IDLE) to see your current mouse cursor position in real-time. This is useful for planning out all of the locations that you want your program to click: 2493 | 2494 | ```python 2495 | import pyautogui, sys 2496 | 2497 | print('Press Ctrl-C to quit.') 2498 | 2499 | try: 2500 | while True: 2501 | x, y = pyautogui.position() 2502 | positionStr = 'X: ' + str(x).rjust(4) + ' Y: ' + str(y).rjust(4) 2503 | print(positionStr, end='') 2504 | print('\b' * len(positionStr), end='', flush=True) 2505 | except KeyboardInterrupt: 2506 | print('\n') 2507 | ``` 2508 | 2509 | - Alternatively, PyAutoGUI has a method called `displayMousePosition()` that operates in a similar manner. 2510 | 2511 | ### 16.49 - Controlling the Keyboard from Python 2512 | 2513 | - Example: 2514 | 2515 | ```python 2516 | # "typewrite()" sends virtual keypresses to the computer. It can be used 2517 | # in conjunction with "click()" to first click on a text input field. 
You 2518 | # can specify an "interval" to add a delay (in seconds) between each keypress: 2519 | 2520 | pyautogui.click(1200, 400) # ( Also accepts tuple: click((1200, 400)) ) 2521 | 2522 | pyautogui.typewrite('Hello, world!', interval=0.2) 2523 | 2524 | # To use non-character keys (e.g., left arrow), you must specify the input 2525 | # as strings in a list: 2526 | 2527 | pyautogui.typewrite(['a', 'b', 'left', 'left', 'X', 'Y']) # 'XYab' 2528 | 2529 | # Press a single key: 2530 | 2531 | pyautogui.press('F1') 2532 | 2533 | # Trigger a keyboard shortcut: 2534 | 2535 | pyautogui.hotkey('command', 'f') # (Opens search dialog) 2536 | ``` 2537 | 2538 | - **NOTE:** You can view a list of all possible keys accepted by `typewrite()` by inspecting `pyautogui.KEYBOARD_KEYS`. 2539 | 2540 | ### 16.50 - Screenshots and Image Recognition 2541 | 2542 | - With PyAutoGUI, you can save a screenshot to an absolute or relative path: 2543 | 2544 | ```python 2545 | pyautogui.screenshot('example.png') 2546 | ``` 2547 | 2548 | - If you have a cropped image of an element that is presently displayed on your screen, you can locate the coordinates of the element by using `locateOnScreen()` or `locateCenterOnScreen()`, which is useful for targeting a specific element to be clicked: 2549 | 2550 | ```python 2551 | # Displays the coordinates of the element's top left corner, along with 2552 | # width and height of the found element: 2553 | 2554 | pyautogui.locateOnScreen('crop.png') # (1690, 516, 64, 64) 2555 | 2556 | # Displays the coordinates of the element's center point on screen: 2557 | 2558 | pyautogui.locateCenterOnScreen('crop.png') # (1722, 548) 2559 | ``` 2560 | 2561 | - **NOTE:** These image recognition methods are computationally expensive and take time to complete (and therefore will not work on moving content). Additionally, the element on screen must be a **pixel-perfect** match of the reference image. 
2562 | 2563 | [Back to TOC](#id-toc) 2564 | --------------------------------------------------------------------------------