├── .idea
│   └── vcs.xml
├── README.md
├── item_01_version_of_python.py
├── item_02_PEP8Style.py
├── item_03_Difference_bytes_str_unicode.py
├── item_04_helper_function.py
├── item_05_slice_sequence.py
├── item_06_avoid_using.py
├── item_07_list_not_map_filter.py
├── item_08_no_more_than_2_expressions.py
├── item_09_generator_expressions.py
├── item_10_prefer_enumerate.py
├── item_11_use_zip.py
├── item_12_avoid_else.py
├── item_13_try_except_else_finally.py
├── item_14_prefer_exceptions.py
├── item_15_closure_variable_scope.py
├── item_16_address.txt
├── item_16_generators_instead_of_lists.py
├── item_17_be_defensive.py
├── item_17_my_numbers.txt
├── item_18_reduce_visual_noise.py
├── item_19_provide_optimal_behavior.py
├── item_20_use_none_and_docstrings.py
├── item_21_enforce_clarity.py
├── item_22_prefer_helper_classes.py
├── item_23_accepts_functions_4_interfaces.py
├── item_24_use_classmethod.py
├── item_25_init_parent_classes_with_super.py
├── item_26_when_use_multiple_inheritance.py
├── item_27_prefer_public_attributes.py
├── item_28_inherit_from_collections_abc.py
├── item_29_use_plain_attributes.py
├── item_30_consider_property.py
├── item_31_use_descriptors.py
├── item_32_use_getattr.py
├── item_33_validate_subclass.py
├── item_34_register_class_existence.py
├── item_35_annotate_class_attributes.py
├── item_36_use_subprocess.py
├── item_37_use_threads.py
├── item_38_use_lock.py
├── item_39_use_queue.py
├── item_40_consider_coroutines.py
├── item_41_consider_concurrent_futures.py
├── item_42_define_function_decorators.py
├── item_43_consider_contexlib.py
├── item_44_make_pickle_reliable.py
├── item_45_use_datetime.py
├── item_46_use_built_in_algorithm.py
├── item_47_use_decimal.py
├── item_48_community_built_modules.py
├── item_49_write_docstrings_4_everything.py
├── item_50_use_packages.py
├── item_51_define_a_root_exception.py
├── item_52_break_circular_dependencies.py
├── item_53_use_virtual_environments.py
├── item_54_consider_module_scoped_code.py
├── item_55_use_repr_strings.py
├── item_56_test_utils.py
├── item_56_unittest.py
├── item_56_utils.py
├── item_57_pdb.py
├── item_58_profile_before_optimizing.py
├── item_59_use_tracemalloc.py
├── item_59_use_tracemalloc_top_n.py
├── item_59_use_tracemalloc_using_gc.py
├── item_59_use_tracemalloc_waste_memory.py
└── item_59_use_tracemalloc_with_trace.py

/.idea/vcs.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
--------------------------------------------------------------------------------
/item_01_version_of_python.py:
--------------------------------------------------------------------------------
# Chapter 1: Pythonic thinking


# The idioms of a programming language are defined by its users. Over the
# years, the Python community has come to use the adjective Pythonic to
# describe code that follows a particular style. The Pythonic style isn't
# regimented or enforced by the compiler. It has emerged over time through
# experience using the language and working with others. Python programmers
# prefer to be explicit, to choose simple over complex, and to maximize
# readability (type import this).

# Programmers familiar with other languages may try to write Python as if
# it's C++, Java, or whatever they know best. New programmers may still be
# getting comfortable with the vast range of concepts expressible in Python.
# It's important for everyone to know the best--the Pythonic--way to do the
# most common things in Python. These patterns will affect every program you
# write.


# Item 1: Know which version of Python you're using


# $ python --version
# Python 2.7.12
#
# $ python3 --version
# Python 3.5.2


import sys
print(sys.version_info)
# sys.version_info(major=2, minor=7, micro=12, releaselevel='final', serial=0)

print(sys.version)
# 2.7.12 (default, Nov 19 2016, 06:48:10)
# [GCC 5.4.0 20160609]


# Things to Remember

# 1. There are two major versions of Python still in active use: Python 2 and
# Python 3.
# 2. There are multiple popular runtimes for Python: CPython, Jython,
# IronPython, PyPy, etc.
# 3. Be sure that the command-line for running Python on your system is the
# version you expect it to be.
# 4. Prefer Python 3 for your next project because that is the primary focus
# of the Python community.

--------------------------------------------------------------------------------
/item_02_PEP8Style.py:
--------------------------------------------------------------------------------
# Item 2: Follow the PEP 8 style guide


# Whitespace: In Python, whitespace is syntactically significant. Python
# programmers are especially sensitive to the effects of whitespace on
# code clarity.

# 1. Use spaces instead of tabs for indentation.
# 2. Use four spaces for each level of syntactically significant indenting.
# 3. Lines should be 79 characters in length or less.
# 4. Continuations of long expressions onto additional lines should be
# indented by four extra spaces from their normal indentation level.
# 5. In a file, functions and classes should be separated by two blank lines.
# 6. In a class, methods should be separated by one blank line.
# 7. Don't put spaces around list indexes, function calls, or keyword
# argument assignments.
# 8. Put one (and only one) space before and after variable assignments.


# Naming: PEP 8 suggests unique styles of naming for different parts of the
# language.

# 1. Functions, variables, and attributes should be in lowercase_underscore
# format.
# 2. Protected instance attributes should be in _leading_underscore format.
# 3. Private instance attributes should be in __double_leading_underscore
# format.
# 4. Classes and exceptions should be in CapitalizedWord format.
# 5. Module-level constants should be in ALL_CAPS format.
# 6. Instance methods in classes should use self as the name of the first
# parameter (which refers to the object).
# 7. Class methods should use cls as the name of the first parameter (which
# refers to the class).


# Expressions and Statements: The Zen of Python states: "There should be
# one-- and preferably only one --obvious way to do it."

# 1. Use inline negation (if a is not b) instead of negation of positive
# expressions (if not a is b).
# 2. Don't check for empty values (like [] or '') by checking the length
# (if len(somelist) == 0). Use if not somelist and assume empty values
# implicitly evaluate to False.
# 3. The same thing goes for non-empty values (like [1] or 'hi'). The
# statement if somelist is implicitly True for non-empty values.
# 4. Avoid single-line if statements, for and while loops, and except
# compound statements. Spread these over multiple lines for clarity.
# 5. Always put import statements at the top of a file.
# 6. Always use absolute names for modules when importing them, not names
# relative to the current module's own path. For example, to import the foo
# module from the bar package, you should do from bar import foo, not just
# import foo.
# 7. If you must do relative imports, use the explicit syntax
# from . import foo.
# 8. Imports should be in sections in the following order: standard library
# modules, third-party modules, your own modules. Each subsection should
# have imports in alphabetical order.


# Things to Remember

# 1. Always follow the PEP 8 style guide when writing Python code.
# 2. Sharing a common style with the larger Python community facilitates
# collaboration with others.
# 3. Using a consistent style makes it easier to modify your own code later.

--------------------------------------------------------------------------------
/item_03_Difference_bytes_str_unicode.py:
--------------------------------------------------------------------------------
# Item 3: Know the difference between bytes, str, and unicode
import os

# In Python 3, there are two types that represent sequences of characters:
# bytes and str. Instances of bytes contain raw 8-bit values. Instances of
# str contain Unicode characters.

# In Python 2, there are two types that represent sequences of characters:
# str and unicode. In contrast to Python 3, instances of str contain raw
# 8-bit values. Instances of unicode contain Unicode characters.

# There are many ways to represent Unicode characters as binary data (raw
# 8-bit values). The most common encoding is UTF-8. Importantly, str
# instances in Python 3 and unicode instances in Python 2 do not have an
# associated binary encoding. To convert Unicode characters to binary data,
# you must use the encode method. To convert binary data to Unicode
# characters, you must use the decode method.

# When you're writing Python programs, it's important to do encoding and
# decoding of Unicode at the furthest boundary of your interfaces. The core
# of your program should use Unicode character types (str in Python 3,
# unicode in Python 2) and should not assume anything about character
# encodings. This approach allows you to be very accepting of alternative
# text encodings (such as Latin-1, Shift JIS, and Big5) while being strict
# about your output text encoding (ideally, UTF-8).

# The split between character types leads to two common situations in Python
# code:
# 1. You want to operate on raw 8-bit values that are UTF-8-encoded
# characters (or some other encoding).
# 2. You want to operate on Unicode characters that have no specific
# encoding.

# You'll often need two helper functions to convert between these two cases
# and to ensure that the type of input values matches your code's
# expectations.

# In Python 3, you'll need one method that takes a str or bytes and always
# returns a str.


def to_str(bytes_or_str):
    if isinstance(bytes_or_str, bytes):
        value = bytes_or_str.decode('utf-8')
    else:
        value = bytes_or_str
    return value  # Instance of str


# You'll need another method that takes a str or bytes and always returns a
# bytes.


def to_bytes(bytes_or_str):
    if isinstance(bytes_or_str, str):
        value = bytes_or_str.encode('utf-8')
    else:
        value = bytes_or_str
    return value  # Instance of bytes


# In Python 2, you'll need one method that takes a str or unicode and always
# returns a unicode.


def to_unicode(unicode_or_str):
    if isinstance(unicode_or_str, str):
        value = unicode_or_str.decode('utf-8')
    else:
        value = unicode_or_str
    return value  # Instance of unicode


# You'll need another method that takes str or unicode and always returns a
# str.


def to_str(unicode_or_str):
    if isinstance(unicode_or_str, unicode):  # Python 2 only
        value = unicode_or_str.encode('utf-8')
    else:
        value = unicode_or_str
    return value  # Instance of str


# There are two big gotchas when dealing with raw 8-bit values and Unicode
# characters in Python.

# The first issue is that in Python 2, unicode and str instances seem to be
# the same type when a str only contains 7-bit ASCII characters.
# 1. You can combine such a str and unicode together using the + operator.
# 2. You can compare such str and unicode instances using equality and
# inequality operators.

# All of this behavior means that you can often pass a str or unicode
# instance to a function expecting one or the other and things will just work
# (as long as you're only dealing with 7-bit ASCII). In Python 3, bytes and
# str instances are never equivalent, not even the empty string, so you must
# be more deliberate about the types of character sequences that you're
# passing around.

# The second issue is that in Python 3, operations involving file handles
# (returned by the open built-in function) default to UTF-8 encoding. In
# Python 2, file operations default to binary encoding. This causes
# surprising failures, especially for programmers accustomed to Python 2.

# For example, say you want to write some random binary data to a file. In
# Python 2, this works. In Python 3, this breaks.


with open('random.bin', 'w') as f:
    f.write("random")
    # f.write(os.urandom(10))

# TypeError: write() argument must be str, not bytes


# The cause of this exception is the new encoding argument for open that was
# added in Python 3. This parameter defaults to 'utf-8'. That makes read and
# write operations on file handles expect str instances containing Unicode
# characters instead of bytes instances containing binary data.

# To make this work properly, you must indicate that the data is being
# opened in write binary mode ('wb') instead of write character mode ('w').
# Here, I use open in a way that works correctly in Python 2 and Python 3:


with open('random.bin', 'wb') as f:
    f.write(os.urandom(10))


# This problem also exists for reading data from files. The solution is the
# same: Indicate binary mode by using 'rb' instead of 'r' when opening a
# file.


# Things to Remember

# 1. In Python 3, bytes contains sequences of 8-bit values, str contains
# sequences of Unicode characters. bytes and str instances can't be
# used together with operators (like > or +).
# 2. In Python 2, str contains sequences of 8-bit values, unicode contains
# sequences of Unicode characters. str and unicode can be used together
# with operators if the str only contains 7-bit ASCII characters.
# 3. Use helper functions to ensure that the inputs you operate on are the
# type of character sequence you expect (8-bit values, UTF-8 encoded
# characters, Unicode characters, etc.)
# 4. If you want to read or write binary data to/from a file, always open the
# file using a binary mode (like 'rb' or 'wb').

--------------------------------------------------------------------------------
/item_04_helper_function.py:
--------------------------------------------------------------------------------
# Item 4: Write helper functions instead of complex expressions


# Python's pithy syntax makes it easy to write single-line expressions that
# implement a lot of logic. For example, say you want to decode the query
# string from a URL. Here, each query string parameter represents an integer
# value:


from urllib.parse import parse_qs
my_values = parse_qs('red=5&blue=0&green=', keep_blank_values=True)
print(repr(my_values))
# $ python3 item_04_helper_function.py
# {'red': ['5'], 'green': [''], 'blue': ['0']}


# Some query string parameters may have multiple values, some may have single
# values, some may be present but have blank values, and some may be missing
# entirely. Using the get method on the result dictionary will return
# different values in each circumstance.


print("Red: ", my_values.get('red'))
print("Green: ", my_values.get('green'))
print("Opacity: ", my_values.get('opacity'))
# $ python3 item_04_helper_function.py
# Red: ['5']
# Green: ['']
# Opacity: None


# It'd be nice if a default value of 0 was assigned when a parameter isn't
# supplied or is blank. You might choose to do this with Boolean expressions
# because it feels like this logic doesn't merit a whole if statement or
# helper function quite yet.

# Python's syntax makes this choice all too easy. The trick here is that the
# empty string, the empty list, and zero all evaluate to False implicitly.
# Thus, the expressions below will evaluate to the subexpression after the or
# operator when the first subexpression is False.


# For query string 'red=5&blue=0&green='
red = my_values.get('red', [''])[0] or 0
green = my_values.get('green', [''])[0] or 0
opacity = my_values.get('opacity', [''])[0] or 0
print("Red: %r" % red)
print("Green: %r" % green)
print("Opacity: %r" % opacity)
# Red: '5'
# Green: 0
# Opacity: 0


# The red case works because the key is present in the my_values dictionary.
# The value is a list with one member: the string '5'. This string implicitly
# evaluates to True, so red is assigned to the first part of the or
# expression.

# The green case works because the value in the my_values dictionary is a
# list with one member: an empty string. The empty string implicitly
# evaluates to False, causing the or expression to evaluate to 0.

# The opacity case works because the value in the my_values dictionary is
# missing altogether. The behavior of the get method is to return its second
# argument if the key doesn't exist in the dictionary. The default value in
# this case is a list with one member, an empty string. When opacity isn't
# found in the dictionary, this code does exactly the same thing as the green
# case.

# However, this expression is difficult to read and it still doesn't do
# everything you need. You'd also want to ensure that all the parameter
# values are integers so you can use them in mathematical expressions. To do
# that, you'd wrap each expression with the int built-in function to parse
# the string as an integer.


red = int(my_values.get('red', [''])[0] or 0)
print('red: ', red)
# red: 5


# This is now extremely hard to read. There's so much visual noise. The code
# isn't approachable. A new reader of the code would have to spend too much
# time picking apart the expression to figure out what it actually does. Even
# though it's nice to keep things short, it's not worth trying to fit this
# all on one line.

# Python 2.5 added if/else conditional (ternary) expressions to make cases
# like this clearer while keeping the code short.


red = my_values.get('red', [''])
red = int(red[0]) if red[0] else 0
print('red: ', red)
# red: 5


# This is better. For less complicated situations, if/else conditional
# expressions can make things very clear. But the example above is still not
# as clear as the alternative of a full if/else statement over multiple
# lines. Seeing all of the logic spread out like this makes the dense version
# seem even more complex.


green = my_values.get('green', [''])
if green[0]:
    green = int(green[0])
else:
    green = 0


# Writing a helper function is the way to go, especially if you need to use
# this logic repeatedly.


def get_first_int(values, key, default=0):
    found = values.get(key, [''])
    if found[0]:
        found = int(found[0])
    else:
        found = default
    return found


# The calling code is much clearer than the complex expression using or and
# the two-line version using the if/else expression (a check of all three
# cases appears after the list below).


green = get_first_int(my_values, 'green')
print('green: ', green)
# green: 0


# As soon as your expressions get complicated, it's time to consider
# splitting them into smaller pieces and moving logic into helper functions.
# What you gain in readability always outweighs what brevity may have
# afforded you. Don't let Python's pithy syntax for complex expressions get
# you into a mess like this.


# Things to remember

# 1. Python's syntax makes it all too easy to write single-line expressions
# that are overly complicated and difficult to read.
# 2. Move complex expressions into helper functions, especially if you need
# to use the same logic repeatedly.
# 3. The if/else expression provides a more readable alternative to using
# Boolean operators like or and and in expressions.
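

# A minimal check of the helper against the three query-string cases from
# above (using the my_values dictionary defined earlier):


print(get_first_int(my_values, 'red'))      # 5
print(get_first_int(my_values, 'green'))    # 0
print(get_first_int(my_values, 'opacity'))  # 0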

--------------------------------------------------------------------------------
/item_05_slice_sequence.py:
--------------------------------------------------------------------------------
# Item 5: Know how to slice sequences


# Python includes syntax for slicing sequences into pieces. Slicing lets you
# access a subset of a sequence's items with minimal effort. The simplest
# uses for slicing are the built-in types list, str, and bytes. Slicing can
# be extended to any Python class that implements the __getitem__ and
# __setitem__ special methods (see Item 28: Inherit from collections.abc for
# custom container types).

# The basic form of the slicing syntax is somelist[start:end], where start is
# inclusive and end is exclusive.


a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print('First four: ', a[:4])
print('Last four: ', a[-4:])
print('Middle two: ', a[3:-3])
# First four: ['a', 'b', 'c', 'd']
# Last four: ['e', 'f', 'g', 'h']
# Middle two: ['d', 'e']


# When slicing from the start of a list, you should leave out the zero index
# to reduce visual noise.


assert a[:5] == a[0:5]


# When slicing to the end of a list, you should leave out the final index
# because it's redundant.


assert a[5:] == a[5:len(a)]


# Using negative numbers for slicing is helpful for doing offsets relative
# to the end of a list. All of these forms of slicing would be clear to a new
# reader of your code. There are no surprises, and I encourage you to use
# these variations.


print(a[:])
print(a[:5])
print(a[:-1])
print(a[4:])
print(a[-3:])
print(a[2:5])
print(a[2:-1])
print(a[-3:-1])
# ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
# ['a', 'b', 'c', 'd', 'e']
# ['a', 'b', 'c', 'd', 'e', 'f', 'g']
# ['e', 'f', 'g', 'h']
# ['f', 'g', 'h']
# ['c', 'd', 'e']
# ['c', 'd', 'e', 'f', 'g']
# ['f', 'g']


# Slicing deals properly with start and end indexes that are beyond the
# boundaries of the list. That makes it easy for your code to establish
# a maximum length to consider for an input sequence.


first_twenty_items = a[:20]
last_twenty_items = a[-20:]
print(first_twenty_items)
print(last_twenty_items)
# ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
# ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']


# In contrast, accessing the same index directly causes an exception.
# print(a[20])
# IndexError: list index out of range


# Note
# Beware that indexing a list by a negative variable is one of the few
# situations in which you can get surprising results from slicing. For
# example, the expression somelist[-n:] will work fine when n is greater
# than one (e.g. somelist[-3:]). However, when n is zero, the expression
# somelist[-0:] will result in a copy of the original list.


# The result of slicing a list is a whole new list. References to the objects
# from the original list are maintained. Modifying the result of slicing
# won't affect the original list.


b = a[4:]
print('Before: ', b)
b[1] = 99
print('After: ', b)
print('No change: ', a)
# Before: ['e', 'f', 'g', 'h']
# After: ['e', 99, 'g', 'h']
# No change: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']


# When used in assignments, slices will replace the specified range in the
# original list. Unlike tuple assignments (like a, b = c[:2]), slice
# assignments don't need to be the same length as the range they replace. The
# values before and after the assigned slice will be preserved. The list will
# grow or shrink to accommodate the new values.


print('Before: ', a)
a[2:7] = [99, 22, 14]
print('After: ', a)
# Before: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
# After: ['a', 'b', 99, 22, 14, 'h']


# If you leave out both the start and the end indexes when slicing, you'll
# end up with a copy of the original list.


b = a[:]
assert b == a and b is not a


# If you assign a slice with no start or end indexes, you'll replace its
# entire contents with a copy of what's referenced (instead of allocating a
# new list).


b = a
print('Before: ', a)
a[:] = [101, 102, 103]
assert a is b
print('After: ', a)
# Before: ['a', 'b', 99, 22, 14, 'h']
# After: [101, 102, 103]


# Things to remember

# 1. Avoid being verbose: Don't supply 0 for the start index or the length of
# the sequence for the end index.
# 2. Slicing is forgiving of start or end indexes that are out of bounds,
# making it easy to express slices on the front or back boundaries of a
# sequence (like a[:20] or a[-20:]).
# 3. Assigning to a list slice will replace that range in the original
# sequence with what's referenced even if their lengths are different.

--------------------------------------------------------------------------------
/item_06_avoid_using.py:
--------------------------------------------------------------------------------
# Item 6: Avoid using start, end and stride in a single slice


# In addition to basic slicing (see Item 5: Know how to slice sequences),
# Python has special syntax for the stride of a slice in the form
# somelist[start:end:stride]. This lets you take every n-th item when slicing
# a sequence. For example, the stride makes it easy to group by even and odd
# indexes in a list.


a = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']
odds = a[::2]
evens = a[1::2]
print(odds)
print(evens)
# ['red', 'yellow', 'blue']
# ['orange', 'green', 'purple']


# The problem is that the stride syntax often causes unexpected behavior that
# can introduce bugs. For example, a common Python trick for reversing a byte
# string is to slice the string with a stride of -1.


x = b'mongoose'
y = x[::-1]
print(y)
# b'esoognom'


# That works well for byte strings and ASCII characters, but it will break
# for Unicode characters encoded as UTF-8 byte strings.


w = '谢谢谢谢'
# x = w.encode('utf-8')
# y = x[::-1]
# z = y.decode('utf-8')
# print(y)
# print(z)
# UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa2 in position 0:
# invalid start byte


# Are negative strides besides -1 useful? Consider the following examples.


a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print(a[::2])
print(a[::-2])
# ['a', 'c', 'e', 'g']
# ['h', 'f', 'd', 'b']


# Here, ::2 means select every second item starting at the beginning.
# Trickier, ::-2 means select every second item starting at the end and
# moving backwards.


# What do you think 2::2 means? What about -2::-2 vs. -2:2:-2 vs. 2:2:-2?
print(a[2::2])
print(a[-2::-2])
print(a[-2:2:-2])
print(a[2:2:-2])
# ['c', 'e', 'g']
# ['g', 'e', 'c', 'a']
# ['g', 'e']
# []


# The point is that the stride part of the slicing syntax can be extremely
# confusing. Having three numbers within the brackets is hard enough to read
# because of its density. Then it's not obvious when the start and end
# indexes come into effect relative to the stride value, especially when
# stride is negative.


# To prevent problems, avoid using stride along with start and end indexes.
# If you must use a stride, prefer making it a positive value and omit start
# and end indexes. If you must use stride with start and end indexes,
# consider using one assignment to stride and another to slice.


b = a[::2]
c = b[1:-1]
print(b)
print(c)
# ['a', 'c', 'e', 'g']
# ['c', 'e']


# Slicing and then striding will create an extra shallow copy of the data.
# The first operation should try to reduce the size of the resulting slice by
# as much as possible. If your program can't afford the time or memory
# required for two steps, consider using the itertools built-in module's
# islice method (see Item 46: Use built-in algorithms and data structures),
# which doesn't permit negative values for start, end or stride.


# Things to remember

# 1. Specifying start, end, and stride in a slice can be extremely confusing.
# 2. Prefer using positive stride values in slices without start or end
# indexes. Avoid negative stride values if possible.
# 3. Avoid using start, end and stride together in a single slice. If you
# need all three parameters, consider doing two assignments (one to slice,
# another to stride) or using islice from the itertools built-in module.

--------------------------------------------------------------------------------
/item_07_list_not_map_filter.py:
--------------------------------------------------------------------------------
# Item 7: Use list comprehensions instead of map and filter


# Python provides compact syntax for deriving one list from another. These
# expressions are called list comprehensions. For example, say you want to
# compute the square of each number in a list. You can do this by providing
# the expression for your computation and the input sequence to loop over.


a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
squares = [x**2 for x in a]
print(squares)
# [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]


# Unless you're applying a single-argument function, list comprehensions are
# clearer than the map built-in function for simple cases. map requires
# creating a lambda function for the computation, which is visually noisy.


squares = map(lambda x: x**2, a)
print(squares)
# Python 2
# [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# Python 3
print(list(squares))
# [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]


# Unlike map, list comprehensions let you easily filter items from the input
# list, removing corresponding outputs from the result. For example, say you
# only want to compute the squares of the numbers that are divisible by 2.
# Here, I do this by adding a conditional expression to the list
# comprehension after the loop:


even_squares = [x**2 for x in a if x % 2 == 0]
print(even_squares)
# [4, 16, 36, 64, 100]


# The filter built-in function can be used along with map to achieve the same
# outcome, but it is much harder to read.


alt = map(lambda x: x**2, filter(lambda x: x % 2 == 0, a))
assert even_squares == list(alt)


# Dictionaries and sets have their own equivalents of list comprehensions.
# These make it easy to create derivative data structures when writing
# algorithms.


chile_ranks = {'ghost': 1, 'habanero': 2, 'cayenne': 3}
rank_dict = {rank: name for name, rank in chile_ranks.items()}
chile_len_set = {len(name) for name in rank_dict.values()}
print(rank_dict)
print(chile_len_set)
# {1: 'ghost', 2: 'habanero', 3: 'cayenne'}
# {8, 5, 7}


# Things to remember

# 1. List comprehensions are clearer than the map and filter built-in
# functions because they don't require extra lambda expressions.
# 2. List comprehensions allow you to easily skip items from the input list,
# a behavior map doesn't support without help from filter.
# 3. Dictionaries and sets also support comprehension expressions.

--------------------------------------------------------------------------------
/item_08_no_more_than_2_expressions.py:
--------------------------------------------------------------------------------
# Item 8: Avoid more than two expressions in list comprehensions


# Beyond basic usage (see Item 7: Use list comprehensions instead of map and
# filter), list comprehensions also support multiple levels of looping. For
# example, say you want to simplify a matrix (a list containing other lists)
# into one flat list of all cells. Here, I do this with a list comprehension
# by including two for expressions. These expressions run in the order
# provided from left to right.


matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [x for row in matrix for x in row]
print(flat)
# [1, 2, 3, 4, 5, 6, 7, 8, 9]


# The example above is simple, readable, and a reasonable usage of multiple
# loops. Another reasonable usage of multiple loops is replicating the
# two-level deep layout of the input list. For example, say you want to
# square the value in each cell of a two-dimensional matrix. This expression
# is noisier because of the extra [] characters, but it's still easy to read.


squared = [[x**2 for x in row] for row in matrix]
print(squared)
# [[1, 4, 9], [16, 25, 36], [49, 64, 81]]


# If this expression included another loop, the list comprehension would get
# so long that you'd have to split it over multiple lines.

my_lists = [
    [[1, 2, 3], [4, 5, 6]],
    # ...
    [[11, 22, 33], [44, 55, 66]]
]
flat = [x for sublist1 in my_lists
        for sublist2 in sublist1
        for x in sublist2]
print(flat)
# [1, 2, 3, 4, 5, 6, 11, 22, 33, 44, 55, 66]


# At this point, the multiline comprehension isn't much shorter than the
# alternative. Here, I produce the same result using normal loop statements.
# The indentation of this version makes the looping clearer than the list
# comprehension.


flat = []
for sublist1 in my_lists:
    for sublist2 in sublist1:
        flat.extend(sublist2)
print(flat)
# [1, 2, 3, 4, 5, 6, 11, 22, 33, 44, 55, 66]


# List comprehensions also support multiple if conditions. Multiple
# conditions at the same loop level are an implicit and expression. For
# example, say you want to filter a list of numbers to only even values
# greater than four. These two list comprehensions are equivalent.


a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
b = [x for x in a if x > 4 if x % 2 == 0]
c = [x for x in a if x > 4 and x % 2 == 0]
print(b)
print(c)
# [6, 8, 10]
# [6, 8, 10]


# Conditions can be specified at each level of looping after the for
# expression. For example, say you want to filter a matrix so the only cells
# remaining are those divisible by 3 in rows that sum to 10 or higher.
# Expressing this with list comprehensions is short, but extremely difficult
# to read.


matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
filtered = [[x for x in row if x % 3 == 0]
            for row in matrix if sum(row) >= 10]
print(filtered)
# [[6], [9]]


# Though this example is a bit convoluted, in practice you'll see situations
# arise where such expressions seem like a good fit. I strongly encourage you
# to avoid using list comprehensions that look like this. The resulting code
# is very difficult for others to comprehend. What you save in the number of
# lines doesn't outweigh the difficulties it could cause later.

# The rule of thumb is to avoid using more than two expressions in a list
# comprehension. This could be two conditions, two loops, or one condition
# and one loop. As soon as it gets more complicated than that, you should
# use normal if and for statements and write a helper function (see Item 16:
# Consider generators instead of returning lists).


# Things to remember

# 1. List comprehensions support multiple levels of loops and multiple
# conditions per loop level.
# 2. List comprehensions with more than two expressions are very difficult to
# read and should be avoided.

--------------------------------------------------------------------------------
/item_09_generator_expressions.py:
--------------------------------------------------------------------------------
# Item 9: Consider generator expressions for large comprehensions


# The problem with list comprehensions (see Item 7: Use list comprehensions
# instead of map and filter) is that they may create a whole new list
# containing one item for each value in the input sequence. This is fine for
# small inputs, but for large inputs this could consume significant amounts
# of memory and cause your program to crash.

# For example, say you want to read a file and return the number of
# characters on each line. Doing this with a list comprehension would require
# holding the length of every line of the file in memory. If the file is
# absolutely enormous or perhaps a never-ending network socket, list
# comprehensions are problematic. Here, I use a list comprehension in a way
# that can only handle small input values.


value = [len(x) for x in open('item_09_generator_expressions.py')]
print(value)
# [66, 1, 1, 76, 70, 77, 79, 42, 1, 68, 78, 73, 69, 76, 43, 1, 1, 46, 12]
print("line: %d, max length: %d\n" % (len(value), max(value)))
# line: 39, max length: 79


# To solve this, Python provides generator expressions, a generalization of
# list comprehensions and generators. Generator expressions don't materialize
# the whole output sequence when they're run. Instead, generator expressions
# evaluate to an iterator that yields one item at a time from the expression.

# A generator expression is created by putting list-comprehension-like syntax
# between () characters. Here, I use a generator expression that is
# equivalent to the code above. However, the generator expression immediately
# evaluates to an iterator and doesn't make any forward progress.


it = (len(x) for x in open('item_09_generator_expressions.py'))
print(it)
# <generator object <genexpr> at 0x7f5f396eaa40>


# The returned iterator can be advanced one step at a time to produce the
# next output from the generator expression as needed (using the next
# built-in function). Your code can consume as much of the generator
# expression as you want without risking a blowup in memory usage.


print(next(it))
print(next(it))
# 66
# 1


# Another powerful outcome of generator expressions is that they can be
# composed together. Here, I take the iterator returned by the generator
# expression above and use it as the input for another generator expression.


roots = ((x, x**0.5) for x in it)
print(next(roots))
print(next(roots))
# (1, 1.0)
# (76, 8.717797887081348)


# Each time I advance this iterator, it will also advance the interior
# iterator, creating a domino effect of looping, evaluating
# conditional expressions, and passing around inputs and outputs.


print(next(roots))
# (70, 8.366600265340756)


# Chaining generators like this executes very quickly in Python. When you're
# looking for a way to compose functionality that's operating on a large
# stream of input, generator expressions are the best tool for the job.
# The only gotcha is that the iterators returned by generator expressions are
# stateful, so you must be careful not to use them more than once (see Item
# 17: Be defensive when iterating over arguments); a small sketch follows the
# list below.


# Things to remember

# 1. List comprehensions can cause problems for large inputs by using too
# much memory.
# 2. Generator expressions avoid memory issues by producing outputs one at a
# time as an iterator.
# 3. Generator expressions can be composed by passing the iterator from one
# generator expression into the for subexpression of another.
# 4. Generator expressions execute very quickly when chained together.
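

# A minimal sketch of the statefulness gotcha (illustrative values): once the
# iterator from a generator expression is exhausted, iterating it again
# silently yields nothing.


it = (x**2 for x in [1, 2, 3])
print(list(it))
# [1, 4, 9]
print(list(it))
# []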

--------------------------------------------------------------------------------
/item_10_prefer_enumerate.py:
--------------------------------------------------------------------------------
# Item 10: Prefer enumerate over range
import random


# The range built-in function is useful for loops that iterate over a set of
# integers.

random_bits = 0
for i in range(64):
    if random.randint(0, 1):
        random_bits |= 1 << i


# When you have a data structure to iterate over, like a list of strings, you
# can loop directly over the sequence.


flavor_list = ['vanilla', 'chocolate', 'pecan', 'strawberry']
for flavor in flavor_list:
    print('%s is delicious' % flavor)
# vanilla is delicious
# chocolate is delicious
# pecan is delicious
# strawberry is delicious


# Often, you'll want to iterate over a list and also know the index of the
# current item in the list. For example, say you want to print the ranking of
# your favorite ice cream flavors. One way to do it is using range.


for i in range(len(flavor_list)):
    flavor = flavor_list[i]
    print('%d: %s' % (i+1, flavor))
# 1: vanilla
# 2: chocolate
# 3: pecan
# 4: strawberry


# This looks clumsy compared with the other examples of iterating over
# flavor_list or range. You have to get the length of the list. You have to
# index into the array. It's harder to read.

# Python provides the enumerate built-in function for addressing this
# situation. enumerate wraps any iterator with a lazy generator. This
# generator yields pairs of the loop index and the next value from the
# iterator. The resulting code is much clearer.


for i, flavor in enumerate(flavor_list):
    print('%d: %s' % (i + 1, flavor))
# 1: vanilla
# 2: chocolate
# 3: pecan
# 4: strawberry


# You can make this even shorter by specifying the number from which
# enumerate should begin counting (1 in this case).


for i, flavor in enumerate(flavor_list, 1):
    print('%d: %s' % (i, flavor))
# 1: vanilla
# 2: chocolate
# 3: pecan
# 4: strawberry


# Things to remember

# 1. enumerate provides concise syntax for looping over an iterator and
# getting the index of each item from the iterator as you go.
# 2. Prefer enumerate instead of looping over a range and indexing into a
# sequence.
# 3. You can supply a second parameter to enumerate to specify the number
# from which to begin counting (zero is the default).

--------------------------------------------------------------------------------
/item_11_use_zip.py:
--------------------------------------------------------------------------------
# Item 11: Use zip to process iterators in parallel


# Often in Python you find yourself with many lists of related objects. List
# comprehensions make it easy to take a source list and get a derived list by
# applying an expression (see Item 7: Use list comprehensions instead of map
# and filter).


names = ['Cecilia', 'Lise', 'Marie']
letters = [len(n) for n in names]


# The items in the derived list are related to the items in the source list
# by their indexes. To iterate over both lists in parallel, you can iterate
# over the length of the names source list.


longest_name = None
max_letters = 0

for i in range(len(names)):
    count = letters[i]
    if count > max_letters:
        longest_name = names[i]
        max_letters = count

print(longest_name)
# Cecilia


# The problem is that this whole loop statement is visually noisy. The
# indexes into names and letters make the code hard to read. Indexing into
# the arrays by the loop index i happens twice. Using enumerate (see Item 10:
# Prefer enumerate over range) improves this slightly, but it's still not
# ideal.


for i, name in enumerate(names):
    count = letters[i]
    if count > max_letters:
        longest_name = name
        max_letters = count


# To make this code clearer, Python provides the zip built-in function. In
# Python 3, zip wraps two or more iterators with a lazy generator. The zip
# generator yields tuples containing the next value from each iterator. The
# resulting code is much cleaner than indexing into multiple lists.


for name, count in zip(names, letters):
    if count > max_letters:
        longest_name = name
        max_letters = count


# There are two problems with the zip built-in.

# The first issue is that in Python 2 zip is not a generator; it will fully
# exhaust the supplied iterators and return a list of all the tuples it
# creates. This could potentially use a lot of memory and cause your program
# to crash. If you want to zip very large iterators in Python 2, you should
# use izip from the itertools built-in module (see Item 46: Use built-in
# algorithms and data structures).

# The second issue is that zip behaves strangely if the input iterators are
# of different lengths. For example, say you add another name to the list
# above but forget to update the letter counts. Running zip on the two input
# lists will have an unexpected result.


names.append('Rosalind')
for name, count in zip(names, letters):
    print(name)
# Cecilia
# Lise
# Marie


# The new item for 'Rosalind' isn't there. This is just how zip works. It
# keeps yielding tuples until a wrapped iterator is exhausted. This approach
# works fine when you know that the iterators are of the same length, which
# is often the case for derived lists created by list comprehensions. In many
# other cases, the truncating behavior of zip is surprising and bad. If you
# aren't confident that the lengths of the lists you want to zip are equal,
# consider using the zip_longest function from the itertools built-in module
# instead (also called izip_longest in Python 2); a short sketch follows the
# list below.


# Things to remember

# 1. The zip built-in function can be used to iterate over multiple iterators
# in parallel.
# 2. In Python 3, zip is a lazy generator that produces tuples. In Python 2,
# zip returns the full result as a list of tuples.
# 3. zip truncates its outputs silently if you supply it with iterators of
# different lengths.
# 4. The zip_longest function from the itertools built-in module lets you
# iterate over multiple iterators in parallel regardless of their
# lengths (see Item 46: Use built-in algorithms and data structures).
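

# A minimal sketch of the zip_longest alternative, using the same names and
# letters lists as above (missing values are padded with fillvalue, which
# defaults to None):


from itertools import zip_longest

for name, count in zip_longest(names, letters):
    print(name, count)
# Cecilia 7
# Lise 4
# Marie 5
# Rosalind None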
101 | -------------------------------------------------------------------------------- /item_12_avoid_else.py: -------------------------------------------------------------------------------- 1 | # Item 12: Avoid else blocks after for and while loops 2 | 3 | 4 | # Python loops have an extra feature that is not available in most other 5 | # programming language: you can put an else block immediately after a loop's 6 | # repeated interior block. 7 | 8 | 9 | for i in range(3): 10 | print('Loop %d' % i) 11 | else: 12 | print('Else block!') 13 | # Loop 0 14 | # Loop 1 15 | # Loop 2 16 | # Else block! 17 | 18 | 19 | # Surprisingly, the else block runs immediately after the loop finishes. Why 20 | # is the clause called "else"? Why not "and"? In an if/else statement, else 21 | # means, "Do this if the block before this doesn't happen." In a try/except 22 | # statement, except has the definition: "Do this if trying the block before 23 | # this failed." 24 | 25 | 26 | # Similarly, else from try/except/else follows this pattern (see item 13: Take 27 | # advantage of each block in try/except/else/finally) because it means, "Do 28 | # this if the block before did not fail". try/finally is also intuitive 29 | # because it means, "Always do what is final after trying the block before. 30 | 31 | # Given all of the uses of else, except, and finally in Python, a new 32 | # programmer might assume that the else part of for/else means, "Do this if 33 | # the loop wan't completed". In reality, it does exactly the opposite. Using 34 | # a break statement in a loop will actually skip the else block. 35 | 36 | 37 | for i in range(3): 38 | print('Loop %d' % i) 39 | if i == 1: 40 | break 41 | else: 42 | print('Else block!') 43 | # Loop 0 44 | # Loop 1 45 | 46 | 47 | # Another surprise is that the else block will run immediately if you loop 48 | # over an empty sequence. 49 | 50 | 51 | for x in []: 52 | print('Never runs') 53 | else: 54 | print('For Else block!') 55 | # For Else block! 56 | 57 | 58 | # The else block also runs when while loops are initially false. 59 | 60 | 61 | while False: 62 | print('Never runs!') 63 | else: 64 | print('While Else block!') 65 | # While Else block! 66 | 67 | 68 | # The rationale for these behaviors is that else blocks after loops are useful 69 | # when you're using loops to search for something. For example, say you want 70 | # to determine whether two numbers are coprime (their only common divisor is 71 | # 1). Here, I iterate through every possible common divisor and test the 72 | # numbers. After every option has been tried, the loop ends. The else block 73 | # runs when the numbers are coprime because the loop doesn't encounter a 74 | # break. 75 | 76 | 77 | a = 4 78 | b = 9 79 | for i in range(2, min(a, b) + 1): 80 | print('Testing', i) 81 | if a % i == 0 and b % i == 0: 82 | print('Not coprime') 83 | break 84 | else: 85 | print('Coprime') 86 | # Testing 2 87 | # Testing 3 88 | # Testing 4 89 | # Coprime 90 | 91 | 92 | # In practice, you wouldn't write the code this way. Instead, you'd write a 93 | # helper function to do the calculation. Such a helper function is writen in 94 | # two common styles. 95 | 96 | # The first approach is to return early when you find the condition you're 97 | # looking for. You return the default outcome if you fall through the loop. 
98 | 99 | 100 | def coprime(a, b): 101 | for i in range(2, min(a, b) + 1): 102 | if a % i == 0 and b % i == 0: 103 | return False 104 | return True 105 | 106 | 107 | # The second way is to have a result variable that indicates whether you've 108 | # found what you're looking for in the loop. You break out of the loop as soon 109 | # as you find something. 110 | 111 | 112 | def coprime2(a, b): 113 | is_coprime = True 114 | for i in range(2, min(a, b) + 1): 115 | if a % i == 0 and b % i == 0: 116 | is_coprime = False 117 | break 118 | return is_coprime 119 | 120 | 121 | # Both of these approaches are so much clearer to readers of unfamiliar code. 122 | # The expressively you gain from the else block doesn't outweigh the burden 123 | # you put on people (including yourself) who want to understand your code in 124 | # the future. Simple constructs like loops should be self-evident in Python. 125 | # You should avoid using else blocks after loops entirely. 126 | 127 | 128 | # Things to remember 129 | 130 | # 1. Python has special syntax that allows else blocks to immediately follow 131 | # for and while loop interior blocks. 132 | # 2. The else block after a loop only runs if the loop body did not encounter 133 | # a break statement. 134 | # 3. Avoid using else blocks after loops because their behavior isn't 135 | # intuitive and can be confusing. 136 | -------------------------------------------------------------------------------- /item_13_try_except_else_finally.py: -------------------------------------------------------------------------------- 1 | # Item 13: Take advantage of each block in try/except/else/finally 2 | import json 3 | 4 | 5 | # There are four distinct times that you may want to take action during 6 | # exception handling in Python. These are captured in the functionality of 7 | # try, except, else, and finally blocks. Each block serves a unique purpose in 8 | # the compound statement, and their various combinations are useful (see Item 9 | # 51). 10 | 11 | 12 | # Finally Blocks 13 | 14 | # Use try/finally when you want exceptions to propagate up, but you also want 15 | # to run cleanup code when exceptions occur. One common usage to try/finally 16 | # is reliably closing file handles (see Item 43: "Consider contextlib and with 17 | # statements for reusable try/finally behavior" for another approach). 18 | 19 | 20 | handle = open('item_13_try_except_else_finally.py') # May raise IOError 21 | # handle = open('item_13_try_except_else_finally_.py') # May raise IOError 22 | # FileNotFoundError: [Errno 2] No such file or directory: 23 | # 'item_13_try_except_else_finally_.py' 24 | try: 25 | data = handle.read() # May raise UnicodeDecodeError 26 | finally: 27 | handle.close() # Always runs after try: 28 | 29 | 30 | # Any exception raised by the read method will always propagate up to the 31 | # calling code, yet the close method of handle is also guaranteed to run in 32 | # the finally block. 33 | 34 | 35 | # Else Blocks 36 | 37 | # Use try/except/else to make it clear with exceptions will be handled try 38 | # your code and which exceptions will propagate up. When the try block doesn't 39 | # raise an exception, the else block will run. The else block helps you 40 | # minimize the amount of code in the try block and improves readability. For 41 | # example, say you want to load JSON dictionary data from a string and return 42 | # the value of a key it contains. 
43 | 44 | 45 | def load_json_key(data, key): 46 | try: 47 | result_dict = json.loads(data) # May raise ValueError 48 | except ValueError as e: 49 | raise KeyError from e 50 | else: 51 | return result_dict[key] # May raise KeyError 52 | 53 | 54 | # If the data isn't valid JSON, then decoding with json.load will raise a 55 | # ValueError. The exception is caught by the except block and handled. If 56 | # decoding is successful, then the key lookup will occur in the else block. if 57 | # the key lookup raises any exceptions, they will propagate up to the caller 58 | # because they are outside the try block. The else clause ensures that what 59 | # follows the try/except is visually distinguished from the except block. This 60 | # makes the exception propagation behavior clear. 61 | 62 | 63 | # Everything together 64 | 65 | # Use try/except/else/finally when you want to do it all in one compound 66 | # statement. For example, say you want to read a description of work to do 67 | # from a file, process it, and then update the file in place. Here, the try 68 | # block is used to read the file and process it. The except block is used to 69 | # handle exceptions from the try block that are expected. The else block is 70 | # used to update the file in place and to allow realted exceptions to 71 | # propagated up. The finally block cleans up the file handle. 72 | 73 | 74 | UNDEFINED = object() 75 | 76 | 77 | def divide_json(path): 78 | handle = open(path, 'r+') # May raise IOError 79 | try: 80 | data = handle.read() # May raise UnicodeDecodeError 81 | op = json.loads(data) # May raise ValueError 82 | value = ( # May raise ZeroDivisionError 83 | op['numerator']/op['denominator']) 84 | except ZeroDivisionError as e: 85 | return UNDEFINED 86 | else: 87 | op['result'] = value 88 | result = json.dumps(op) 89 | handle.seek(0) 90 | handle.write(result) # May raise IOError 91 | return value 92 | finally: 93 | handle.close() # Always runs 94 | 95 | 96 | # This layout is especially useful because all of the blocks work together in 97 | # intuitive ways. For example, if an exception gets raised in the else block 98 | # while rewriting the result data, the finally block will still run and close 99 | # the file handle. 100 | 101 | 102 | # Things to remember 103 | 104 | # 1. The try/finally compound statement lets you run cleanup code regardless 105 | # of whether exceptions were raised in the try block. 106 | # 2. The else block helps you minimize the amount of code in try blocks and 107 | # visually distinguish the success case from the try/except blocks. 108 | # 3. An else block can be used to perform additional actions after a 109 | # successful try block but before common cleanup in a finally block. 110 | -------------------------------------------------------------------------------- /item_14_prefer_exceptions.py: -------------------------------------------------------------------------------- 1 | # Chapter 2: Functions 2 | 3 | 4 | # The first organizational tool programmers use in Python is the function. As 5 | # in other programming language, functions enable you to break large programs 6 | # into smaller pieces. They improve read-ability and make code more 7 | # approachable. They allow for reuse and refactoring. 8 | 9 | # Functions in Python have a variety of extra features that make the 10 | # programmer's life easier. Some are similar to capabilities in other 11 | # programming languages, but many are unique to Python. These extras can 12 | # eliminate noise and clarify the intention of callers. 
They can significantly 13 | # reduce subtle bugs that are difficult to find. 14 | 15 | 16 | # Item 14: Prefer exceptions to returning None 17 | 18 | 19 | # When writing utility functions, there's a draw for Python programmers to 20 | # give special meaning to the return value of None. It seems to make sense 21 | # in some cases. For example, say you want a helper function that divides one 22 | # number by another. In the case of dividing by zero, returning None seems 23 | # natural because the result is undefined. 24 | 25 | 26 | def divide(a, b): 27 | try: 28 | return a / b 29 | except ZeroDivisionError: 30 | return None 31 | 32 | 33 | # Code using this function can interpret the return value accordingly. 34 | 35 | 36 | x, y = 1, 2 37 | # Result is not None 38 | # x, y = 1, 0 39 | # Invalid inputs 40 | result = divide(x, y) 41 | if result is None: 42 | print('Invalid inputs') 43 | else: 44 | print('Result is not None') 45 | 46 | 47 | # What happens when the numerator is zero? That will cause the return value 48 | # to be zero (if the denominator is non-zero). This can cause problems when 49 | # you evaluate the result in a condition like an if statement. You may 50 | # accidentally look for any False equivalent value to indicate errors instead 51 | # of only looking for None (see Item 4: "Write helper functions instead of 52 | # complex expressions" for a similar situation). 53 | 54 | 55 | x, y = 0, 5 56 | result = divide(x, y) 57 | if not result: 58 | print('Invalid inputs') # This is wrong! 59 | # Invalid inputs 60 | 61 | 62 | # This is a common mistake in Python code when None has special meaning. This 63 | # is why returning None from a function is error prone. There are two ways to 64 | # reduce the chance of such errors. 65 | 66 | # The first way is to split the return value into a two-tuple. The first part 67 | # of the tuple indicates whether the operation was a success or failure. The 68 | # second part is the actual result that was computed. 69 | 70 | 71 | def divide(a, b): 72 | try: 73 | return True, a / b 74 | except ZeroDivisionError: 75 | return False, None 76 | 77 | 78 | # Callers of this function have to unpack the tuple. That forces them to 79 | # consider the status part of the tuple instead of just looking at the 80 | # result of division. 81 | 82 | 83 | success, result = divide(x, y) 84 | if not success: 85 | print('Invalid inputs') 86 | else: 87 | print('Success') 88 | # Success 89 | 90 | 91 | # The problem is that callers can easily ignore the first part of the tuple 92 | # (using the underscore variable name, a Python convention for unused 93 | # variables). The resulting code doesn't look wrong at first glance. This 94 | # is as bad as just returning None. 95 | 96 | 97 | _, result = divide(x, y) 98 | if not result: 99 | print('Invalid inputs') 100 | else: 101 | print('Get result') 102 | # Invalid inputs 103 | 104 | 105 | # The second, better way to reduce these errors is to never return None at all. 106 | # Instead, raise an exception up to the caller and make them deal with it. 107 | # Here, I turn a ZeroDivisionError into a ValueError to indicate to the caller 108 | # the input values are bad: 109 | 110 | 111 | def divide(a, b): 112 | try: 113 | return a / b 114 | except ZeroDivisionError as e: 115 | raise ValueError('Invalid inputs') from e 116 | 117 | 118 | # Now the caller should handle the exception for the invalid input case (this 119 | # behavior should be documented; see Item 49: "Write docstrings for every 120 | # function, class and module").
The caller no longer requires a condition on 121 | # the return value of the function. If the function didn't raise an exception, 122 | # then the return value must be good. The outcome of exception handling is 123 | # clear. 124 | 125 | 126 | x, y = 5, 2 127 | try: 128 | result = divide(x, y) 129 | except ValueError: 130 | print('Invalid inputs') 131 | else: 132 | print('Result is %.2f' % result) 133 | # Result is 2.50 134 | 135 | 136 | # Things to remember 137 | 138 | # 1. Functions that return None to indicate special meaning are error prone 139 | # because None and other values (e.g., zero, the empty string) all 140 | # evaluate to False in conditional expressions. 141 | # 2. Raise exceptions to indicate special situations instead of returning 142 | # None. Expect the calling code to handle exceptions properly when they 143 | # are documented. 144 | -------------------------------------------------------------------------------- /item_16_address.txt: -------------------------------------------------------------------------------- 1 | Four score and seven years ago... -------------------------------------------------------------------------------- /item_16_generators_instead_of_lists.py: -------------------------------------------------------------------------------- 1 | # Item 16: Consider generators instead of returning lists 2 | import itertools 3 | 4 | 5 | # The simple choice for functions that produce a sequence of results is to 6 | # return a list of items. For example, say you want to find the index of every 7 | # word in a string. Here, I accumulate results in a list using the append 8 | # method and return it at the end of the function: 9 | 10 | 11 | def index_words(text): 12 | result = [] 13 | if text: 14 | result.append(0) 15 | for index, letter in enumerate(text): 16 | if letter == ' ': 17 | result.append(index + 1) 18 | return result 19 | 20 | 21 | # This works as expected for some sample input. 22 | 23 | 24 | address = 'Four score and seven years ago...' 25 | result = index_words(address) 26 | print(result[:3]) 27 | # [0, 5, 11] 28 | 29 | 30 | # There are two problems with the index_words function. 31 | 32 | # The first problem is that the code is a bit dense and noisy. Each time a 33 | # new result is found, I call the append method. The method call's bulk ( 34 | # result.append) deemphasizes the value being added to the list (index + 1). 35 | # There is one line for creating the result list and another for returning it. 36 | # While the function body contains ~130 characters (without whitespace), only 37 | # ~75 characters are important. 38 | 39 | # A better way to write this function is using a generator. Generators are 40 | # functions that use yield expressions. When called, generator functions do 41 | # not actually run but instead immediately return an iterator. With each call 42 | # to the next built-in function, the iterator will advance the generator to 43 | # its next yield expression. Each value passed to yield by the generator will 44 | # be returned by the iterator to the caller. 45 | 46 | # Here, I define a generator function that produces the same results as 47 | # before: 48 | 49 | 50 | def index_words_iter(text): 51 | if text: 52 | yield 0 53 | for index, letter in enumerate(text): 54 | if letter == ' ': 55 | yield index + 1 56 | 57 | 58 | # It's significantly easier to read because all interactions with the result 59 | # list have been eliminated. Results are passed to yield expressions instead.
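60 | 61 | 62 | # Calling the generator function doesn't run its body; it immediately returns 63 | # an iterator that advances one yield at a time with each call to the next 64 | # built-in function (a quick check, reusing the address string defined 65 | # above): 66 | 67 | 68 | it = index_words_iter(address) 69 | print(next(it)) 70 | print(next(it)) 71 | # 0 72 | # 5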
73 | 74 | 75 | # The iterator returned by the generator call can easily be converted to a 76 | # list by passing it to the list built-in function (see Item 9: "Consider 77 | # generator expressions for large comprehensions" for how this works). 78 | 79 | 80 | result = list(index_words_iter(address)) 81 | print(result) 82 | # [0, 5, 11, 15, 21, 27] 83 | 84 | 85 | # The second problem with index_words is that it requires all results to be 86 | # stored in the list before being returned. For huge inputs, this can cause 87 | # your program to run out of memory and crash. In contrast, a generator 88 | # version of this function can easily be adapted to take inputs of arbitrary 89 | # length. 90 | 91 | # Here, I define a generator that streams input from a file one line at a time 92 | # and yields outputs one word at a time. The working memory for this function 93 | # is bounded to the maximum length of one line of input. 94 | 95 | 96 | def index_file(handle): 97 | offset = 0 98 | for line in handle: 99 | if line: 100 | yield offset 101 | for letter in line: 102 | offset += 1 103 | if letter == ' ': 104 | yield offset 105 | 106 | 107 | # Running the generator produces the same results. 108 | 109 | 110 | with open('item_16_address.txt', 'r') as f: 111 | it = index_file(f) 112 | results = itertools.islice(it, 0, 3) 113 | print(list(results)) 114 | # [0, 5, 11] 115 | 116 | 117 | # The only gotcha of defining generators like this is that the callers must be 118 | # aware that the iterators returned are stateful and can't be reused (see 119 | # Item 17: "Be defensive when iterating over arguments"). 120 | 121 | 122 | # Things to remember 123 | 124 | # 1. Using generators can be clearer than the alternative of returning lists 125 | # of accumulated results. 126 | # 2. The iterator returned by a generator produces the set of values passed to 127 | # yield expressions within the generator function's body. 128 | # 3. Generators can produce a sequence of outputs for arbitrarily large inputs 129 | # because their working memory doesn't include all inputs and outputs. 130 | -------------------------------------------------------------------------------- /item_17_my_numbers.txt: -------------------------------------------------------------------------------- 1 | 15 2 | 35 3 | 80 -------------------------------------------------------------------------------- /item_18_reduce_visual_noise.py: -------------------------------------------------------------------------------- 1 | # Item 18: Reduce visual noise with variable positional arguments 2 | 3 | 4 | # Accepting optional positional arguments (often called star args in reference 5 | # to the conventional name for the parameter, *args) can make a function call 6 | # more clear and remove visual noise. 7 | 8 | # For example, say you want to log some debug information. With a fixed 9 | # number of arguments, you would need a function that takes a message and a 10 | # list of values. 11 | 12 | 13 | def log(message, values): 14 | if not values: 15 | print(message) 16 | else: 17 | values_str = ', '.join(str(x) for x in values) 18 | print('%s: %s' % (message, values_str)) 19 | 20 | log("My numbers are", [1, 2]) 21 | log("Hi there", []) 22 | # My numbers are: 1, 2 23 | # Hi there 24 | 25 | # Having to pass an empty list when you have no values to log is cumbersome 26 | # and noisy. It'd be better to leave out the second argument entirely. You can 27 | # do this in Python by prefixing the last positional parameter with *.
The 28 | # first parameter for the log message is required, whereas any number of 29 | # subsequent positional arguments are optional. The function body doesn't 30 | # need to change, only the callers do. 31 | 32 | 33 | def log(message, *values): # The only difference 34 | if not values: 35 | print(message) 36 | else: 37 | values_str = ', '.join(str(x) for x in values) 38 | print('%s: %s' % (message, values_str)) 39 | 40 | log("My numbers are", 1, 2) 41 | log("Hi there") # Much better 42 | # My numbers are: 1, 2 43 | # Hi there 44 | 45 | 46 | # If you already have a list and want to call a variable argument function 47 | # like log, you can do this by using the * operator. This instructs Python to 48 | # pass items from the sequence as positional arguments. 49 | 50 | 51 | favorites = [7, 33, 99] 52 | log('Favorite colors', *favorites) 53 | # Favorite colors: 7, 33, 99 54 | 55 | 56 | # There are two problems with accepting a variable number of positional 57 | # arguments. 58 | 59 | 60 | # 1. The first issue is that the variable arguments are always turned into 61 | # a tuple before they are passed to your function. This means that if the 62 | # caller of your function uses the * operator on a generator, it will be 63 | # iterated until it's exhausted. The resulting tuple will include every value 64 | # from the generator, which could consume a lot of memory and cause your 65 | # program to crash. 66 | 67 | 68 | def my_generator(): 69 | for i in range(10): 70 | yield i 71 | 72 | 73 | def my_func(*args): 74 | print(args) 75 | 76 | 77 | it = my_generator() 78 | my_func(*it) 79 | # (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) 80 | 81 | 82 | # Functions that accept *args are best for situations where you know the number 83 | # of inputs in the argument list will be reasonably small. It's ideal for 84 | # function calls that pass many literals or variable names together. It's 85 | # primarily for the convenience of the programmer and the readability of the 86 | # code. 87 | 88 | # 2. The second issue with *args is that you can't add new positional 89 | # arguments to your function in the future without migrating every caller. If 90 | # you try to add a positional argument in the front of the argument list, 91 | # existing callers will subtly break if they aren't updated. 92 | 93 | 94 | def log(sequence, message, *values): 95 | if not values: 96 | print('%s: %s' % (sequence, message)) 97 | else: 98 | values_str = ', '.join(str(x) for x in values) 99 | print('%s: %s: %s' % (sequence, message, values_str)) 100 | 101 | log(1, 'Favorites', 7, 33) # New usage is OK 102 | log('Favorite numbers', 7, 33) # Old usage breaks 103 | # 1: Favorites: 7, 33 104 | # Favorite numbers: 7: 33 105 | 106 | 107 | # The problem here is that the second call to log used 7 as the message 108 | # parameter because a sequence argument wasn't given. Bugs like this are 109 | # hard to track down because the code still runs without raising any 110 | # exceptions. To avoid this possibility entirely, you should use 111 | # keyword-only arguments when you want to extend functions that accept *args 112 | # (see Item 21: "Enforce clarity with keyword-only arguments"). 113 | 114 | 115 | # Things to remember 116 | 117 | # 1. Functions can accept a variable number of positional arguments by using 118 | # *args in the def statement. 119 | # 2. You can use the items from a sequence as the positional arguments for a 120 | # function with the * operator. 121 | # 3.
Using the * operator with a generator may cause your program to run out 122 | # of memory and crash. 123 | # 4. Adding new positional parameters to functions that accept *args can 124 | # introduce hard-to-find bugs. 125 | -------------------------------------------------------------------------------- /item_19_provide_optimal_behavior.py: -------------------------------------------------------------------------------- 1 | # Item 19: Provide optional behavior with keyword arguments 2 | 3 | 4 | # Like most other programming languages, calling a function in Python allows 5 | # for passing arguments by position. 6 | 7 | 8 | def remainder(number, divisor): 9 | return number % divisor 10 | 11 | 12 | assert remainder(20, 7) == 6 13 | 14 | 15 | # All positional arguments to Python functions can also be passed by keyword, 16 | # where the name of the argument is used in an assignment within the 17 | # parentheses of a function call. The keyword arguments can be passed in any 18 | # order as long as all of the required positional arguments are specified. 19 | # You can mix and match keyword and positional arguments. These calls are 20 | # equivalent. 21 | 22 | 23 | print(remainder(20, 7)) 24 | print(remainder(20, divisor=7)) 25 | print(remainder(number=20, divisor=7)) 26 | print(remainder(divisor=7, number=20)) 27 | # 6 28 | # 6 29 | # 6 30 | # 6 31 | 32 | 33 | # Positional arguments must be specified before keyword arguments. 34 | 35 | 36 | # remainder(number=20, 7) 37 | # line 36 38 | # remainder(number=20, 7) 39 | # SyntaxError: non-keyword arg after keyword arg 40 | 41 | 42 | # Each argument can only be specified once. 43 | 44 | 45 | # remainder(20, number=7) 46 | # line 45, in 47 | # remainder(20, number=7) 48 | # TypeError: remainder() got multiple values for keyword argument 'number' 49 | 50 | 51 | # The flexibility of keyword arguments provides three significant benefits. 52 | 53 | # The first advantage is that keyword arguments make the function call clearer 54 | # to new readers of the code. With the call remainder(20, 7), it's not evident 55 | # which argument is the number and which is the divisor without looking at the 56 | # implementation of the remainder method. In the call with keyword arguments, 57 | # number=20 and divisor=7 make it immediately obvious which parameter is being used for each purpose. 58 | 59 | # The second benefit of keyword arguments is that they can have default values 60 | # specified in the function definition. This allows a function to provide 61 | # additional capabilities when you need them but lets you accept the default 62 | # behavior most of the time. This can eliminate repetitive code and reduce 63 | # noise. 64 | 65 | # For example, say you want to compute the rate of fluid flowing into a vat. 66 | # If the vat is also on a scale, then you could use the difference between two 67 | # weight measurements at two different times to determine the flow rate. 68 | 69 | 70 | def flow_rate(weight_diff, time_diff): 71 | return weight_diff / time_diff 72 | 73 | weight_diff = 0.5 74 | time_diff = 3 75 | flow = flow_rate(weight_diff, time_diff) 76 | print('%.3f kg per second' % flow) 77 | # 0.167 kg per second 78 | 79 | 80 | # In the typical case, it's useful to know the flow rate in kilograms per 81 | # second. Other times, it'd be helpful to use the last sensor measurements 82 | # to approximate larger time scales, like hours or days. You can provide this behavior 83 | # in the same function by adding an argument for the time period scaling 84 | # factor.
85 | 86 | 87 | def flow_rate(weight_diff, time_diff, period): 88 | return (weight_diff / time_diff) * period 89 | 90 | 91 | # The problem is that now you need to specify the period argument every time 92 | # you call the function, even in the common case of flow rate per second ( 93 | # where the period is 1). 94 | 95 | 96 | flow_per_second = flow_rate(weight_diff, time_diff, 1) 97 | 98 | 99 | # To make this less noisy, I can give the period argument a default value. 100 | 101 | 102 | def flow_rate(weight_diff, time_diff, period=1): 103 | return (weight_diff / time_diff) * period 104 | 105 | 106 | # The period argument is now optional. 107 | 108 | 109 | flow_per_second = flow_rate(weight_diff, time_diff) 110 | flow_per_hour = flow_rate(weight_diff, time_diff, period=3600) 111 | print(flow_per_second) 112 | print(flow_per_hour) 113 | # 0.166666666667 114 | # 600.0 115 | 116 | 117 | # This works well for simple default values (it gets tricky for complex 118 | # default values--see Item 20: "Use None and Docstrings to specify dynamic 119 | # default arguments"). 120 | 121 | # The third reason to use keyword arguments is that they provide a powerful 122 | # way to extend a function's parameters while remaining backwards compatible 123 | # with existing callers. This lets you provide additional functionality 124 | # without having to migrate a lot of code, reducing the chance of introducing 125 | # bugs. 126 | 127 | 128 | def flow_rate(weight_diff, time_diff, period=1, units_per_kg=1): 129 | return ((weight_diff / units_per_kg) / time_diff) * period 130 | 131 | 132 | # The default argument value for units_per_kg is 1, which makes the return 133 | # weight units remain as kilograms. This means that all existing callers will 134 | # see no change in behavior. New callers to flow_rate can specify the new 135 | # keyword argument to see the new behavior. 136 | 137 | 138 | pounds_per_hour = flow_rate( 139 | weight_diff, time_diff, period=3600, units_per_kg=2.2) 140 | print(pounds_per_hour) 141 | # 272.727272727 142 | 143 | 144 | # The only problem with this approach is that optional keyword arguments like 145 | # period and units_per_kg may still be specified as positional arguments. 146 | 147 | 148 | pounds_per_hour = flow_rate(weight_diff, time_diff, 3600, 2.2) 149 | print(pounds_per_hour) 150 | # 272.727272727 151 | 152 | 153 | # Supplying optional arguments positionally can be confusing because it isn't 154 | # clear what the values 3600 and 2.2 correspond to. The best practice is to 155 | # always specify optional arguments using the keyword names and never pass 156 | # them as positional arguments. 157 | 158 | # Note: 159 | # Backwards compatibility using optional keyword arguments like this is 160 | # crucial for functions that accept *args (see Item 18: "Reduce visual noise 161 | # with variable positional arguments"). But an even better practice is to use 162 | # keyword-only arguments (see Item 21: "Enforce clarity with keyword-only 163 | # arguments"). 164 | 165 | 166 | # Things to remember 167 | 168 | # 1. Function arguments can be specified by position or by keyword. 169 | # 2. Keywords make it clear what the purpose of each argument is when it 170 | # would be confusing with only positional arguments. 171 | # 3. Keyword arguments with default values make it easy to add new behaviors 172 | # to a function, especially when the function has existing callers. 173 | # 4. Optional keyword arguments should always be passed by keyword instead of 174 | # by position.
175 | -------------------------------------------------------------------------------- /item_20_use_none_and_docstrings.py: -------------------------------------------------------------------------------- 1 | # Item 20: Use None and Docstrings to specify dynamic default arguments 2 | import datetime 3 | import time 4 | import json 5 | 6 | 7 | # Sometimes you need to use a non-static type as a keyword argument's default 8 | # value. For example, say you want to print logging messages that are marked 9 | # with the time of the logged event. In the default case, you want the message 10 | # to include the time when the function was called. You might try the 11 | # following approach, assuming the default arguments are reevaluated each time 12 | # the function is called. 13 | 14 | 15 | def log(message, when=datetime.datetime.now()): 16 | print('%s: %s' % (when, message)) 17 | 18 | log('Hi there!') 19 | time.sleep(0.1) 20 | log('Hi again!') 21 | # 2017-02-23 18:27:27.045710: Hi there! 22 | # 2017-02-23 18:27:27.045710: Hi again! 23 | 24 | 25 | # The timestamps are the same because datetime.now is only executed a single 26 | # time: when the function is defined. Default argument values 27 | # are evaluated only once per module load, which usually happens when a 28 | # program starts up. After the module containing this code is loaded, the 29 | # datetime.now default argument will never be evaluated again. 30 | 31 | 32 | # The convention for achieving the desired result in Python is to provide a 33 | # default value of None and to document the actual behavior in the docstring 34 | # (see Item 49: "Write Docstrings for every function, class, and module"). 35 | # When your code sees an argument value of None, you allocate the default 36 | # value accordingly. 37 | 38 | 39 | def log(message, when=None): 40 | """Log a message with a timestamp. 41 | Args: 42 | message: Message to print. 43 | when: datetime of when the message occurred. Defaults to the present 44 | time. 45 | """ 46 | when = datetime.datetime.now() if when is None else when 47 | print('%s: %s' % (when, message)) 48 | 49 | 50 | # Now the timestamps will be different. 51 | 52 | 53 | log('Hi there!') 54 | time.sleep(0.1) 55 | log('Hi again!') 56 | # 2017-02-23 18:38:27.510581: Hi there! 57 | # 2017-02-23 18:38:27.610755: Hi again! 58 | 59 | 60 | # Using None for default argument values is especially important when the 61 | # arguments are mutable. For example, say you want to load a value encoded as 62 | # JSON data. If decoding the data fails, you want an empty dictionary to be 63 | # returned by default. You might try this approach. 64 | 65 | 66 | def decode(data, default={}): 67 | try: 68 | return json.loads(data) 69 | except ValueError: 70 | return default 71 | 72 | 73 | # The problem here is the same as the datetime.now example above. The 74 | # dictionary specified for default will be shared by all calls to decode 75 | # because default argument values are only evaluated once (at module load 76 | # time). This can cause extremely surprising behavior. 77 | 78 | 79 | foo = decode('bad data') 80 | foo['stuff'] = 5 81 | bar = decode('also bad') 82 | bar['meep'] = 1 83 | print('Foo:', foo) 84 | print('Bar:', bar) 85 | # Foo: {'stuff': 5, 'meep': 1} 86 | # Bar: {'stuff': 5, 'meep': 1} 87 | 88 | 89 | # You'd expect two different dictionaries, each with a single key and value. 90 | # But modifying one seems to also modify the other. The culprit is that foo 91 | # and bar are both equal to the default parameter.
They are the same 92 | # dictionary object. 93 | 94 | 95 | assert foo is bar 96 | 97 | 98 | # The fix is to set the keyword argument default value to None and then 99 | # document the behavior in the function's docstring. 100 | 101 | 102 | def decode(data, default=None): 103 | """Load JSON data from a string. 104 | Args: 105 | data: JSON data to decode. 106 | default: Value to return if decoding fails. Defaults to an empty 107 | dictionary. 108 | """ 109 | if default is None: 110 | default = {} 111 | try: 112 | return json.loads(data) 113 | except ValueError: 114 | return default 115 | 116 | 117 | # Now running the same test code as before produces the expected result. 118 | 119 | 120 | foo = decode('bad data') 121 | foo['stuff'] = 5 122 | bar = decode('also bad') 123 | bar['meep'] = 1 124 | print('Foo:', foo) 125 | print('Bar:', bar) 126 | # Foo: {'stuff': 5} 127 | # Bar: {'meep': 1} 128 | 129 | 130 | # Things to remember 131 | 132 | # 1. Default arguments are only evaluated once: during function definition at 133 | # module load time. This can cause odd behavior for dynamic values 134 | # (like {} or []). 135 | # 2. Use None as the default value for keyword arguments that have a 136 | # dynamic value. Document the actual default behavior in the function's 137 | # docstring. 138 | -------------------------------------------------------------------------------- /item_21_enforce_clarity.py: -------------------------------------------------------------------------------- 1 | # Item 21: Enforce clarity with keyword-only arguments 2 | 3 | 4 | # Passing arguments by keyword is a powerful feature of Python functions (see 5 | # Item 19: "Provide optional behavior with keyword arguments"). The flexibility 6 | # of keyword arguments enables you to write code that will be clear for your 7 | # use cases. 8 | 9 | # For example, say you want to divide one number by another but be very 10 | # careful about special cases. Sometimes you want to ignore ZeroDivisionError 11 | # exceptions and return infinity instead. Other times, you want to ignore 12 | # OverflowError exceptions and return zero instead. 13 | 14 | 15 | def safe_division(number, divisor, ignore_overflow, ignore_zero_division): 16 | try: 17 | return number / divisor 18 | except OverflowError: 19 | if ignore_overflow: 20 | return 0 21 | else: 22 | raise 23 | except ZeroDivisionError: 24 | if ignore_zero_division: 25 | return float('inf') 26 | else: 27 | raise 28 | 29 | 30 | # Using this function is straightforward. This call will ignore the float 31 | # overflow from division and will return zero. 32 | 33 | 34 | result = safe_division(1, 100**500, True, False) 35 | print(result) 36 | # 0.0 37 | 38 | 39 | # This call will ignore the error from dividing by zero and will return 40 | # infinity. 41 | 42 | 43 | result = safe_division(1, 0, False, True) 44 | print(result) 45 | # inf 46 | 47 | 48 | # The problem is that it's easy to confuse the position of the two Boolean 49 | # arguments that control the exception-ignoring behavior. This can easily 50 | # cause bugs that are hard to track down. One way to improve the readability 51 | # of this code is to use keyword arguments. By default, the function can be 52 | # overly cautious and can always re-raise exceptions.
53 | 54 | 55 | def safe_division_b(number, divisor, 56 | ignore_overflow=False, 57 | ignore_zero_division=False): 58 | try: 59 | return number / divisor 60 | except OverflowError: 61 | if ignore_overflow: 62 | return 0 63 | else: 64 | raise 65 | except ZeroDivisionError: 66 | if ignore_zero_division: 67 | return float('inf') 68 | else: 69 | raise 70 | 71 | 72 | # Then callers can use keyword arguments to specify which of the ignore flags 73 | # they want to flip for specific operations, overriding the default behavior. 74 | 75 | 76 | print(safe_division_b(1, 10**500, ignore_overflow=True)) 77 | print(safe_division_b(1, 0, ignore_zero_division=True)) 78 | # 0.0 79 | # inf 80 | 81 | 82 | # The problem is, since these keyword arguments are optional behavior, there's 83 | # nothing forcing callers of your functions to use keyword arguments for 84 | # clarity. Even with the new definition of safe_division_b, you can still 85 | # call it the old way with positional arguments. 86 | 87 | 88 | print(safe_division_b(1, 10**500, True, False)) 89 | # 0.0 90 | 91 | 92 | # With complex functions like this, it's better to require that callers are 93 | # clear about their intentions. In Python 3, you can demand clarity by 94 | # defining your functions with keyword-only arguments. These arguments can 95 | # only be supplied by keyword, never by position. 96 | 97 | # Here, I redefine the safe_division function to accept keyword-only 98 | # arguments. The * symbol in the argument list indicates the end of positional 99 | # arguments and the beginning of the keyword-only arguments. 100 | 101 | 102 | def safe_division_c(number, divisor, *, 103 | ignore_overflow=False, 104 | ignore_zero_division=False): 105 | try: 106 | return number / divisor 107 | except OverflowError: 108 | if ignore_overflow: 109 | return 0 110 | else: 111 | raise 112 | except ZeroDivisionError: 113 | if ignore_zero_division: 114 | return float('inf') 115 | else: 116 | raise 117 | 118 | 119 | # Now, calling the function with positional arguments for the keyword argument 120 | # won't work. 121 | 122 | 123 | # result = safe_division_c(1, 10**500, True, False) 124 | # line 123, in 125 | # result = safe_division_c(1, 10**500, True, False) 126 | # TypeError: safe_division_c() takes 2 positional arguments but 4 were given 127 | 128 | 129 | # Keyword arguments and their default values work as expected. 130 | 131 | 132 | result = safe_division_c(1, 0, ignore_zero_division=True) # OK 133 | print(result) 134 | # inf 135 | 136 | try: 137 | result = safe_division_c(1, 0) 138 | print(result) 139 | except ZeroDivisionError: 140 | print("Exception ZeroDivisionError") 141 | pass # Expected 142 | # Exception ZeroDivisionError 143 | 144 | 145 | # Keyword-only arguments in Python 2 146 | 147 | # Unfortunately, Python 2 doesn't have explicit syntax for specifying 148 | # keyword-only arguments like Python 3. But you can achieve the same behavior 149 | # of raising TypeErrors for invalid function calls by using the ** operator in 150 | # an argument list. The ** operator is similar to the * operator (see Item 18: 151 | # "Reduce visual noise with variable positional arguments"), except that 152 | # instead of accepting a variable number of positional arguments, it accepts 153 | # any number of keyword arguments, even when they're not defined.
154 | 155 | 156 | # Python 2 157 | def print_args(*args, **kwargs): 158 | print('Positional:', args) 159 | print('Keyword: ', kwargs) 160 | 161 | print_args(1, 2, foo='bar', stuff='meep') 162 | # ('Positional:', (1, 2)) 163 | # ('Keyword: ', {'foo': 'bar', 'stuff': 'meep'}) 164 | 165 | 166 | # To make safe_division take keyword-only arguments in Python 2, you have the 167 | # function accept **kwargs. Then you pop keyword arguments that you expect out 168 | # of the kwargs dictionary, using the pop method's second argument to specify 169 | # the default value when the key is missing. Finally, you make sure there are 170 | # no more keyword arguments left in kwargs to prevent callers from supplying 171 | # arguments that are invalid. 172 | 173 | 174 | # Python 2 175 | def safe_division_d(number, divisor, **kwargs): 176 | ignore_overflow = kwargs.pop('ignore_overflow', False) 177 | ignore_zero_div = kwargs.pop('ignore_zero_division', False) 178 | if kwargs: 179 | raise TypeError('Unexpected **kwargs: %r' % kwargs) 180 | 181 | try: 182 | return number / divisor 183 | except OverflowError: 184 | if ignore_overflow: 185 | return 0 186 | else: 187 | raise 188 | except ZeroDivisionError: 189 | if ignore_zero_div: 190 | return float('inf') 191 | else: 192 | raise 193 | 194 | 195 | # Now you can call the function with or without keyword arguments. 196 | 197 | 198 | print(safe_division_d(1, 10.0)) 199 | print(safe_division_d(1, 0, ignore_zero_division=True)) 200 | print(safe_division_d(1, 10**500, ignore_overflow=True)) 201 | # 0.1 202 | # inf 203 | # 0.0 204 | 205 | 206 | # Trying to pass keyword-only arguments by position won't work, just like in Python 3. 207 | 208 | 209 | # safe_division_d(1, 0, False, True) 210 | # line 209, in 211 | # safe_division_d(1, 0, False, True) 212 | # TypeError: safe_division_d() takes 2 positional arguments but 4 were given 213 | 214 | 215 | # Trying to pass unexpected keyword arguments also won't work. 216 | 217 | 218 | # safe_division_d(0, 0, unexpected=True) 219 | # line 179, in safe_division_d 220 | # raise TypeError('Unexpected **kwargs: %r' % kwargs) 221 | # TypeError: Unexpected **kwargs: {'unexpected': True} 222 | 223 | 224 | # Things to remember 225 | 226 | # 1. Keyword arguments make the intention of a function call more clear. 227 | # 2. Use keyword-only arguments to force callers to supply keyword arguments 228 | # for potentially confusing functions, especially those that accept 229 | # multiple Boolean flags. 230 | # 3. Python 3 supports explicit syntax for keyword-only arguments in 231 | # functions. 232 | # 4. Python 2 can emulate keyword-only arguments for functions by using 233 | # **kwargs and manually raising TypeError exceptions. 234 | -------------------------------------------------------------------------------- /item_23_accepts_functions_4_interfaces.py: -------------------------------------------------------------------------------- 1 | # Item 23: Accept functions for simple interfaces instead of classes 2 | from collections import defaultdict 3 | 4 | 5 | # Many of Python's built-in APIs allow you to customize behavior by passing 6 | # in a function. These hooks are used by APIs to call back your code while 7 | # they execute. For example, the list type's sort method takes an optional key 8 | # argument that's used to determine each index's value for sorting.
Here, I 9 | # sort a list of names based on their lengths by providing a lambda expression 10 | # as the key hook: 11 | 12 | 13 | names = ['Socrates', 'Archimedes', 'Plato', 'Aristotle'] 14 | names.sort(key=lambda x: len(x)) 15 | print(names) 16 | # ['Plato', 'Socrates', 'Aristotle', 'Archimedes'] 17 | 18 | 19 | # In other languages, you might expect hooks to be defined by an abstract 20 | # class. In Python, many hooks are just stateless functions with well- 21 | # defined arguments and return values. Functions are ideal for hooks because 22 | # they are easier to describe and simpler to define than classes. Functions 23 | # work as hooks because Python has first-class functions: Functions and 24 | # methods can be passed around and referenced like any other value in the 25 | # language. 26 | 27 | # For example, say you want to customize the behavior of the defaultdict class 28 | # (see Item 46: "Use built-in algorithms and data structures" for details). 29 | # This data structure allows you to supply a function that will be called each 30 | # time a missing key is accessed. The function must return the default value 31 | # the missing key should have in the dictionary. Here, I define a hook that 32 | # logs each time a key is missing and returns 0 for the default value: 33 | 34 | 35 | def log_missing(): 36 | print('Key added') 37 | return 0 38 | 39 | 40 | # Given an initial dictionary and a set of desired increments, I can cause the 41 | # log_missing function to run and print twice (for 'red' and 'orange'). 42 | 43 | 44 | current = {'green': 12, 'blue': 3} 45 | increments = [ 46 | ('red', 5), 47 | ('blue', 17), 48 | ('orange', 9), 49 | ] 50 | result = defaultdict(log_missing, current) 51 | print('Before:', dict(result)) 52 | for key, amount in increments: 53 | result[key] += amount 54 | print('After:', dict(result)) 55 | # Before: {'blue': 3, 'green': 12} 56 | # Key added 57 | # Key added 58 | # After: {'blue': 20, 'green': 12, 'red': 5, 'orange': 9} 59 | 60 | 61 | # Supplying functions like log_missing makes APIs easy to build and test 62 | # because it separates side effects from deterministic behavior. For example, 63 | # say you now want the default value hook passed to defaultdict to count the 64 | # total number of keys that were missing. One way to achieve this is using 65 | # a stateful closure (see Item 15: "Know how closures interact with 66 | # variable scope" for details). Here, I define a helper function that uses 67 | # such a closure as the default value hook: 68 | 69 | 70 | def increment_with_report(current, increments): 71 | added_count = 0 72 | 73 | def missing(): 74 | nonlocal added_count # Stateful closure 75 | added_count += 1 76 | return 0 77 | 78 | result = defaultdict(missing, current) 79 | for key, amount in increments: 80 | result[key] += amount 81 | 82 | return result, added_count 83 | 84 | 85 | # Running this function produces the expected results (2), even though the 86 | # defaultdict has no idea that the missing hook maintains state. This is 87 | # another benefit of accepting simple functions for interfaces. It's easy to 88 | # add functionality later by hiding state in a closure.
89 | 90 | 91 | result, count = increment_with_report(current, increments) 92 | assert count == 2 93 | print('After:', dict(result)) 94 | # After: {'orange': 9, 'blue': 20, 'green': 12, 'red': 5} 95 | 96 | 97 | # The problem with defining a closure for stateful hooks is that it's harder 98 | # to read than the stateless function example. Another approach is to define 99 | # a small class that encapsulates the state you want to track. 100 | 101 | 102 | class CountMissing(object): 103 | def __init__(self): 104 | self.added = 0 105 | 106 | def missing(self): 107 | self.added += 1 108 | return 0 109 | 110 | 111 | # In other languages, you might expect that now defaultdict would have to be 112 | # modified to accommodate the interface of CountMissing. But in Python, thanks 113 | # to first-class functions, you can reference the CountMissing.missing method 114 | # directly on an object and pass it to defaultdict as the default value hook. 115 | # It's trivial to have a method satisfy a function interface. 116 | 117 | 118 | counter = CountMissing() 119 | result = defaultdict(counter.missing, current) # Method ref 120 | 121 | for key, amount in increments: 122 | result[key] += amount 123 | assert counter.added == 2 124 | print('After:', dict(result)) 125 | # After: {'orange': 9, 'blue': 20, 'green': 12, 'red': 5} 126 | 127 | 128 | # Using a helper class like this to provide the behavior of a stateful closure 129 | # is clearer than the increment_with_report function above. However, in isolation 130 | # it's still not immediately obvious what the purpose of the CountMissing 131 | # class is. Who constructs a CountMissing object? Who calls the missing 132 | # method? Will the class need other public methods to be added in the future? 133 | # Until you see its usage with defaultdict, the class is a mystery. 134 | 135 | # To clarify this situation, Python allows classes to define the __call__ 136 | # special method. __call__ allows an object to be called just like a function. 137 | # It also causes the callable built-in function to return True for such an 138 | # instance. 139 | 140 | 141 | class BetterCountMissing(object): 142 | def __init__(self): 143 | self.added = 0 144 | 145 | def __call__(self): 146 | self.added += 1 147 | return 0 148 | 149 | counter = BetterCountMissing() 150 | counter() 151 | assert callable(counter) 152 | 153 | 154 | # Here, I use a BetterCountMissing instance as the default value hook for a 155 | # defaultdict to track the number of missing keys that were added: 156 | 157 | 158 | counter = BetterCountMissing() 159 | result = defaultdict(counter, current) # Relies on __call__ 160 | for key, amount in increments: 161 | result[key] += amount 162 | assert counter.added == 2 163 | print('After:', dict(result)) 164 | # After: {'orange': 9, 'blue': 20, 'green': 12, 'red': 5} 165 | 166 | 167 | # This is much clearer than the CountMissing.missing example. The __call__ 168 | # method indicates that a class's instances will be used somewhere a function 169 | # argument would also be suitable (like API hooks). It directs new readers of 170 | # the code to the entry point that's responsible for the class's primary 171 | # behavior. It provides a strong hint that the goal of the class is to act as 172 | # a stateful closure. 173 | 174 | # Best of all, defaultdict still has no view into what's going on when you 175 | # use __call__. All that defaultdict requires is a function for the default 176 | # value hook.
Python provides many different ways to satisfy a simple function 177 | # interface depending on what you need to accomplish. 178 | 179 | 180 | # Things to remember 181 | 182 | # 1. Instead of defining and instantiating classes, functions are often all 183 | # you need for simple interfaces between components in Python. 184 | # 2. References to functions and methods in Python are first class, meaning 185 | # they can be used in expressions like any other type. 186 | # 3. The __call__ special method enables instances of a class to be called 187 | # like plain Python functions. 188 | # 4. When you need a function to maintain state, consider defining a class 189 | # that provides the __call__ method instead of defining a stateful closure 190 | # (see Item 15: "Know how closures interact with variable scope"). 191 | -------------------------------------------------------------------------------- /item_25_init_parent_classes_with_super.py: -------------------------------------------------------------------------------- 1 | # Item 25: Initialize parent classes with super 2 | from pprint import pprint 3 | 4 | 5 | # The old way to initialize a parent class from a child class is to directly 6 | # call the parent class's __init__ method with the child instance. 7 | 8 | 9 | class MyBaseClass(object): 10 | def __init__(self, value): 11 | self.value = value 12 | 13 | 14 | class MyChildClass(MyBaseClass): 15 | def __init__(self): 16 | MyBaseClass.__init__(self, 5) 17 | 18 | 19 | # This approach works fine for simple hierarchies but breaks down in many 20 | # cases. 21 | 22 | # If your class is affected by multiple inheritance (something to avoid in 23 | # general; see Item 26: "Use multiple inheritance only for mix-in utility 24 | # classes"), calling the superclass' __init__ methods directly can lead to 25 | # unpredictable behavior. One problem is that the __init__ call order isn't 26 | # specified across all subclasses. For example, here I define two parent classes 27 | # that operate on the instance's value field: 28 | 29 | 30 | class TimesTwo(object): 31 | def __init__(self): 32 | self.value *= 2 33 | 34 | 35 | class PlusFive(object): 36 | def __init__(self): 37 | self.value += 5 38 | 39 | 40 | # This class defines its parent classes in one ordering. 41 | 42 | 43 | class OneWay(MyBaseClass, TimesTwo, PlusFive): 44 | def __init__(self, value): 45 | MyBaseClass.__init__(self, value) 46 | TimesTwo.__init__(self) 47 | PlusFive.__init__(self) 48 | 49 | 50 | # And constructing it produces a result that matches the parent class ordering. 51 | 52 | 53 | foo = OneWay(5) 54 | print("First ordering is (5*2)+5=", foo.value) 55 | # First ordering is (5*2)+5= 15 56 | 57 | 58 | # Here's another class that defines the same parent classes but in a different 59 | # ordering: 60 | 61 | 62 | class AnotherWay(MyBaseClass, PlusFive, TimesTwo): 63 | def __init__(self, value): 64 | MyBaseClass.__init__(self, value) 65 | TimesTwo.__init__(self) 66 | PlusFive.__init__(self) 67 | 68 | 69 | # However, I left the calls to the parent class constructors TimesTwo.__init__ 70 | # and PlusFive.__init__ in the same order as before, causing this class's 71 | # behavior not to match the order of the parent classes in its definition. 72 | 73 | 74 | bar = AnotherWay(5) 75 | print("Second ordering still is ", bar.value) 76 | # Second ordering still is 15 77 | 78 | 79 | # Another problem occurs with diamond inheritance.
Diamond inheritance happens 80 | # when a subclass inherits from two separate classes that have the same 81 | # superclass somewhere in the hierarchy. Diamond inheritance causes 82 | # the common superclass's __init__ method to run multiple times, causing 83 | # unexpected behavior. For example, here I define two child classes that 84 | # inherit from MyBaseClass. 85 | 86 | 87 | class TimesFive(MyBaseClass): 88 | def __init__(self, value): 89 | MyBaseClass.__init__(self, value) 90 | self.value *= 5 91 | 92 | 93 | class PlusTwo(MyBaseClass): 94 | def __init__(self, value): 95 | MyBaseClass.__init__(self, value) 96 | self.value += 2 97 | 98 | 99 | # Then, I define a child class that inherits from both of these classes, making 100 | # MyBaseClass the top of the diamond. 101 | 102 | 103 | class ThisWay(TimesFive, PlusTwo): 104 | def __init__(self, value): 105 | TimesFive.__init__(self, value) 106 | PlusTwo.__init__(self, value) 107 | 108 | 109 | foo = ThisWay(5) 110 | print("Should be (5*5)+2 = 27 but is ", foo.value) 111 | # Should be (5*5)+2 = 27 but is 7 112 | 113 | 114 | # The output should be 27 because (5*5)+2 = 27. But the call to the second 115 | # parent class's constructor, PlusTwo.__init__, causes self.value to be reset 116 | # back to 5 when MyBaseClass.__init__ gets called a second time. 117 | 118 | # To solve these problems, Python 2.2 added the super built-in function and 119 | # defined the method resolution order (MRO). The MRO standardizes which 120 | # superclasses are initialized before others (e.g. depth-first, 121 | # left-to-right). It also ensures that common superclasses in diamond 122 | # hierarchies are only run once. 123 | 124 | # Here, I create a diamond-shaped class hierarchy again, but this time I use 125 | # super (in Python 2 style) to initialize the parent class: 126 | 127 | 128 | # Python 2 129 | class TimesFiveCorrect(MyBaseClass): 130 | def __init__(self, value): 131 | super(TimesFiveCorrect, self).__init__(value) 132 | self.value *= 5 133 | 134 | 135 | class PlusTwoCorrect(MyBaseClass): 136 | def __init__(self, value): 137 | super(PlusTwoCorrect, self).__init__(value) 138 | self.value += 2 139 | 140 | 141 | # Now the top part of the diamond, MyBaseClass.__init__, is only run a single 142 | # time. The other parent classes are run in the order specified in the class 143 | # statement. 144 | 145 | 146 | # Python 2 147 | class GoodWay(TimesFiveCorrect, PlusTwoCorrect): 148 | def __init__(self, value): 149 | super(GoodWay, self).__init__(value) 150 | 151 | 152 | foo = GoodWay(5) 153 | print("Should be 5*(5+2) = 35 and is ", foo.value) 154 | # Should be 5*(5+2) = 35 and is 35 155 | 156 | 157 | # This order may seem backwards at first. Shouldn't TimesFiveCorrect.__init__ 158 | # have run first? Shouldn't the result be (5*5)+2 = 27? The answer is no. 159 | # This ordering matches what the MRO defines for this class. The MRO ordering is 160 | # available on a class method called mro. 161 | 162 | 163 | pprint(GoodWay.mro()) 164 | # [<class '__main__.GoodWay'>, 165 | # <class '__main__.TimesFiveCorrect'>, 166 | # <class '__main__.PlusTwoCorrect'>, 167 | # <class '__main__.MyBaseClass'>, 168 | # <class 'object'>] 169 | 170 | 171 | # When I call GoodWay(5), it in turn calls TimesFiveCorrect.__init__, which 172 | # calls PlusTwoCorrect.__init__, which calls MyBaseClass.__init__. Once this 173 | # reaches the top of the diamond, then all of the initialization methods 174 | # actually do their work in the opposite order from how their __init__ 175 | # functions were called. MyBaseClass.__init__ assigns the value to 5. 176 | # PlusTwoCorrect.__init__ adds 2 to make value equal 7.
177 | # TimesFiveCorrect.__init__ multiplies it by 5 to make value equal 35. 178 | 179 | # The super built-in function works well, but it still has two noticeable 180 | # problems in Python 2: 181 | 182 | # - Its syntax is a bit verbose. You have to specify the class you're in, 183 | # the self object, the method name (usually __init__), and all the 184 | # arguments. This construction can be confusing to new Python programmers. 185 | 186 | # - You have to specify the current class by name in the call to super. If you 187 | # ever change the class's name--a very common activity when improving a 188 | # class hierarchy--you also need to update every call to super. 189 | 190 | # Thankfully, Python 3 fixes these issues by making calls to super with no 191 | # arguments equivalent to calling super with __class__ and self specified. In 192 | # Python 3, you should always use super because it's clear, concise, and 193 | # always does the right thing. 194 | 195 | 196 | class Explicit(MyBaseClass): 197 | def __init__(self, value): 198 | super(__class__, self).__init__(value * 2) 199 | 200 | 201 | class Implicit(MyBaseClass): 202 | def __init__(self, value): 203 | super().__init__(value * 2) 204 | 205 | 206 | assert Explicit(10).value == Implicit(10).value 207 | 208 | 209 | # This works because Python 3 lets you reliably reference the current class 210 | # in methods using the __class__ variable. This doesn't work in Python 2 211 | # because __class__ isn't defined. You may guess that you could use 212 | # self.__class__ as an argument to super, but this breaks because of the way 213 | # super is implemented in Python 2. 214 | 215 | 216 | # Things to remember 217 | 218 | # 1. Python's standard method resolution order (MRO) solves the problems of 219 | # superclass initialization order and diamond inheritance. 220 | # 2. Always use the super built-in function to initialize parent classes. 221 | -------------------------------------------------------------------------------- /item_28_inherit_from_collections_abc.py: -------------------------------------------------------------------------------- 1 | # Item 28: Inherit from collections.abc for custom container types 2 | 3 | 4 | # Much of programming in Python is defining classes that contain data and 5 | # describing how such objects relate to each other. Every Python class is a 6 | # container of some kind, encapsulating attributes and functionality together. 7 | # Python also provides built-in container types for managing data: lists, 8 | # tuples, sets, and dictionaries. 9 | 10 | # When you're designing classes for simple use cases like sequences, it's 11 | # natural that you'd want to subclass Python's built-in list type directly. 12 | # For example, say you want to create your own custom list type that has 13 | # additional methods for counting the frequency of its members. 14 | 15 | 16 | class FrequencyList(list): 17 | def __init__(self, members): 18 | super().__init__(members) 19 | 20 | def frequency(self): 21 | counts = {} 22 | for item in self: 23 | counts.setdefault(item, 0) 24 | counts[item] += 1 25 | return counts 26 | 27 | 28 | # By subclassing list, you get all of list's standard functionality and 29 | # preserve the semantics familiar to all Python programmers. Your additional 30 | # methods can add any custom behaviors you need.
31 | 32 | 33 | foo = FrequencyList(['a', 'b', 'a', 'c', 'b', 'a', 'd']) 34 | print('Length is', len(foo)) 35 | foo.pop() 36 | print('After pop:', repr(foo)) 37 | print('Frequency:', foo.frequency()) 38 | # Length is 7 39 | # After pop: ['a', 'b', 'a', 'c', 'b', 'a'] 40 | # Frequency: {'a': 3, 'b': 2, 'c': 1} 41 | 42 | 43 | # Now imagine you want to provide an object that feels like a list, allowing 44 | # indexing, but isn't a list subclass. For example, say you want to provide 45 | # sequence semantics (like list or tuple) for a binary tree class. 46 | 47 | 48 | class BinaryNode(object): 49 | def __init__(self, value, left=None, right=None): 50 | self.value = value 51 | self.left = left 52 | self.right = right 53 | 54 | 55 | # How do you make this act like a sequence type? Python implements its 56 | # container behaviors with instance methods that have special names. When you 57 | # access a sequence item by index, 58 | 59 | bar = [1, 2, 3] 60 | print(bar[0]) 61 | # 1 62 | 63 | 64 | # it will be interpreted as: 65 | 66 | 67 | print(bar.__getitem__(0)) 68 | # 1 69 | 70 | 71 | # To make the BinaryNode class act like a sequence, you can provide a custom 72 | # implementation of __getitem__ that traverses the object tree depth first. 73 | 74 | 75 | class IndexableNode(BinaryNode): 76 | def __init__(self, value, left=None, right=None): 77 | self.value = value 78 | self.left = left 79 | self.right = right 80 | 81 | def _search(self, count, index): 82 | # Depth-first, in-order traversal. Returns (found, count), where found 83 | # is the node at position index (or None) and count is the number of 84 | # nodes visited so far. 85 | found = None 86 | if self.left: 87 | found, count = self.left._search(count, index) 88 | if found is None: 89 | if count == index: 90 | found = self 91 | count += 1 92 | if found is None and self.right: 93 | found, count = self.right._search(count, index) 94 | return (found, count) 95 | 96 | def __getitem__(self, index): 97 | found, _ = self._search(0, index) 98 | if not found: 99 | raise IndexError('Index out of range') 100 | return found.value 101 | 102 | 103 | # You can construct your binary tree as usual. 104 | 105 | 106 | tree = IndexableNode( 107 | 10, 108 | left=IndexableNode( 109 | 5, 110 | left=IndexableNode(2), 111 | right=IndexableNode( 112 | 6, 113 | right=IndexableNode(7) 114 | ) 115 | ), 116 | right=IndexableNode( 117 | 15, left=IndexableNode(11) 118 | ) 119 | ) 120 | 121 | 122 | # But you can also access it like a list in addition to tree traversal. 123 | 124 | 125 | print('LRR', tree.left.right.right.value) 126 | print('Index 0 = ', tree[0]) 127 | print('Index 1 = ', tree[1]) 128 | print('11 in the tree?', 11 in tree) 129 | print('17 in the tree?', 17 in tree) 130 | print('Tree is ', list(tree)) 131 | # LRR 7 132 | # Index 0 = 2 133 | # Index 1 = 5 134 | # 11 in the tree? True 135 | # 17 in the tree? False 136 | # Tree is [2, 5, 6, 7, 10, 11, 15] 137 | 138 | 139 | # The problem is that implementing __getitem__ isn't enough to provide all of 140 | # the sequence semantics you'd expect. 141 | 142 | 143 | # len(tree) 144 | # TypeError: object of type 'IndexableNode' has no len() 145 | 146 | 147 | # The len built-in function requires another special method named __len__ that 148 | # must have an implementation for your custom sequence type. 149 | 150 | 151 | class SequenceNode(IndexableNode): 152 | def __len__(self): 153 | _, count = self._search(0, None) 154 | return count 155 | 156 | tree = SequenceNode( 157 | 10, 158 | left=SequenceNode( 159 | 5, 160 | left=SequenceNode(2), 161 | right=SequenceNode( 162 | 6, 163 | right=SequenceNode(7) 164 | ) 165 | ), 166 | right=SequenceNode( 167 | 15, left=SequenceNode(11) 168 | ) 169 | ) 170 | 171 | print('Tree has %d nodes' % len(tree)) 172 | # Tree has 7 nodes 173 | 174 | 175 | # Unfortunately, this still isn't enough. Also missing are the count and 176 | # index methods that a Python programmer would expect to see on a sequence 177 | # like list or tuple. Defining your own container types is much harder than 178 | # it looks.
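179 | 180 | 181 | # For example, even with __getitem__ and __len__ in place, the count method 182 | # that list provides is still missing; calling it on the SequenceNode tree 183 | # built above fails (a quick illustrative check): 184 | 185 | 186 | # tree.count(10) 187 | # AttributeError: 'SequenceNode' object has no attribute 'count'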
188 | 189 | # To avoid this difficulty throughout the Python universe, the built-in 190 | # collections.abc module defines a set of abstract base classes that provide 191 | # all of the typical methods for each container type. When you subclass from 192 | # these abstract base classes and forget to implement required methods, the 193 | # module will tell you something is wrong. 194 | 195 | 196 | from collections.abc import Sequence 197 | # (Before Python 3.3, these abstract base classes lived in collections.) 198 | 199 | 200 | class BadType(Sequence): 201 | pass 202 | 203 | # foo = BadType() 204 | # TypeError: Can't instantiate abstract class BadType with abstract methods __getitem__, __len__ 205 | 206 | 207 | # When you do implement all of the methods required by an abstract base class, 208 | # as I did above with SequenceNode, it will provide all of the additional 209 | # methods like index and count for free. 210 | 211 | 212 | class BetterNode(SequenceNode, Sequence): 213 | pass 214 | 215 | tree = BetterNode( 216 | 10, 217 | left=BetterNode( 218 | 5, 219 | left=BetterNode(2), 220 | right=BetterNode( 221 | 6, 222 | right=BetterNode(7) 223 | ) 224 | ), 225 | right=BetterNode( 226 | 15, left=BetterNode(11) 227 | ) 228 | ) 229 | 230 | print('Index of 7 is', tree.index(7)) 231 | print('Count of 10 is', tree.count(10)) 232 | # Index of 7 is 3 233 | # Count of 10 is 1 234 | 235 | 236 | # The benefit of using these abstract base classes is even greater for more 237 | # complex types like Set and MutableMapping, which have a large number of 238 | # special methods that need to be implemented to match Python conventions. 239 | 240 | 241 | # Things to remember 242 | 243 | # 1. Inherit directly from Python's container types (like list or dict) for 244 | # simple use cases. 245 | # 2. Beware of the large number of methods required to implement custom 246 | # container types correctly. 247 | # 3. Have your custom container types inherit from the interface defined in 248 | # collections.abc to ensure that your classes match required interfaces 249 | # and behaviors. 250 | -------------------------------------------------------------------------------- /item_29_use_plain_attributes.py: -------------------------------------------------------------------------------- 1 | # Chapter 4: Metaclasses and Attributes 2 | 3 | 4 | # Metaclasses are often mentioned in lists of Python's features, but few 5 | # understand what they accomplish in practice. The name metaclass vaguely 6 | # implies a concept above and beyond a class. Simply put, metaclasses let you 7 | # intercept Python's class statement and provide special behavior each time a 8 | # class is defined. 9 | 10 | 11 | # Similarly mysterious and powerful are Python's built-in features for 12 | # dynamically customizing attribute accesses. Along with Python's 13 | # object-oriented constructs, these facilities provide wonderful tools to ease 14 | # the transition from simple classes to complex ones. 15 | 16 | 17 | # However, with these powers come many pitfalls. Dynamic attributes enable you 18 | # to override objects and cause unexpected side effects. Metaclasses can 19 | # create extremely bizarre behaviors that are unapproachable to newcomers. It's 20 | # important that you follow the rule of least surprise and only use these 21 | # mechanisms to implement well understood idioms. 22 | 23 | 24 | # Item 29: Use plain attributes instead of get and set methods 25 | 26 | 27 | # Programmers coming to Python from other languages may naturally try to 28 | # implement explicit getter and setter methods in their classes.
class OldResistor(object):
    def __init__(self, ohms):
        self._ohms = ohms

    def get_ohms(self):
        return self._ohms

    def set_ohms(self, ohms):
        self._ohms = ohms


# Using these setters and getters is simple, but it's not Pythonic.


r0 = OldResistor(50e3)
print('Before: %5r' % r0.get_ohms())
r0.set_ohms(10e3)
print('After: %5r' % r0.get_ohms())
# Before: 50000.0
# After: 10000.0


# Such methods are especially clumsy for operations like incrementing in
# place.


r0.set_ohms(r0.get_ohms() + 5e3)
print('Add 5e3: %5r' % r0.get_ohms())
# Add 5e3: 15000.0


# These utility methods do help define the interface for your class, making it
# easier to encapsulate functionality, validate usage, and define boundaries.
# Those are important goals when designing a class to ensure you don't break
# callers as your class evolves over time.

# In Python, however, you almost never need to implement explicit setter or
# getter methods. Instead, you should always start your implementations with
# simple public attributes.


class Resistor(object):
    def __init__(self, ohms):
        self.ohms = ohms
        self.voltage = 0
        self.current = 0

r1 = Resistor(50e3)
print('Before: %5r' % r1.ohms)
# These make operations like incrementing in place natural and clear.
r1.ohms = 10e3
print('After: %5r' % r1.ohms)
r1.ohms += 5e3
print('Add 5e3: %5r' % r1.ohms)
# Before: 50000.0
# After: 10000.0
# Add 5e3: 15000.0


# Later, if you decide you need special behavior when an attribute is set, you
# can migrate to the @property decorator and its corresponding setter
# attribute. Here, I define a new subclass of Resistor that lets me vary the
# current by assigning the voltage property. Note that in order to work
# properly, the names of both the setter and getter methods must match the
# intended property name.


class VoltageResistance(Resistor):
    def __init__(self, ohms):
        super().__init__(ohms)
        self._voltage = 0

    @property
    def voltage(self):
        return self._voltage

    @voltage.setter
    def voltage(self, voltage):
        self._voltage = voltage
        self.current = self._voltage / self.ohms


# Now, assigning the voltage property will run the voltage setter method,
# updating the current property of the object to match.


r2 = VoltageResistance(1e3)
print('Before: %5r amps' % r2.current)
r2.voltage = 10
print('After: %5r amps' % r2.current)
# Before: 0 amps
# After: 0.01 amps


# Specifying a setter on a property also lets you perform type checking and
# validation on values passed to your class. Here, I define a class that
# ensures all resistance values are above zero ohms:


class BoundedResistance(Resistor):
    def __init__(self, ohms):
        super().__init__(ohms)

    @property
    def ohms(self):
        return self._ohms

    @ohms.setter
    def ohms(self, ohms):
        if ohms <= 0:
            raise ValueError('%f ohms must be > 0' % ohms)
        self._ohms = ohms


# Assigning an invalid resistance to the attribute raises an exception.
r3 = BoundedResistance(1e3)
# r3.ohms = 0
# ValueError: 0.000000 ohms must be > 0


# An exception will also be raised if you pass an invalid value to the
# constructor.


# BoundedResistance(-5)
# ValueError: -5.000000 ohms must be > 0


# This happens because BoundedResistance.__init__ calls Resistor.__init__,
# which assigns self.ohms = -5. That assignment causes the @ohms.setter method
# from BoundedResistance to be called, immediately running the validation code
# before object construction has completed.

# You can even use @property to make attributes from parent classes immutable.


class FixedResistance(Resistor):
    def __init__(self, ohms):
        super().__init__(ohms)

    @property
    def ohms(self):
        return self._ohms

    @ohms.setter
    def ohms(self, ohms):
        if hasattr(self, '_ohms'):
            raise AttributeError("Can't set attribute")
        self._ohms = ohms


# Trying to assign to the property after construction raises an exception.

r4 = FixedResistance(1e3)
# r4.ohms = 2e3
# AttributeError: Can't set attribute


# The biggest shortcoming of @property is that the methods for an attribute
# can only be shared by subclasses. Unrelated classes can't share the same
# implementation. However, Python also supports descriptors (see Item 31: "Use
# descriptors for reusable @property methods") that enable reusable property
# logic and many other use cases.

# Finally, when you use @property methods to implement setters and getters, be
# sure that the behavior you implement is not surprising. For example, don't
# set other attributes in getter property methods.

class MysteriousResistor(Resistor):
    def __init__(self, ohms):
        super().__init__(ohms)

    @property
    def ohms(self):
        self.voltage = self._ohms * self.current
        return self._ohms

    @ohms.setter
    def ohms(self, ohms):
        self._ohms = ohms


# This leads to extremely bizarre behavior.


r7 = MysteriousResistor(10)
r7.current = 0.01
print('Before: %5r' % r7.voltage)
r7.ohms
print('After: %5r' % r7.voltage)
# Before: 0
# After: 0.1


# The best policy is to only modify related object state in @property.setter
# methods. Be sure to avoid any other side effects the caller may not expect
# beyond the object, such as importing modules dynamically, running slow
# helper functions, or making expensive database queries. Users of your class
# will expect its attributes to be like any other Python object: quick and
# easy. Use normal methods to do anything more complex or slow.


# Things to remember

# 1. Define new class interfaces using simple public attributes, and avoid set
#    and get methods.
# 2. Use @property to define special behavior when attributes are accessed on
#    your objects, if necessary.
# 3. Follow the rule of least surprise and avoid weird side effects in your
#    @property methods.
# 4. Ensure that @property methods are fast; do slow or complex work using
#    normal methods.
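# As an aside, the @property/@ohms.setter pair used above is syntactic sugar
# for the property built-in. Here is a minimal sketch of the same
# BoundedResistance behavior written with property(fget, fset); the Resistor2
# name is hypothetical, not from the original text:


class Resistor2(object):
    def __init__(self, ohms):
        self.ohms = ohms

    def _get_ohms(self):
        return self._ohms

    def _set_ohms(self, ohms):
        if ohms <= 0:
            raise ValueError('%f ohms must be > 0' % ohms)
        self._ohms = ohms

    # property(fget, fset) wires the two methods to a single attribute name,
    # which is exactly what @property and @ohms.setter do behind the scenes.
    ohms = property(_get_ohms, _set_ohms)


r8 = Resistor2(1e3)
r8.ohms = 2e3  # runs _set_ohms, including the validation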
248 | -------------------------------------------------------------------------------- /item_30_consider_property.py: -------------------------------------------------------------------------------- 1 | # Item 30: Consider @property instead of refactoring attributes 2 | from datetime import timedelta 3 | import datetime 4 | 5 | 6 | # The built-in @property decorator makes it easy for simple accesses of an 7 | # instance's attributes to act smarter (see Item 29: "Use plain attributes 8 | # instead of get and set methods"). One advanced but common use of @property 9 | # is transitioning what was once a simple numerical attribute into an 10 | # on-the-fly calculation. This is extremely helpful because it lets you 11 | # migrate all existing usage of a class to have new behaviors without 12 | # rewriting any of the call sites. It also provides an important stopgap for 13 | # improving your interfaces over time. 14 | 15 | # For example, say you want to implement a leaky bucket quota using plain 16 | # Python objects. Here, the Bucket class represents how much quota remains 17 | # and the duration for which the quota will be available: 18 | 19 | 20 | class Bucket(object): 21 | def __init__(self, period): 22 | self.period_delta = timedelta(seconds=period) 23 | self.reset_time = datetime.datetime.now() 24 | self.quota = 0 25 | 26 | def __repr__(self): 27 | return 'Bucket(quota=%d)' % self.quota 28 | 29 | 30 | # The leaky bucket algorithm works by ensuring that, whenever the bucket is 31 | # filled, the amount of quota does not carry over from one period to the next. 32 | 33 | 34 | def fill(bucket, amount): 35 | now = datetime.datetime.now() 36 | if now - bucket.reset_time > bucket.period_delta: 37 | bucket.quota = 0 38 | bucket.reset_time = now 39 | bucket.quota += amount 40 | 41 | 42 | # Each time a quota consumer wants to do something, it first must ensure that 43 | # it can deduct the amount of quota it needs to use. 44 | 45 | 46 | def deduct(bucket, amount): 47 | now = datetime.datetime.now() 48 | if now - bucket.reset_time > bucket.period_delta: 49 | return False 50 | if bucket.quota - amount < 0: 51 | return False 52 | bucket.quota -= amount 53 | return True 54 | 55 | 56 | # To use this class, first I fill the bucket. 57 | 58 | bucket = Bucket(60) 59 | fill(bucket, 100) 60 | print(bucket) 61 | # Bucket(quota=100) 62 | 63 | # Then, I deduct the quota that I need. 64 | 65 | 66 | if deduct(bucket, 99): 67 | print('Had 99 quota') 68 | else: 69 | print('Not enough for 99 quota') 70 | print(bucket) 71 | # Had 99 quota 72 | # Bucket(quota=1) 73 | 74 | # Eventually, I'm prevented from making progress because I try to deduct more 75 | # quota than is available. In this case, the bucket's quota level remains 76 | # unchanged. 77 | 78 | 79 | if deduct(bucket, 3): 80 | print('Had 3 quota') 81 | else: 82 | print('Not enough for 3 quota') 83 | print(bucket) 84 | # Not enough for 3 quota 85 | # Bucket(quota=1) 86 | 87 | 88 | # The problem with this implementation is that I never know that quota level 89 | # the bucket started with. The quota is deducted over the course of the period 90 | # until it reaches zero. At that point, deduct will always return False. When 91 | # that happens, it would be useful to know whether callers to deduct are being 92 | # blocked because the Bucket ran out of quota or because the Bucket never had 93 | # quota in the first place. 94 | 95 | # To fix this, I can change the class to keep track of the max_quota issued in 96 | # the period and the quota_consumed in the period. 
97 | 98 | 99 | class Bucket(object): 100 | def __init__(self, period): 101 | self.period_delta = timedelta(seconds=period) 102 | self.reset_time = datetime.datetime.now() 103 | self.max_quota = 0 104 | self.quota_consumed = 0 105 | 106 | def __repr__(self): 107 | return ('Bucket(max_quota=%d, quota_consumed=%d)' % 108 | (self.max_quota, self.quota_consumed)) 109 | 110 | # I use a @property method to compute the current level of quota on-the-fly 111 | # using these new attributes. 112 | @property 113 | def quota(self): 114 | return self.max_quota - self.quota_consumed 115 | 116 | # When the quota attribute is assigned, I take special action matching the 117 | # current interface of the class used by fill and decuct. 118 | @quota.setter 119 | def quota(self, amount): 120 | delta = self.max_quota - amount 121 | if amount == 0: 122 | '''quota being reset for a new period''' 123 | self.quota_consumed = 0 124 | self.max_quota = 0 125 | elif delta < 0: 126 | '''quota being filled for the new period''' 127 | assert self.quota_consumed == 0 128 | self.max_quota = amount 129 | else: 130 | '''quota being consumed during the period''' 131 | assert self.max_quota >= self.quota_consumed 132 | self.quota_consumed += delta 133 | 134 | 135 | # Rerunning the demo code from above produces the same results. 136 | 137 | 138 | bucket = Bucket(60) 139 | print('Initial', bucket) 140 | fill(bucket, 100) 141 | print('Filled', bucket) 142 | 143 | if deduct(bucket, 99): 144 | print('Had 99 quota') 145 | else: 146 | print('Not enough for 99 quota') 147 | print('Now', bucket) 148 | 149 | if deduct(bucket, 3): 150 | print('Had 3 quota') 151 | else: 152 | print('Not enough for 3 quota') 153 | print('Still', bucket) 154 | # Initial Bucket(max_quota=0, quota_consumed=0) 155 | # Filled Bucket(max_quota=100, quota_consumed=0) 156 | # Had 99 quota 157 | # Now Bucket(max_quota=100, quota_consumed=99) 158 | # Not enough for 3 quota 159 | # Still Bucket(max_quota=100, quota_consumed=99) 160 | 161 | # The best part is that the code using Bucket.quota doesn't have to change or 162 | # know that the class has changed. New usage of Bucket can do the right thing 163 | # and access max_quota and quota_consumed directly. 164 | 165 | # I especially like @property because it lets you make incremental progress 166 | # toward a better data model over time. Reading the Bucket example above, you 167 | # may have though to yourself, "fill and deduct should have been implemented 168 | # as instance methods in the first place." Although you're probably right (see 169 | # Item 22: "Prefer helper classes over bookkeeping with dictionaries and 170 | # tuples"), in practice there are many situations in which objects start with 171 | # poorly defined interfaces or act as dumb data containers. This happens when 172 | # code grows over time, scope increases, multiple authors contribute without 173 | # any one considering long-term hygiene, etc. 174 | 175 | # @property is a tool to help you address problems you'll come across in real- 176 | # world code. Don't overuse it. When you find yourself repeatedly extending 177 | # @property methods, it's probably time to refactor your class instead of 178 | # further paving over your code's poor design. 179 | 180 | 181 | # Things to remember 182 | 183 | # 1. Use @property to give existing instance attributes new functionality. 184 | # 2. Make incremental progress toward better data models by using @property. 185 | # 3. 
Consider refactoring a class and all call sites when you find yourself 186 | # using @property too heavily. 187 | -------------------------------------------------------------------------------- /item_33_validate_subclass.py: -------------------------------------------------------------------------------- 1 | # Item 33: Validate subclass with metaclass 2 | 3 | 4 | # One of simplest applications of metaclass is verifying that a class was 5 | # defined correctly. When you're building a complex class hierarchy, you may 6 | # want to enforce style, require overriding methods, or have strict 7 | # relationships between class attributes. Metaclass enable these use cases by 8 | # providing a reliable way to run your validation code each time a new 9 | # subclass is defined. 10 | 11 | # Often a class's validation code runs in the __init__ method, when an object 12 | # of the class's type is constructed (see Item 28: "inherit from 13 | # collections.abc for custom container types" for an example). Using metaclass 14 | # for validation can raise errors much earlier. 15 | 16 | # Before I get into how to define a metaclass for validating subclasses, it's 17 | # important to understand the metaclass action for standard objects. A 18 | # metaclass is defined by inheriting from type. In the default case, a 19 | # metaclass receives the contents of associated class statements in its 20 | # __new__ method. Here, you can modify the class information before the type 21 | # is actually constructed: 22 | 23 | 24 | class Meta(type): 25 | def __new__(meta, name, bases, class_dict): 26 | print((meta, name, bases, class_dict)) 27 | return type.__new__(meta, name, bases, class_dict) 28 | 29 | 30 | class MyClass(object, metaclass=Meta): 31 | stuff = 123 32 | 33 | def foo(self): 34 | pass 35 | 36 | 37 | # (, 38 | # 'MyClass', 39 | # (,), 40 | # {'stuff': 123, 41 | # 'foo': , 42 | # '__qualname__': 'MyClass', 43 | # '__module__': '__main__'}) 44 | 45 | 46 | # The metaclass has access to the name of the class, the parent classes it 47 | # inherits from, and all of the class attributes that were defined in the 48 | # class's body. 49 | 50 | 51 | # Python 2 has slightly different syntax and specifies a metaclass using the 52 | # __metaclass__ class attribute. The Meta.__new__ interface is the same. 53 | 54 | 55 | # Python 2 56 | # class Meta(type): 57 | # def __new__(meta, name, bases, class_dict): 58 | # print((meta, name, bases, class_dict)) 59 | # return type.__new__(meta, name, bases, class_dict) 60 | # 61 | # 62 | # class MyClassInPython2(object): 63 | # __metaclass__ = Meta 64 | # stuff = 123 65 | # 66 | # def foo(self): 67 | # pass 68 | 69 | 70 | # (, 71 | # 'MyClassInPython2', 72 | # (,), 73 | # {'__module__': '__main__', 74 | # 'stuff': 123, 75 | # '__metaclass__': , 76 | # 'foo': }) 77 | 78 | 79 | # You can add functionality to the Meta.__new__ method in order to validate 80 | # all the parameters of a class before it's defined. For example, say you want 81 | # to represent any type of multi-sided polygon. You can do this by defining a 82 | # special validating metaclass and using it in the base class of your polygon 83 | # class hierarchy. Note that it's important not to apply the same validation 84 | # to the base class. 
85 | 86 | 87 | class ValidatePolygon(type): 88 | def __new__(meta, name, bases, class_dict): 89 | '''Don't validate the abstract Polygon class''' 90 | if bases != (object,): 91 | if class_dict['sides'] < 3: 92 | raise ValueError('Polygons need 3+ sides') 93 | return type.__new__(meta, name, bases, class_dict) 94 | 95 | 96 | class Polygon(object, metaclass=ValidatePolygon): 97 | sides = None # Specified by subclass 98 | 99 | @classmethod 100 | def interior_angles(cls): 101 | return (cls.sides - 2) * 180 102 | 103 | 104 | class Triangle(Polygon): 105 | sides = 3 106 | 107 | 108 | # If you try to define a polygon with fewer that three sides, the validation 109 | # will cause the class statement to fail immediately after the class statement 110 | # body. This means your program will not even be able to start running when 111 | # you define such a class. 112 | 113 | 114 | print('Before class') 115 | 116 | 117 | class Line(Polygon): 118 | print('Before side') 119 | sides = 1 120 | print('After side') 121 | print('After class') 122 | # Before class 123 | # Before side 124 | # ValueError: Polygons need 3+ sides 125 | 126 | 127 | # Things to remember 128 | 129 | # 1. Use metaclasses to ensure that subclass are well formed at the time they 130 | # are defined, before objects of their type are constructed. 131 | # 2. Metaclass have slightly different syntax in Python 2 vs. Python 3. 132 | # 3. The __new__ method of metaclasses is run after the class statement's 133 | # entire body has been processed. 134 | 135 | 136 | -------------------------------------------------------------------------------- /item_34_register_class_existence.py: -------------------------------------------------------------------------------- 1 | # Item 34: Register class existence with metaclass 2 | import json 3 | 4 | 5 | # Another common use of metaclass is to automatically register types in your 6 | # program. Registration is useful for doing reverse lookups, where you need to 7 | # map a simple identifier back to a corresponding class. 8 | 9 | # For example, say you want to implement your own serialized representation of 10 | # a Python object using JSON. You need a way to take an object and turn it 11 | # into a JSON string. Here, I do this generically by defining a base class 12 | # that records the constructor parameters and turns them into a JSON 13 | # dictionary. 14 | 15 | 16 | class Serializable(object): 17 | def __init__(self, *args): 18 | self.args = args 19 | 20 | def serialize(self): 21 | return json.dumps({'args': self.args}) 22 | 23 | 24 | # This class makes it easy to serialize simple, immutable data structures like 25 | # Point2D to a string. 26 | 27 | 28 | class Point2D(Serializable): 29 | def __init__(self, x, y): 30 | super().__init__(x, y) 31 | self.x = x 32 | self.y = y 33 | 34 | def __repr__(self): 35 | return 'Point2D(%d, %d)' % (self.x, self.y) 36 | 37 | 38 | point = Point2D(5, 3) 39 | print('Object: ', point) 40 | print('Serialized:', point.serialize()) 41 | # Object: Point2D(5, 3) 42 | # Serialized: {"args": [5, 3]} 43 | 44 | 45 | # Now, I need to deserialized this JSON string and construct the Point2D 46 | # object it represents. 
Here, I define another class that can deserialize 47 | # the data from its Serializable parent class: 48 | 49 | 50 | class Deserializable(Serializable): 51 | @classmethod 52 | def deserialize(cls, json_data): 53 | params = json.loads(json_data) 54 | return cls(*params['args']) 55 | 56 | 57 | # Using Deserizlizable makes it easy to serialize and deserialize simple, 58 | # immutable objects in a generic way. 59 | 60 | 61 | class BetterPoint2D(Deserializable): 62 | def __init__(self, x, y): 63 | super().__init__(x, y) 64 | self.x = x 65 | self.y = y 66 | 67 | def __repr__(self): 68 | return 'Point2D(%d, %d)' % (self.x, self.y) 69 | 70 | 71 | point = BetterPoint2D(5, 3) 72 | print('Before: ', point) 73 | data = point.serialize() 74 | print('Serialized: ', data) 75 | after = BetterPoint2D.deserialize(data) 76 | print('After: ', after) 77 | # Before: Point2D(5, 3) 78 | # Serialized: {"args": [5, 3]} 79 | # After: Point2D(5, 3) 80 | 81 | 82 | # The problem with this approach is that it only works if you know the 83 | # intended type of the serialized data ahead of time (e.g., Point2D, 84 | # BetterPoint2D). Ideally, you'd have a large number classes serializing to 85 | # JSON and one common function that could deserialize any of them back to a 86 | # corresponding Python object. 87 | 88 | # To do this, I include the serialized object's class name in the JSON data. 89 | 90 | 91 | class BetterSerializable(object): 92 | def __init__(self, *args): 93 | self.args = args 94 | 95 | def serialize(self): 96 | return json.dumps({ 97 | 'class': self.__class__.__name__, 98 | 'args': self.args 99 | }) 100 | 101 | def __repr__(self): 102 | return 'Point2D(%d, %d)' % (self.x, self.y) 103 | 104 | 105 | # Then, I can maintain a mapping of class names back to constructors for those 106 | # objects. The general deserialize function will work for any class passed to 107 | # register_class. 108 | 109 | 110 | registry = {} 111 | 112 | 113 | def register_class(target_class): 114 | registry[target_class.__name__] = target_class 115 | 116 | 117 | def deserialize(data): 118 | params = json.loads(data) 119 | name = params['class'] 120 | target_class = registry[name] 121 | return target_class(*params['args']) 122 | 123 | 124 | # To ensure that deserialize always works properly, I must call register_class 125 | # for every class I want to deserialize in the future. 126 | 127 | 128 | class EvenBetterPoint2D(BetterSerializable): 129 | def __init__(self, x, y): 130 | super().__init__(x, y) 131 | self.x = x 132 | self.y = y 133 | 134 | def __repr__(self): 135 | return 'EvenBetterPoint2D(%d, %d)' % (self.x, self.y) 136 | 137 | 138 | register_class(EvenBetterPoint2D) 139 | 140 | 141 | # Now, I can deserialize an arbitrary JSON string without having to know which 142 | # class it contains. 143 | 144 | 145 | point = EvenBetterPoint2D(5, 3) 146 | print('Before: ', point) 147 | data = point.serialize() 148 | print('Serialized: ', data) 149 | after = deserialize(data) 150 | print('After: ', after) 151 | # Before: EvenBetterPoint2D(5, 3) 152 | # Serialized: {"class": "EvenBetterPoint2D", "args": [5, 3]} 153 | # After: EvenBetterPoint2D(5, 3) 154 | 155 | 156 | # The problem with this approach is that you can forget to call 157 | # register_class. 
158 | 159 | 160 | class Point3D(BetterSerializable): 161 | def __init__(self, x, y, z): 162 | super().__init__(x, y, z) 163 | self.x = x 164 | self.y = y 165 | self.z = z 166 | 167 | def __repr__(self): 168 | return 'Point3D(%d, %d, %d)' % (self.x, self.y, self.z) 169 | 170 | # Forgot to call register_class! Whoops! 171 | 172 | 173 | # This will cause your code to break at runtime, when you finally try to 174 | # deserialize an object of a class you forgot to register. 175 | 176 | 177 | point = Point3D(5, 9, -4) 178 | data = point.serialize() 179 | # deserialize(data) 180 | # KeyError: 'Point3D' 181 | 182 | 183 | # Even though you chose to subclass BetterSerializable, you won't actually get 184 | # all of its features if you forget to call register_class after your class 185 | # statement body. This approach is error prone and especially challenging for 186 | # beginners. The same omission can happen with class decorators in Python 3. 187 | 188 | # What if you could somehow act on the programmer's intent to use 189 | # BetterSerialized and ensure that register_class is called in all cases? 190 | # Metaclasses enable this by intercepting the class statement when subclasses 191 | # are defined (see Item 33: "Validate subclasses with Metaclass"). This lets 192 | # you register the new type immediately after the class's body. 193 | 194 | 195 | class Meta(type): 196 | def __new__(meta, name, bases, class_dict): 197 | cls = type.__new__(meta, name, bases, class_dict) 198 | register_class(cls) 199 | return cls 200 | 201 | 202 | class RegisteredSerializable(BetterSerializable, metaclass=Meta): 203 | pass 204 | 205 | 206 | # When I define a subclass of RegisteredSerializable, I can be confident that 207 | # the call to register_class happened and deserialize will always work as 208 | # expected. 209 | 210 | 211 | class Vector3D(RegisteredSerializable): 212 | def __init__(self, x, y, z): 213 | super().__init__(x, y, z) 214 | self.x, self.y, self.z = x, y, z 215 | 216 | def __repr__(self): 217 | return 'Vector3D(%d, %d, %d)' % (self.x, self.y, self.z) 218 | 219 | 220 | v3 = Vector3D(10, -7, 3) 221 | print('Before: ', v3) 222 | data = v3.serialize() 223 | print('Serialized: ', data) 224 | after = deserialize(data) 225 | print('After: ', after) 226 | # Before: Vector3D(10, -7, 3) 227 | # Serialized: {"class": "Vector3D", "args": [10, -7, 3]} 228 | # After: Vector3D(10, -7, 3) 229 | 230 | # Using metaclass for class registration ensures that you'll never miss a 231 | # class as long as the inheritance tree is right. This works well for 232 | # serialization, as I've shown, and also applies to database 233 | # object-relationship mappings (ORMs), plug-in systems, and system hooks. 234 | 235 | 236 | # Things to remember 237 | 238 | # 1. Class registration is a helpful pattern for building modular Python 239 | # programs. 240 | # 2. Metaclass let you run registration code automatically each time your 241 | # base class is subclassed in a program. 242 | # 3. Using metaclass for class registration avoids errors by ensuring that 243 | # you never miss a registration call. 
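# Note: on Python 3.6 and later, the same registration pattern can be written
# without a metaclass by using the __init_subclass__ hook. This is a minimal
# sketch of that alternative, not part of the original text:


class AutoRegisteredSerializable(BetterSerializable):
    def __init_subclass__(cls, **kwargs):
        # Called automatically for every subclass, right after its class
        # statement body finishes executing.
        super().__init_subclass__(**kwargs)
        register_class(cls)


class Vector1D(AutoRegisteredSerializable):
    def __init__(self, magnitude):
        super().__init__(magnitude)
        self.magnitude = magnitude


# Vector1D is now in the registry without any metaclass machinery.
assert 'Vector1D' in registry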
244 | -------------------------------------------------------------------------------- /item_35_annotate_class_attributes.py: -------------------------------------------------------------------------------- 1 | # Item 35: Annotate class attributes with metaclass 2 | 3 | 4 | # One more useful feature enable by metaclasses is the ability to modify or 5 | # annotate properties after a class is defined but before the class is 6 | # actually used. This approach is commonly used with descriptors (see Item 31: 7 | # "Use descriptors for reuseable @property methods") to give them more 8 | # introspection into how they're being used within their containing class. 9 | 10 | # For example, say you want to define a new class that represents a row in 11 | # your customer database. You'd like a corresponding property on the class 12 | # for each column in the database table. To do this, here I define a 13 | # descriptor class to connect attributes to column names. 14 | 15 | 16 | class Field(object): 17 | def __init__(self, name): 18 | self.name = name 19 | self.internal_name = '_' + self.name 20 | 21 | def __get__(self, instance, instance_type): 22 | if instance is None: 23 | return self 24 | return getattr(instance, self.internal_name) 25 | 26 | def __set__(self, instance, value): 27 | setattr(instance, self.internal_name, value) 28 | 29 | 30 | # With the column name stored in the Field descriptor, I can save all of the 31 | # per-instance state directly in the instance dictionary as protected fields 32 | # using the setattr and getattr built-in functions. At first, this seems to be 33 | # much more convenient than building descriptors with weakref to avoid memory 34 | # leaks. 35 | 36 | # Defining the class representing a row requires supplying the column name for 37 | # each class attribute. 38 | 39 | 40 | class Customer(object): 41 | first_name = Field('first_name') 42 | last_name = Field('last_name') 43 | prefix = Field('prefix') 44 | suffix = Field('suffix') 45 | 46 | 47 | # Using the class is simple. Here, you can see how the Field descriptors 48 | # modify the instance dictionary __dict__ as expected: 49 | 50 | 51 | foo = Customer() 52 | # print('Before: ', repr(foo.first_name), foo.__dict__) 53 | print('Before: ', foo.__dict__) 54 | foo.first_name = 'Euclid' 55 | print('After: ', repr(foo.first_name), foo.__dict__) 56 | # Before: {} 57 | # After: 'Euclid' {'_first_name': 'Euclid'} 58 | 59 | 60 | # But it seems redundant. I already declared the name of the field when I 61 | # assigned the constructed Field object to Customer.first_name in the class 62 | # statement body. Why do I also have to pass the field name ('first_name' in 63 | # this case) to the Field constructor? 64 | 65 | # The problem is that the order of operations in the Customer class definition 66 | # is the opposite of how it reads from left to right. First, the Field 67 | # constructor is called as Field('first_name'). Then, the return value of that 68 | # is assigned to Customer.field_name. There's no way for the Field to know 69 | # upfront which class attribute it will be assigned to. 70 | 71 | # To eliminate the redundancy, I can use a metaclass. Metaclasses let you hook 72 | # the class statement directly and take action as soon as a class body is 73 | # finished. In this case, I can use the metalcass to assign Field.name and 74 | # Field.internal_name on the descriptor automatically instead of manually 75 | # specifying the field name multiple times. 
76 | 77 | 78 | class Meta(type): 79 | def __new__(meta, name, bases, class_dict): 80 | for key, value in class_dict.items(): 81 | if isinstance(value, Field): 82 | value.name = key 83 | value.internal_name = '_' + key 84 | cls = type.__new__(meta, name, bases, class_dict) 85 | return cls 86 | 87 | 88 | # Here, I define a base class that uses the metaclass. All classes 89 | # representing database rows should inherit from this class to ensure that 90 | # they use the metaclass: 91 | 92 | 93 | class DatabaseRow(object, metaclass=Meta): 94 | pass 95 | 96 | 97 | # To work with the metaclass, the field descriptor is largely unchanged. The 98 | # only difference is that it no longer requires any arguments to be passed to 99 | # its constructor. Instead, its attributes are set by the Meta.__new__ method 100 | # above. 101 | # 102 | class Field(object): 103 | def __init__(self): 104 | self.name = None 105 | self.internal_name = None 106 | 107 | def __get__(self, instance, instance_type): 108 | if instance is None: 109 | return self 110 | return getattr(instance, self.internal_name) 111 | 112 | def __set__(self, instance, value): 113 | setattr(instance, self.internal_name, value) 114 | 115 | 116 | # By using the metaclass, the new DatabaseRow base class, and the new Field 117 | # descriptor, the class definition for a database row no longer has the 118 | # redundancy from before. 119 | 120 | 121 | class BetterCustomer(DatabaseRow): 122 | first_name = Field() 123 | last_name = Field() 124 | prefix = Field() 125 | suffix = Field() 126 | 127 | 128 | # The behavior of the new class is identical to the old one. 129 | 130 | 131 | foo = BetterCustomer() 132 | # print('Before: ', repr(foo.first_name), foo.__dict__) 133 | print('Before: ', foo.__dict__) 134 | foo.first_name = 'Euler' 135 | print('After: ', repr(foo.first_name), foo.__dict__) 136 | # Before: {} 137 | # After: 'Euler' {'_first_name': 'Euler'} 138 | 139 | 140 | # Things to remember 141 | 142 | # 1. Metaclass enable you to modify a class's attributes before the class is 143 | # fully defined. 144 | # 2. Descriptors and metaclasses make a powerful combination for declarative 145 | # behavior and runtime introspection. 146 | # 3. You can avoid both memory leaks and the weakref module by using 147 | # metaclasses along with descriptors. 148 | -------------------------------------------------------------------------------- /item_37_use_threads.py: -------------------------------------------------------------------------------- 1 | # Item 37: Use threads for blocking I/O, avoid for parallelism 2 | import time 3 | from threading import Thread 4 | import select 5 | 6 | 7 | # The standard implementation of Python is call CPython. CPython runs a Python 8 | # program in two steps. First, it parses and compiles the source text into 9 | # bytecode. Then, it runs the bytecode using a stack-based interpreter. The 10 | # bytecode interpreter has state that must be maintained and coherent while 11 | # the Python program executes. Python enforces coherence with a mechanism 12 | # called the global interpreter lock (GIL). 13 | 14 | # Essentially, the GIL is a mutual-exclusion lock (mutex) that prevents 15 | # CPython from being affected by preemptive multi-threading, where one thread 16 | # takes control of a program by interrupting state if it comes at an 17 | # unexpected time. The GIL prevents these interruptions and ensures that every 18 | # bytecode instruction works correctly with the CPython implementation and its 19 | # C-extension modules. 
20 | 21 | # The GIL has an important negative side effect. With programs written in 22 | # languages like C++ or Java, having multiple threads of execution means your 23 | # program could utilize multiple CPU cores at the same time. Although Python 24 | # supports multiple threads of execution, the GIL causes only one of them to 25 | # make forward progress at a time. This means that when you reach for threads 26 | # to do parallel computation and speed up your Python programs, you will be 27 | # sorely disappointed. 28 | 29 | # For example, say you want to do something computationally intensive with 30 | # Python. I'll use a naive number factorization algorithm as a proxy. 31 | 32 | 33 | def factorize(number): 34 | for i in range(1, number + 1): 35 | if number % i == 0: 36 | yield i 37 | 38 | # Factoring a set of numbers in serial takes quite a long time. 39 | 40 | numbers = [2139079, 1214759, 1516637, 1852285] 41 | start = time.time() 42 | for number in numbers: 43 | list(factorize(number)) 44 | end = time.time() 45 | print('Took %.3f seconds' % (end - start)) 46 | # Took 0.624 seconds 47 | 48 | 49 | # Using multiple threads to do this computation would make sense in other 50 | # languages because you could take advantage of all the CPU cores of your 51 | # computer. Let me try that in Python. Here, I define a Python thread for 52 | # doing the same computation as before: 53 | 54 | 55 | class FactorizeThread(Thread): 56 | def __init__(self, number): 57 | super().__init__() 58 | self.number = number 59 | self.factors = list([]) 60 | 61 | def run(self): 62 | self.factors = list(factorize(self.number)) 63 | 64 | 65 | # Then, I start a thread for factorizing each number in parallel. 66 | 67 | 68 | start = time.time() 69 | threads = [] 70 | for number in numbers: 71 | thread = FactorizeThread(number) 72 | thread.start() 73 | threads.append(thread) 74 | 75 | # Finally, I wait for all of the threads to finish. 76 | 77 | for thread in threads: 78 | thread.join() 79 | end = time.time() 80 | print('Tool %.3f seconds' % (end - start)) 81 | # Tool 0.662 seconds 82 | 83 | # What's surprising is that this takes even longer than running factorize in 84 | # serial. With on thread per number, you may expect less than a n times 85 | # speedup on the dual-core machine I used to run this code. But you would 86 | # never expect the performance of these threads to be worse when you have 87 | # multiple CPUs to utilize. This demonstrates the effect of the GIL on 88 | # programs running in the standard CPython interpreter. 89 | 90 | # There are ways to get CPython to utilize multiple cores, but it doesn't 91 | # work with the standard Thread class (see Item 41: 92 | # "Consider concurrent.futures for true parallelism") and it can require 93 | # substantial effort. Knowing these limitations you may wonder, why does 94 | # Python support threads at all? There are two good reasons. 95 | 96 | # First, multiple threads make it easy for your program to seem like it's 97 | # doing multiple things at the same time. Managing the juggling act of 98 | # simultaneous tasks is difficult to implement yourself (see Item 40: 99 | # "Consider co-routines to run many functions concurrently" for an example). 100 | # With threads, you can leave it to Python to run your functions seemingly in 101 | # parallel. This works because CPython ensures a level of fairness between 102 | # Python threads of execution, even though only one of them makes forward 103 | # progress at a time due to the GIL. 
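# The interval at which CPython considers switching between threads is
# exposed through the sys module. This short sketch is my addition, not part
# of the original text:

import sys

# CPython's default is 0.005 seconds: roughly every 5 ms the running thread
# can be asked to release the GIL so another thread may be scheduled.
print(sys.getswitchinterval())
# 0.005

# Raising the interval trades thread responsiveness for less switching
# overhead; it never changes the one-thread-at-a-time rule of the GIL.
# sys.setswitchinterval(0.01)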
104 | 105 | # The second reason Python supports threads is to deal with blocking I/O, 106 | # which happens when Python does certain types of system calls. System calls 107 | # are how your Python program asks your computer's operating system to 108 | # interact with the external environment on your behalf. Blocking I/O includes 109 | # things like reading and writing files, interacting with networks, 110 | # communicating with devices like displays, etc. Threads help you handle 111 | # blocking I/O by insulating your program from the time it takes for the 112 | # operating system to respond to your requests. 113 | 114 | # For example, say you want to send a singal to a remote-controlled helicopter 115 | # through a serial port. I'll use a slow system call (select) as a proxy for 116 | # this activity. This function asks the operating system to block for 0.1 117 | # second and then return control to my program, similar to what would happen 118 | # when using a synchronous serial port. 119 | 120 | 121 | def slow_systemcall(): 122 | select.select([], [], [], 0.1) 123 | 124 | 125 | # Running this system call in serial requires a linearly increasing amount of 126 | # time. 127 | 128 | start = time.time() 129 | for _ in range(5): 130 | slow_systemcall() 131 | end = time.time() 132 | print('Took %.3f seconds' % (end - start)) 133 | # Took 0.501 seconds 134 | 135 | # The problem is that while the slow_systemcall function is running, my 136 | # program can't make any other progress. My program's main thread of execution 137 | # is blocked on the select system call. This situation is awful in practice. 138 | # You need to be able to compute your helicopter's next move while you're 139 | # sending it a signal, otherwise it's crash. When you find yourself needing to 140 | # do blocking I/O and computation simultaneously, it's time to consider moving 141 | # your system calls to threads. 142 | 143 | # Here, I run multiple invocation of the slow_systemcall function in separate 144 | # threads. This would allow you to communicate with multiple serial ports (and 145 | # helicopters) at the same time, while leaving the main thread to do whatever 146 | # computation is required. 147 | 148 | 149 | start = time.time() 150 | threads = [] 151 | for _ in range(5): 152 | thread = Thread(target=slow_systemcall) 153 | thread.start() 154 | threads.append(thread) 155 | 156 | 157 | # With the threads started, here I do some work to calculate the next 158 | # helicopter move before waiting for the system call threads to finish. 159 | 160 | 161 | def compute_helicopter_location(index): 162 | return index**2 163 | 164 | for i in range(5): 165 | compute_helicopter_location(i) 166 | 167 | for thread in threads: 168 | thread.join() 169 | end = time.time() 170 | print('Took %.3f seconds' % (end - start)) 171 | # Took 0.101 seconds 172 | 173 | # The parallel time is 5 times less than the serial time. This shows that the 174 | # system call will all run in parallel from multiple Python threads even 175 | # though they're limited by the GIL. The GIL prevents my Python code from 176 | # running in parallel, but it has no negative effect on system calls. This 177 | # works because Python threads release the GIL just before they make system 178 | # calls and reacquire the GIL as soon as the system calls are done. 179 | 180 | # There are many other ways to deal with blocking I/O besides threads, such as 181 | # the asyncio built-in module, and these alternatives have important benefits. 
182 | # But these options also require extra work in refactoring your code to fit a 183 | # different model of execution (see Item 40: "Consider coroutines to run many 184 | # functions concurrently"). Using threads is the simplest way to do blocking 185 | # I/O in parallel with minimal changes to your program. 186 | 187 | 188 | # Things to remember 189 | 190 | # 1. Python threads can't bytecode in parallel on multiple CPU cores because 191 | # of the global interpreter lock (GIL). 192 | # 2. Python threads are still useful despite the GIL because they provide an 193 | # easy way to do multiple things at seemingly the same time. 194 | # 3. Use Python threads to make multiple system calls in parallel. This allows 195 | # you to do blocking I/O at the same time as computation. 196 | -------------------------------------------------------------------------------- /item_38_use_lock.py: -------------------------------------------------------------------------------- 1 | # Item 38: Use lock to prevent data races in threads 2 | from threading import Thread 3 | from threading import Lock 4 | 5 | 6 | # After learning about the global interpreter lock (GIL) (see Item 37: "Use 7 | # threads for blocking I/O, Avoid for parallelism"), many new Python 8 | # programmers assume they can forgo using mutual-exclusion locks () in their 9 | # code altogether. If the GIL is already preventing Python threads form 10 | # running on multiple CPU cores in parallel, it must also act as a lock for a 11 | # program's data structure, right? Some testing on types like lists and 12 | # dictionaries may even show that this assumption appears to hold. 13 | 14 | # But beware, this is truly not the case. The GIL will not protect you. 15 | # Although only one Python thread runs at a time, a thread's operations on 16 | # data structures can be interrupted between any two bytecode instructions in 17 | # the Python interpreter. This is dangerous if you access the same objects 18 | # from multiple threads simultaneously. The invariants of your data structures 19 | # could be violated at practically any time because of these interruptions, 20 | # leaving your program in a corrupted state. 21 | 22 | # For example, say you want to write a program that counts many things in 23 | # parallel, like sampling light levels from a whole network of sensors. If you 24 | # want to determine the total number of light samples over time, you can 25 | # aggregate them with a new class. 26 | 27 | 28 | class Counter(object): 29 | def __init__(self): 30 | self.count = 0 31 | 32 | def increment(self, offset): 33 | self.count += offset 34 | 35 | 36 | # Imagine that each sensor has its own worker thread because reading from the 37 | # sensor requires blocking I/O. After each sensor measurement, the worker 38 | # thread increments the counter up to a maximum number of desired readings. 39 | 40 | 41 | def worker(sensor_index, how_many, counter): 42 | for _ in range(how_many): 43 | # Read from the sensor 44 | counter.increment(1) 45 | 46 | 47 | # Here, I define a function that starts a worker thread for each sensor and 48 | # waits for them all to finish their readings: 49 | 50 | 51 | def run_threads(func, how_many, counter): 52 | threads = [] 53 | for i in range(5): 54 | args = (i, how_many, counter) 55 | thread = Thread(target=func, args=args) 56 | threads.append(thread) 57 | thread.start() 58 | 59 | for thread in threads: 60 | thread.join() 61 | 62 | 63 | # Running five threads in parallel seems simple, and the outcome should be 64 | # abvious. 
65 | 66 | 67 | how_many = 10**5 68 | counter = Counter() 69 | run_threads(worker, how_many, counter) 70 | print('Counter should be %d, found %d' % (5 * how_many, counter.count)) 71 | # Counter should be 500000, found 468713 72 | 73 | 74 | # But this result is way off! What happened here? How could something so 75 | # simple go so wrong, especially since only one Python interpreter thread can 76 | # run at a time? 77 | 78 | # The Python interpreter enforces fairness between all of the threads that 79 | # are executing to ensure they get a roughly equal amount of processing time. 80 | # To do this, Python will suspend a thread as it's running and will resume 81 | # another thread in turn. The problem is that you don't know exactly when 82 | # Python will suspend your threads. A thread can even be paused seemingly 83 | # halfway through what looks like an atomic operation. That's what happened 84 | # in this case. 85 | 86 | # The Counter object's increment method looks simple. 87 | # counter.count += offset 88 | # But the += operator used on an object attribute actually instructs Python to 89 | # do three separate operations behind the scenes. The statement above is 90 | # equivalent to this: 91 | # value = getattr(counter, 'count') 92 | # result = value + offset 93 | # setattr(counter, 'count', result) 94 | 95 | # Python threads incrementing the counter can be suspended between any two of 96 | # these of these operations. This is problematic if the way the operations 97 | # interleave causes old versions of value to be assigned to the counter. Here 98 | # is an example of bad interaction between two threads, A and B: 99 | 100 | 101 | # Running in thread A 102 | value_a = getattr(counter, 'count') 103 | # context switch to thread B 104 | value_b = getattr(counter, 'count') 105 | result_b = value_b + 1 106 | setattr(counter, 'count', result_b) 107 | # context switch back to Thread A 108 | result_a = value_a + 1 109 | setattr(counter, 'count', result_a) 110 | 111 | 112 | # Thread A stomped on thread B, erasing all of its progress incrementing the 113 | # counter. This is exactly what happened in the right sensor example above. 114 | 115 | # To prevent data races like these and other forms of data structure 116 | # corruption, Python includes a robust set of tools in the threading built-in 117 | # module. The simplest and most useful of them is the Lock class, a 118 | # mutual-exclusion lock (mutex). 119 | 120 | # By using a lock, I can have the Counter class protect its current value 121 | # against simultaneous access from multiple threads. Only one thread will be 122 | # able to acquire the lock at a time. Here, I use a with statement to acquire 123 | # and release the lock; this makes it easier to see which code is executing 124 | # while the lock is held (see Item 43: "Consider contextlib and with 125 | # statements for reusable try/finally behavior" for details): 126 | 127 | 128 | class LockingCounter(object): 129 | def __init__(self): 130 | self.lock = Lock() 131 | self.count = 0 132 | 133 | def increment(self, offset): 134 | with self.lock: 135 | self.count += offset 136 | 137 | 138 | # Now I run the worker threads as before, but use a LockingCounter instead. 139 | 140 | counter = LockingCounter() 141 | run_threads(worker, how_many, counter) 142 | print('Counter should be %d, found %d' % (5 * how_many, counter.count)) 143 | # Counter should be 500000, found 500000 144 | 145 | # The result is exactly what I expect. The Lock solved the problem. 
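# If you want to see for yourself that += is not atomic, the dis built-in
# module can disassemble increment and show the separate bytecode
# instructions. This check is my addition, not part of the original text;
# the exact opcode names vary between Python versions.

import dis

dis.dis(Counter.increment)
# The output shows distinct instructions to load count, add offset, and store
# the result back -- a thread switch can happen between any two of them.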
146 | 147 | 148 | # Things to remember 149 | 150 | # 1. Even though Python has a global interpreter lock, you're still 151 | # responsible for protecting against objects without locks. 152 | # 2. Your programs will corrupt their data structures if you allow multiple 153 | # threads to modify the same objects without locks. 154 | # 3. The lock class in the threading built-in module is Python's standard 155 | # mutual exclusion lock implementation. 156 | -------------------------------------------------------------------------------- /item_42_define_function_decorators.py: -------------------------------------------------------------------------------- 1 | # Chapter 6 2 | 3 | 4 | # Python takes a "batteries included" approach to the standard library. Many 5 | # other languages ship with a small number of common packages and require you 6 | # to look elsewhere for important functionality. Although Python also has an 7 | # impressive repository of community-built modules, it strives to provide, in 8 | # its default installation, the most important modules for common uses of the 9 | # language. 10 | 11 | # The full set of standard modules is too large to cover in this book. But 12 | # some of these built-in packages are so closely intertwined with idiomatic 13 | # Python that they may as well as be part of the language specification. These 14 | # essential build-in modules are especially important when writing the 15 | # intricate, error-prone parts of programs. 16 | 17 | 18 | # Item 42: Define function decorators with functools.wraps 19 | from functools import wraps 20 | 21 | 22 | # Python has special syntax for decorators that can be applied to functions. 23 | # Decorators have the ability to run additional code before and after any 24 | # calls to the functions they wrap. This allows them to access and modify 25 | # input arguments and return values. This functionality can be useful for 26 | # enforcing semantics, debugging, registering functions, and more. 27 | 28 | # For example, say you want to print the arguments and return value of a 29 | # function call. This is especially helpful when debugging a stack of function 30 | # calls from a recursive function. Here, I define such a decorator: 31 | 32 | 33 | def trace(func): 34 | def wrapper(*args, **kwargs): 35 | result = func(*args, **kwargs) 36 | print('%s(%r, %r) -> %r' % 37 | (func.__name__, args, kwargs, result)) 38 | return result 39 | return wrapper 40 | 41 | # I can apply this to a function using the @ symbol. 42 | 43 | 44 | @ trace 45 | def fibonacci(n): 46 | """Return the n-th Fibonacci number""" 47 | if n in (0, 1): 48 | return n 49 | return fibonacci(n - 2) + fibonacci(n - 1) 50 | 51 | 52 | # The @ symbol is equivalent to calling the decorator on the function it wraps 53 | # and assigning the return value to the original name in the same scope. 54 | 55 | # fibonacci = trace(fibonacci) 56 | 57 | # Calling this decorated function will run the wrapper code before and after 58 | # fibonacci runs, printing the arguments and return value at each level in the 59 | # recursive stack. 60 | 61 | fibonacci(3) 62 | # fibonacci((1,), {}) -> 1 63 | # fibonacci((0,), {}) -> 0 64 | # fibonacci((1,), {}) -> 1 65 | # fibonacci((2,), {}) -> 1 66 | # fibonacci((3,), {}) -> 2 67 | 68 | # This works well, but it has an unintended side effect. The value returned by 69 | # the decorator--the function that's called above--doesn't think it's named 70 | # fibonacci. 
print(fibonacci)
# <function trace.<locals>.wrapper at 0x7fac26042a60>

# The cause of this isn't hard to see. The trace function returns the wrapper
# it defines. The wrapper function is what's assigned to the fibonacci name in
# the containing module because of the decorator. This behavior is problematic
# because it undermines tools that do introspection, such as debuggers (see
# Item 57: "Consider interactive debugging with pdb") and object serializers
# (see Item 44: "Make pickle reliable with copyreg").

# For example, the help built-in function is useless on the decorated
# fibonacci function.

help(fibonacci)
# Help on function wrapper in module __main__:
#
# wrapper(*args, **kwargs)

# The solution is to use the wraps helper function from the functools built-in
# module. This is a decorator that helps you write decorators. Applying it to
# the wrapper function will copy all of the important metadata about the
# inner function to the outer function.


def trace(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        print('%s(%r, %r) -> %r' %
              (func.__name__, args, kwargs, result))
        return result
    return wrapper

@trace
def fibonacci(n):
    """Return the n-th Fibonacci number"""
    if n in (0, 1):
        return n
    return fibonacci(n - 2) + fibonacci(n - 1)

# Now, running the help function produces the expected result, even though the
# function is decorated.

help(fibonacci)
# Help on function fibonacci in module __main__:
#
# fibonacci(n)
#     Return the n-th Fibonacci number

# Calling help is just one example of how decorators can subtly cause
# problems. Python functions have many other standard attributes
# (e.g., __name__, __module__) that must be preserved to maintain the
# interface of functions in the language. Using wraps ensures that you'll
# always get the correct behavior.


# Things to remember

# 1. Decorators are Python syntax for allowing one function to modify another
#    function at runtime.
# 2. Using decorators can cause strange behaviors in tools that do
#    introspection, such as debuggers.
# 3. Use the wraps decorator from the functools built-in module when you
#    define your own decorators to avoid any issues.
--------------------------------------------------------------------------------
/item_43_consider_contexlib.py:
--------------------------------------------------------------------------------
# Item 43: Consider contextlib and with statements for reusable try/finally
# behavior
from threading import Lock
import logging
from contextlib import contextmanager


# The with statement in Python is used to indicate when code is running in a
# special context. For example, mutual exclusion locks (see Item 38: "Use lock
# to prevent data races in threads") can be used in with statements to
# indicate that the indented code only runs while the lock is held.

lock = Lock()
with lock:
    print('Lock is held')

# The example above is equivalent to this try/finally construction because the
# Lock class properly enables the with statement.
19 | 20 | lock.acquire() 21 | try: 22 | print('Lock is held') 23 | finally: 24 | lock.release() 25 | 26 | # The with statement version of this is better because it eliminates the need 27 | # to write the repetitive code of the try/finally construction. It's easy to 28 | # make your objects and functions capable of use in with statements by using 29 | # the contextlib built-in module. This module contains the contextmanager 30 | # decorator, which lets a simple function be used in with statements. This is 31 | # much easier than defining a new class with the special methods __enter__ and 32 | # __exit__ (the standard way). 33 | 34 | # For example, say you want a region of your code to have more debug logging 35 | # sometimes. Here, I define a function that does logging at two severity 36 | # levels: 37 | 38 | 39 | def my_function(): 40 | logging.debug('Some debug data') 41 | logging.error('Error log here') 42 | logging.debug('More debug data') 43 | 44 | # The default log level for my program is WARNING, so only the error message 45 | # will print to screen when I run the function. 46 | 47 | # my_function() 48 | # ERROR:root:Error log here 49 | 50 | # I can elevate the log level of this function temporarily by defining a 51 | # context manager. This helper function boosts the logging severity level 52 | # before running the code in the with block and reduces the logging severity 53 | # level afterward. 54 | 55 | 56 | @contextmanager 57 | def debug_logging(level): 58 | logger = logging.getLogger() 59 | old_level = logger.getEffectiveLevel() 60 | logger.setLevel(level) 61 | try: 62 | yield 63 | finally: 64 | logger.setLevel(old_level) 65 | 66 | 67 | # The yield expression is the point at which the with block's contents will 68 | # execute. Any exceptions that happen in the with block will be re-raised by 69 | # the yield expression for you to catch in the helper function (see Item 40: 70 | # "Consider coroutines to run many functions concurrently" for an explanation 71 | # of how that works). 72 | 73 | # Now, I can call the sam logging function again, but in the debug_logging 74 | # context. This time, all of the debug messages are printed to the screen 75 | # during the with block. The same function running outside the with block 76 | # won't print debug messages. 77 | 78 | with debug_logging(logging.DEBUG): 79 | print('Inside:') 80 | my_function() 81 | print('After:') 82 | my_function() 83 | # Inside: 84 | # DEBUG:root:More debug data 85 | # After: 86 | # ERROR:root:Error log here 87 | 88 | 89 | # Using with Targets 90 | 91 | # The context manager passed to a with statement may also return an object. 92 | # This object is assigned to a local variable in the as part of the compound 93 | # statement. This gives the code running in the with block the ability to 94 | # directly interact with its context. 95 | 96 | # For example, say you want to write a file and ensure that it's always closed 97 | # correctly. You can do this by passing open to the with statement. open 98 | # returns a file handle for the as target of with and will close the handle 99 | # when the with block exits. 100 | 101 | with open('/tmp/my_output.txt', 'w') as handle: 102 | handle.write('This is some data!') 103 | 104 | 105 | # This approach is preferable to manually opening and closing the file handle 106 | # every time. It gives you confidence that the file is eventually closed when 107 | # execution leaves the with statement. 
# To enable your own functions to supply values for as targets, all you need
# to do is yield a value from your context manager. For example, here I define
# a context manager to fetch a Logger instance, set its level, and then yield
# it for the as target.


@contextmanager
def log_level(level, name):
    logger = logging.getLogger(name)
    old_level = logger.getEffectiveLevel()
    logger.setLevel(level)
    try:
        yield logger
    finally:
        logger.setLevel(old_level)


# Calling logging methods like debug on the as target will produce output
# because the logging severity level is set low enough in the with block.
# Using the logging module directly won't print anything because the default
# logging severity level for the default program logger is WARNING.


with log_level(logging.DEBUG, 'my_log') as logger:
    logger.debug('This is my message!')
    logging.debug('This will not print')


# After the with statement exits, calling debug logging methods on the Logger
# named 'my_log' will not print anything because the default logging severity
# level has been restored. Error log messages will always print.

logger = logging.getLogger('my_log')
logger.debug('Debug will not print')
logger.error('Error will print')
# DEBUG:my_log:This is my message!
# ERROR:my_log:Error will print


# Things to remember

# 1. The with statement allows you to reuse logic from try/finally blocks and
# reduce visual noise.
# 2. The contextlib built-in module provides a contextmanager decorator that
# makes it easy to use your own functions in with statements.
# 3. The value yielded by context managers is supplied to the as part of the
# with statement. It's useful for letting your code directly access the
# cause of the special context.

--------------------------------------------------------------------------------
/item_45_use_datetime.py:
--------------------------------------------------------------------------------
# Item 45: Use datetime instead of time for local clocks


# Coordinated Universal Time (UTC) is the standard, time-zone-independent
# representation of time. UTC works great for computers that represent time as
# seconds since the UNIX epoch. But UTC isn't ideal for humans. Humans
# reference time relative to where they're currently located. People say
# "noon" or "8 am" instead of "UTC 15:00 minus 7 hours." If your program
# handles times, you'll probably find yourself converting time between UTC and
# local clocks to make it easier for humans to understand.

# Python provides two ways of accomplishing time zone conversion. The old
# way, using the time built-in module, is disastrously error prone. The new
# way, using the datetime built-in module, works great with some help from
# the community-built package named pytz.

# You should be acquainted with both time and datetime to thoroughly
# understand why datetime is the best choice and time should be avoided.
# The time Module

# The localtime function from the time built-in module lets you convert a UNIX
# timestamp (seconds since the UNIX epoch in UTC) to a local time that matches
# the host computer's time zone (Pacific Daylight Time, in my case).

from time import localtime, strftime

now = 1407694710
local_tuple = localtime(now)
time_format = '%Y-%m-%d %H:%M:%S'
time_str = strftime(time_format, local_tuple)
print(time_str)
# 2014-08-11 02:18:30

# You'll often need to go the other way as well, starting with user input in
# local time and converting it to UTC time. You can do this by using the
# strptime function to parse the time string, then call mktime to convert
# local time to a UNIX timestamp.

from time import mktime, strptime

time_tuple = strptime(time_str, time_format)
utc_now = mktime(time_tuple)
print(utc_now)
# 1407694710.0

# How do you convert local time in one time zone to local time in another? For
# example, say you are taking a flight between San Francisco and New York, and
# want to know what time it will be in San Francisco once you've arrived in
# New York.

# Directly manipulating the return values from the time, localtime, and
# strptime functions to do time zone conversions is a bad idea. Time zones
# change all the time due to local laws. It's too complicated to manage
# yourself, especially if you want to handle every global city for flight
# departure and arrival.

# Many operating systems have configuration files that keep up with the time
# zone changes automatically. Python lets you use these time zones through the
# time module. For example, here I parse the departure time from the San
# Francisco time zone of Pacific Daylight Time:

parse_format = '%Y-%m-%d %H:%M:%S'
depart_sfo = '2014-05-01 15:45:16'
# On platforms where strptime supports the %Z directive, the book's original
# variant below also works; on this machine it raised ValueError instead.
# parse_format = '%Y-%m-%d %H:%M:%S %Z'
# depart_sfo = '2014-05-01 15:45:16 PDT'
time_tuple = strptime(depart_sfo, parse_format)
time_str = strftime(time_format, time_tuple)
print(time_str)
# 2014-05-01 15:45:16
# With the %Z variant on an unsupported platform:
# ValueError: time data '2014-05-01 15:45:16 PDT' does not match format
# '%Y-%m-%d %H:%M:%S %Z'

# After seeing that PDT works with the strptime function (where it does), you
# might also assume that other time zones known to your computer will work.
# Unfortunately, this isn't the case. Instead, strptime raises an exception
# when it sees Eastern Daylight Time (the time zone for New York).

arrival_nyc = '2014-05-01 23:33:24 EDT'
# time_tuple = strptime(arrival_nyc, time_format)
# ValueError: unconverted data remains: EDT

# The problem here is the platform-dependent nature of the time module. Its
# actual behavior is determined by how the underlying C functions work with
# the host operating system. This makes the functionality of the time module
# unreliable in Python. The time module fails to consistently work properly
# for multiple local times. Thus, you should avoid the time module for this
# purpose. If you must use time, only use it to convert between UTC and the
# host computer's local time. For all other types of conversions, use the
# datetime module.


# The datetime Module

# The second option for representing times in Python is the datetime class
# from the datetime built-in module.
# Like the time module, datetime can be used to convert from the current time
# in UTC to local time.

# Here, I take a time in UTC and convert it to my computer's local time. (The
# book's machine was in Pacific Daylight Time; the output below came from a
# UTC+8 machine.)

from datetime import datetime, timezone

now = datetime(2014, 8, 10, 18, 18, 30)
now_utc = now.replace(tzinfo=timezone.utc)
now_local = now_utc.astimezone()
print(now_local)
# 2014-08-11 02:18:30+08:00

# The datetime module can also easily convert a local time back to a UNIX
# timestamp in UTC.

time_str = '2014-08-10 11:18:30'
now = datetime.strptime(time_str, time_format)
time_tuple = now.timetuple()
utc_now = mktime(time_tuple)
print(utc_now)
# 1407640710.0

# Unlike the time module, the datetime module has facilities for reliably
# converting from one local time to another local time. However, datetime
# only provides the machinery for time zone operations with its tzinfo class
# and related methods. What's missing are the time zone definitions besides
# UTC.

# Luckily, the Python community has addressed this gap with the pytz module
# that's available for download from the Python Package Index
# (https://pypi.python.org/pypi/pytz/). pytz contains a full database of every
# time zone definition you might need.

# To use pytz effectively, you should always convert local times to UTC first.
# Perform any datetime operations you need on the UTC values (such as
# offsetting). Then, convert to local times as a final step.

# For example, here I convert an NYC flight arrival time to a UTC datetime.
# Although some of these calls seem redundant, all of them are necessary when
# using pytz.

import pytz

arrival_nyc = '2014-05-01 23:33:24'
nyc_dt_naive = datetime.strptime(arrival_nyc, time_format)
eastern = pytz.timezone('US/Eastern')
nyc_dt = eastern.localize(nyc_dt_naive)
utc_dt = pytz.utc.normalize(nyc_dt.astimezone(pytz.utc))
print(utc_dt)
# 2014-05-02 03:33:24+00:00

# Once I have a UTC datetime, I can convert it to San Francisco local time.

pacific = pytz.timezone('US/Pacific')
sf_dt = pacific.normalize(utc_dt.astimezone(pacific))
print(sf_dt)
# 2014-05-01 20:33:24-07:00

# Just as easily, I can convert it to the local time in Nepal.

nepal = pytz.timezone('Asia/Katmandu')
nepal_dt = nepal.normalize(utc_dt.astimezone(nepal))
print(nepal_dt)
# 2014-05-02 09:18:24+05:45

# With datetime and pytz, these conversions are consistent across all
# environments regardless of what operating system the host computer is
# running.


# Things to remember

# 1. Avoid using the time module for translating between different time zones.
# 2. Use the datetime built-in module along with the pytz module to reliably
# convert between times in different time zones.
# 3. Always represent time in UTC and do conversions to local time as the
# final step before presentation.
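# As a closing sketch (mine, not the book's), the UTC-first discipline above
# folds into one small helper; it assumes pytz is installed and that both
# zone names are valid IANA keys.

def convert_zone(time_str, src_name, dst_name, fmt='%Y-%m-%d %H:%M:%S'):
    src = pytz.timezone(src_name)
    dst = pytz.timezone(dst_name)
    naive = datetime.strptime(time_str, fmt)
    # Always go through UTC: localize in the source zone, normalize to UTC,
    # then convert to the destination zone as the final step.
    utc_dt = pytz.utc.normalize(src.localize(naive).astimezone(pytz.utc))
    return dst.normalize(utc_dt.astimezone(dst))

print(convert_zone('2014-05-01 23:33:24', 'US/Eastern', 'US/Pacific'))
# 2014-05-01 20:33:24-07:00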
--------------------------------------------------------------------------------
/item_47_use_decimal.py:
--------------------------------------------------------------------------------
# Item 47: Use decimal when precision is paramount
from decimal import Decimal
from decimal import ROUND_UP


# Python is an excellent language for writing code that interacts with
# numerical data. Python's integer type can represent values of any practical
# size. Its double-precision floating point type complies with the IEEE 754
# standard. The language also provides a standard complex number type for
# imaginary values. However, these aren't enough for every situation.

# For example, say you want to compute the amount to charge a customer for an
# international phone call. You know the time in minutes and seconds that the
# customer was on the phone (say, 3 minutes 42 seconds). You also have a set
# rate for the cost of calling Antarctica from the United States
# ($1.45/minute). What should the charge be?

# With floating point math, the computed charge seems reasonable.

rate = 1.45
seconds = 3*60 + 42
cost = rate * seconds / 60
print(cost)
# 5.364999999999999

# But rounding it to the nearest whole cent rounds down when you want it to
# round up to properly cover all costs incurred by the customer.

print(round(cost, 2))
# 5.36

# Say you also want to support very short phone calls between places that are
# much cheaper to connect. Here, I compute the charge for a phone call that
# was 5 seconds long with a rate of $0.05/minute:

rate = 0.05
seconds = 5
cost = rate * seconds / 60
print(cost)
# 0.004166666666666667

# The resulting float is so low that it rounds down to zero. This won't do!

print(round(cost, 2))
# 0.0

# The solution is to use the Decimal class from the decimal built-in module.
# The Decimal class provides fixed point math of 28 decimal places by default.
# It can go even higher if required. This works around the precision issues in
# IEEE 754 floating point numbers. The class also gives you more control over
# rounding behaviors.

# For example, redoing the Antarctica calculation with Decimal results in an
# exact charge instead of an approximation.

rate = Decimal('1.45')
seconds = Decimal('222')  # 3*60 + 42
cost = rate * seconds / Decimal('60')
print(cost)
# 5.365

# The Decimal class has a built-in method for rounding to exactly the
# decimal place you need with the rounding behavior you want.

rounded = cost.quantize(Decimal('0.01'), rounding=ROUND_UP)
print(rounded)
# 5.37

# Using the quantize method this way also properly handles the small usage
# case for short, cheap phone calls. Here, you can see the Decimal cost is
# still less than 1 cent for the call:

rate = Decimal('0.05')
seconds = Decimal('5')
cost = rate * seconds / Decimal('60')
print(cost)
# 0.004166666666666666666666666667

# But the quantize behavior ensures that this is rounded up to one whole cent.

rounded = cost.quantize(Decimal('0.01'), rounding=ROUND_UP)
print(rounded)
# 0.01

# While Decimal works great for fixed point numbers, it still has limitations
# in its precision (e.g. 1/3 will be an approximation). For representing
# rational numbers with no limit to precision, consider using the Fraction
# class from the fractions built-in module.
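# A brief sketch of that alternative (mine, not the book's), reusing the
# short-call numbers from above; Fraction accepts the same decimal strings.

from fractions import Fraction

rate = Fraction('0.05')   # exactly 1/20
seconds = Fraction(5)
cost = rate * seconds / 60
print(cost)
# 1/240

# Unlike the Decimal result, 1/240 is exact; quantizing for display would
# still be a final, separate step.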
# Things to remember

# 1. Python has built-in types and classes in modules that can represent
# practically every type of numerical value.
# 2. The Decimal class is ideal for situations that require high precision and
# exact rounding behavior, such as computations of monetary values.

--------------------------------------------------------------------------------
/item_48_community_built_modules.py:
--------------------------------------------------------------------------------
# Item 48: Know where to find community-built modules


# Python has a central repository of modules (https://pypi.python.org) for you
# to install and use in your programs. These modules are built and maintained
# by people like you: the Python community. When you find yourself facing an
# unfamiliar challenge, the Python Package Index (PyPI) is a great place to
# look for code that will get you closer to your goal.

# To use the Package Index, you'll need to use a command-line tool named pip.
# pip is installed by default in Python 3.4 and above (it's also accessible
# with python -m pip). For earlier versions, you can find instructions for
# installing pip on the Python Packaging website
# (https://packaging.python.org).

# Once installed, using pip to install a new module is simple. For example,
# here I install the pytz module that I used in another item in this chapter
# (see Item 45: "Use datetime instead of time for local clocks").

# $ pip3 install pytz

# In the example above, I used the pip3 command-line tool to install the
# Python 3 version of the package. The pip command-line tool (without the 3)
# is also available for installing packages for Python 2. The majority of
# popular packages are now available for either version of Python (see Item
# 1: "Know which version of Python you're using"). pip can also be used with
# pyvenv to track sets of packages to install for your projects (see Item 53:
# "Use virtual environments for isolated and reproducible dependencies", and
# the short aside after this item's summary).

# Each module in the PyPI has its own software license. Most of the packages,
# especially the popular ones, have free or open source licenses (see
# http://opensource.org for details). In most cases, these licenses allow you
# to include a copy of the module with your program (when in doubt, talk to a
# lawyer).


# Things to remember

# 1. The Python Package Index (PyPI) contains a wealth of common packages
# that are built and maintained by the Python community.
# 2. pip is the command-line tool to use for installing packages from PyPI.
# 3. pip is installed by default in Python 3.4 and above; you must install it
# yourself for older versions.
# 4. The majority of PyPI modules are free and open source software.
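# A short, hedged aside (not from the book): pinning an exact version and
# recording your full dependency set are also done with pip itself. The
# version number here is illustrative, not a recommendation.

# $ pip3 install pytz==2014.4           # install one pinned release
# $ pip3 freeze > requirements.txt      # record every installed package
# $ pip3 install -r requirements.txt    # reproduce that set elsewhere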
--------------------------------------------------------------------------------
/item_51_define_a_root_exception.py:
--------------------------------------------------------------------------------
# Item 51: Define a root exception to insulate callers from APIs


# When you're defining a module's API, the exceptions you throw are just as
# much a part of your interface as the functions and classes you define (see
# Item 14: "Prefer exceptions to returning None").

# Python has a built-in hierarchy of exceptions for the language and standard
# library. There's a draw to using the built-in exception types for reporting
# errors instead of defining your own new types. For example, you could raise
# a ValueError exception whenever an invalid parameter is passed to your
# function.


def determine_weight(volume, density):
    if density < 0:
        raise ValueError('Density must be positive')
    # ...


# In some cases, using ValueError makes sense, but for APIs it's much more
# powerful to define your own hierarchy of exceptions. You can do this by
# providing a root Exception in your module. Then, have all other exceptions
# raised by that module inherit from the root exception.

# my_module.py
class Error(Exception):
    """Base-class for all exceptions raised by this module."""
    pass


class InvalidDensityError(Error):
    """There was a problem with a provided density value."""
    pass

# Having a root exception in a module makes it easy for consumers of your API
# to catch all of the exceptions that you raise on purpose. For example, here
# a consumer of your API makes a function call with a try/except statement
# that catches your root exception:

# try:
#     weight = my_module.determine_weight(1, -1)
# except my_module.Error as e:
#     logging.error('Unexpected error: %s', e)

# The try/except prevents your API's exceptions from propagating too far
# upward and breaking the calling program. It insulates the calling code from
# your API. This insulation has three helpful effects.

# First, root exceptions let callers understand when there's a problem with
# their usage of your API. If callers are using your API properly, they should
# catch the various exceptions that you deliberately raise. If they don't
# handle such an exception, it will propagate all the way up to the insulating
# except block that catches your module's root exception. That block can bring
# the exception to the attention of the API consumer, giving them a chance to
# add proper handling of the exception type.

# try:
#     weight = my_module.determine_weight(1, -1)
# except my_module.InvalidDensityError:
#     weight = 0
# except my_module.Error as e:
#     logging.error('Bug in the calling code: %s', e)

# The second advantage of using root exceptions is that they can help find
# bugs in your API module's code. If your code only deliberately raises
# exceptions that you define within your module's hierarchy, then all other
# types of exceptions raised by your module must be the ones that you didn't
# intend to raise. These are bugs in your API's code.

# Using the try/except statement above will not insulate API consumers from
# bugs in your API module's code.
# To do that, the caller needs to add another
# except block that catches Python's base Exception class. This allows the
# API consumer to detect when there's a bug in the API module's implementation
# that needs to be fixed.

# try:
#     weight = my_module.determine_weight(1, -1)
# except my_module.InvalidDensityError:
#     weight = 0
# except my_module.Error as e:
#     logging.error('Bug in the calling code: %s', e)
# except Exception as e:
#     logging.error('Bug in the API code: %s', e)

# The third impact of using root exceptions is future-proofing your API. Over
# time, you may want to expand your API to provide more specific exceptions in
# certain situations. For example, you could add an Exception subclass that
# indicates the error condition of supplying negative densities.


# my_module.py
class NegativeDensityError(InvalidDensityError):
    """A provided density value was negative."""
    pass


def determine_weight(volume, density):
    if density < 0:
        raise NegativeDensityError


# The calling code will continue to work exactly as before because it already
# catches InvalidDensityError exceptions (the parent class of
# NegativeDensityError). In the future, the caller could decide to
# special-case the new type of exception and change its behavior accordingly.

# try:
#     weight = my_module.determine_weight(1, -1)
# except my_module.NegativeDensityError as e:
#     raise ValueError('Must supply non-negative density') from e
# except my_module.InvalidDensityError:
#     weight = 0
# except my_module.Error as e:
#     logging.error('Bug in the calling code: %s', e)
# except Exception as e:
#     logging.error('Bug in the API code: %s', e)

# You can take API future-proofing further by providing a broader set of
# exceptions directly below the root exception. For example, imagine you had
# one set of errors related to calculating weights, another related to
# calculating volume, and a third related to calculating density.


# my_module.py
class WeightError(Error):
    """Base-class for weight calculation errors."""


class VolumeError(Error):
    """Base-class for volume calculation errors."""


class DensityError(Error):
    """Base-class for density calculation errors."""


# Specific exceptions would inherit from these general exceptions. Each
# intermediate exception acts as its own kind of root exception. This makes
# it easier to insulate layers of calling code from API code based on broad
# functionality. This is much better than having all callers catch a long
# list of very specific Exception subclasses.


# Things to remember

# 1. Defining root exceptions for your modules allows API consumers to
# insulate themselves from your API.
# 2. Catching root exceptions can help you find bugs in code that consumes an
# API.
# 3. Catching the Python Exception base class can help you find bugs in API
# implementations.
# 4. Intermediate root exceptions let you add more specific types of
# exceptions in the future without breaking your API consumers.
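# Because every name above is defined in this very file, the commented
# pattern can also run directly. A minimal sketch (mine, not the book's):

import logging

try:
    weight = determine_weight(1, -1)  # raises NegativeDensityError
except InvalidDensityError:           # the parent class still catches it
    weight = 0
except Error as e:
    logging.error('Bug in the calling code: %s', e)
print(weight)
# 0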
--------------------------------------------------------------------------------
/item_52_break_circular_dependencies.py:
--------------------------------------------------------------------------------
# Item 52: Know how to break circular dependencies


# Inevitably, while you're collaborating with others, you'll find a mutual
# interdependency between modules. It can even happen while you work by
# yourself on the various parts of a single program.

# The best approach is to refactor the mutual dependency into a separate
# module at the bottom of the dependency tree. When that isn't practical,
# there are three other ways to break circular dependencies.

# Reordering Imports
# The first approach is to change the order of imports.

# Import, Configure, Run
# A second solution to the circular imports problem is to have your modules
# minimize side effects at import time.

# Dynamic Import
# The third--and often simplest--solution to the circular imports problem is
# to use an import statement within a function or method (see the sketch
# after this item's summary). This is called a dynamic import because the
# module import happens while the program is running, not while the program
# is first starting up and initializing its modules.


# Things to remember

# 1. Circular dependencies happen when two modules must call into each other
# at import time. They can cause your program to crash at startup.
# 2. The best way to break a circular dependency is refactoring mutual
# dependencies into a separate module at the bottom of the dependency tree.
# 3. Dynamic imports are the simplest solution for breaking a circular
# dependency between modules while minimizing refactoring and complexity.
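# A minimal sketch of the dynamic import approach (the module names are
# hypothetical, echoing the book's dialogs/app example):

# # dialogs.py
# def show(msg):
#     import app  # dynamic import: resolved at call time, not at import time
#     app.display(msg)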
--------------------------------------------------------------------------------
/item_54_consider_module_scoped_code.py:
--------------------------------------------------------------------------------
# Chapter 8: Production


# Putting a Python program to use requires moving it from a development
# environment to a production environment. Supporting disparate configurations
# like this can be a challenge. Making programs that are dependable in
# multiple situations is just as important as making programs with correct
# functionality.

# The goal is to productionize your Python programs and make them bulletproof
# while they're in use. Python has built-in modules that aid in hardening your
# programs. It provides facilities for debugging, optimizing, and testing to
# maximize the quality and performance of your programs at runtime.


# Item 54: Consider module-scoped code to configure deployment environments

# A deployment environment is a configuration in which your program runs.
# Every program has at least one deployment environment, the production
# environment. The goal of writing a program in the first place is to put it
# to work in the production environment and achieve some kind of outcome.

# Writing or modifying a program requires being able to run it on the computer
# you use for developing. The configuration of your development environment
# may be much different from that of your production environments, even when
# both have the same Python packages installed. The trouble is that production
# environments often require many external assumptions that are hard to
# reproduce in development environments.

# For example, say you want to run your program in a web server container and
# give it access to a database. This means that every time you want to modify
# your program's code, you need to run a server container, the database must
# be set up properly, and your program needs the password for access. That's a
# very high cost if all you're trying to do is verify that a one-line change
# to your program works correctly.

# The best way to work around these issues is to override parts of your
# program at startup time to provide different functionality depending on the
# deployment environment. For example, you could have two different __main__
# files, one for production and one for development.

# # dev_main.py
# TESTING = True
# import db_connection
# db = db_connection.Database()

# # prod_main.py
# TESTING = False
# import db_connection
# db = db_connection.Database()

# The only difference between the two files is the value of the TESTING
# constant. Other modules in your program can then import the __main__ module
# and use the value of TESTING to decide how they define their own attributes.

# # db_connection.py
# import __main__
#
#
# class TestingDatabase(object):
#     # ...
#     pass
#
#
# class RealDatabase(object):
#     # ...
#     pass
#
#
# if __main__.TESTING:
#     Database = TestingDatabase
# else:
#     Database = RealDatabase

# The key behavior to notice here is that code running in module scope--not
# inside any function or method--is just normal Python code. You can use an
# if statement at the module level to decide how the module will define names.
# This makes it easy to tailor modules to your various deployment
# environments. You avoid having to reproduce costly assumptions like
# database configurations when they aren't needed. You can inject fake or mock
# implementations that ease interactive development and testing (see Item 56:
# "Test everything with unittest").

# Note
# Once your deployment environments get complicated, you should consider
# moving them out of Python constants (like TESTING) and into dedicated
# configuration files. Tools like the configparser built-in module let you
# maintain production configurations separate from code, a distinction that's
# crucial for collaborating with an operations team.

# This approach can be used for more than working around external assumptions.
# For example, if you know that your program must work differently based on
# its host platform, you can inspect the sys module before defining top-level
# constructs in a module.

# db_connection.py
import sys


class Win32Database(object):
    # ...
    pass


class PosixDatabase(object):
    # ...
    pass


if sys.platform.startswith('win32'):
    Database = Win32Database
else:
    Database = PosixDatabase


# Similarly, you can use environment variables from os.environ to guide your
# module definitions, as in the sketch below.
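# A hedged sketch of that os.environ variant (mine, not the book's; the
# TESTING variable name is hypothetical, mirroring dev_main.py above):

# # db_connection.py
# import os
#
# if os.environ.get('TESTING') == 'true':
#     Database = TestingDatabase
# else:
#     Database = RealDatabase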
# Things to remember

# 1. Programs often need to run in multiple deployment environments that each
# have unique assumptions and configurations.
# 2. You can tailor a module's contents to different deployment environments
# by using normal Python statements in module scope.
# 3. Module contents can be the product of any external condition, including
# host introspection through the sys and os modules.

--------------------------------------------------------------------------------
/item_55_use_repr_strings.py:
--------------------------------------------------------------------------------
# Item 55: Use repr strings for debugging output


# When debugging a Python program, the print function (or output via the
# logging built-in module) will get you surprisingly far. Python internals are
# often easy to access via plain attributes (see Item 27: "Prefer public
# attributes over private ones"). All you need to do is print how the state of
# your program changes while it runs and see where it goes wrong.

# The print function outputs a human-readable string version of whatever you
# supply it. For example, printing a basic string will print the contents of
# the string without the surrounding quote characters.

# The problem is that the human-readable string for a value doesn't make it
# clear what the actual type of the value is. For example, notice how in the
# default output of print you can't distinguish between the types of the
# number 5 and the string '5'.

print(5)
print('5')
# 5
# 5

# If you're debugging a program with print, these type differences matter.
# What you almost always want while debugging is to see the repr version of an
# object. The repr built-in function returns the printable representation of
# an object, which should be its most clearly understandable string
# representation. For built-in types, the string returned by repr is a valid
# Python expression.

a = '\x07'
print(repr(a))
# '\x07'

# Passing the value from repr to the eval built-in function should result in
# the same Python object you started with (of course, in practice, you should
# only use eval with extreme caution).

b = eval(repr(a))
assert a == b

# When you're debugging with print, you should repr the value before printing
# to ensure that any difference in types is clear.

print(repr(5))
print(repr('5'))
# 5
# '5'

# For dynamic Python objects, the default human-readable string value is the
# same as the repr value. This means that passing a dynamic object to print
# will do the right thing, and you don't need to explicitly call repr on it.
# Unfortunately, the default value of repr for object instances isn't
# especially helpful. For example, here I define a simple class and then print
# its value:


class OpaqueClass(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y


obj = OpaqueClass(1, 2)
print(obj)
print(repr(obj))
# <__main__.OpaqueClass object at 0x7f454b200828>
# <__main__.OpaqueClass object at 0x7f454b200828>

# This output can't be passed to the eval function, and it says nothing about
# the instance fields of the object.

# There are two solutions to this problem. If you have control of the class,
# you can define your own __repr__ special method that returns a string
# containing the Python expression that recreates the object.
# Here, I define that method for the class above:


class BetterClass(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        return 'BetterClass(%d, %d)' % (self.x, self.y)


# Now, the repr value is much more useful.

obj = BetterClass(1, 2)
print(obj)
print(repr(obj))
# BetterClass(1, 2)
# BetterClass(1, 2)

# When you don't have control over the class definition, you can reach into
# the object's instance dictionary, which is stored in the __dict__
# attribute. Here, I print out the contents of an OpaqueClass instance:

obj = OpaqueClass(4, 5)
print(obj.__dict__)
# {'y': 5, 'x': 4}


# Things to remember

# 1. Calling print on built-in Python types will produce the human-readable
# string version of a value, which hides type information.
# 2. Calling repr on built-in Python types will produce the printable string
# version of a value. These repr strings could be passed to the eval
# built-in function to get back the original value.
# 3. %s in format strings will produce human-readable strings like str; %r
# will produce printable strings like repr.
# 4. You can define the __repr__ method to customize the printable
# representation of a class and provide more detailed debugging
# information.
# 5. You can reach into any object's __dict__ attribute to view its internals.

--------------------------------------------------------------------------------
/item_56_test_utils.py:
--------------------------------------------------------------------------------
# utils_test.py
from unittest import TestCase, main
from item_56_utils import to_str
from tempfile import TemporaryDirectory


class UtilsTestCase(TestCase):
    def test_to_str_bytes(self):
        self.assertEqual('hello', to_str(b'hello'))

    def test_to_str_str(self):
        self.assertEqual('hello', to_str('hello'))

    def test_to_str_bad(self):
        self.assertRaises(TypeError, to_str, object())


class MyTest(TestCase):
    def setUp(self):
        self.test_dir = TemporaryDirectory()

    def tearDown(self):
        self.test_dir.cleanup()

    def test_to_str_bytes(self):
        self.assertEqual('hello', to_str(b'hello'))

    def test_to_str_str(self):
        self.assertEqual('hello', to_str('hello'))

    def test_to_str_bad(self):
        self.assertRaises(TypeError, to_str, object())


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/item_56_unittest.py:
--------------------------------------------------------------------------------
# Item 56: Test everything with unittest
from unittest import TestCase, main
from item_56_utils import to_str


# Python doesn't have static type checking. There's nothing in the compiler
# that ensures your program will work when you run it. With Python you don't
# know whether the functions your program calls will be defined at runtime,
# even when their existence is evident in the source code. This dynamic
# behavior is a blessing and a curse.

# The large numbers of Python programmers out there say it's worth it because
# of the productivity gained from the resulting brevity and simplicity.
# But most people have heard at least one horror story about Python in which a
# program encountered a boneheaded error at runtime.

# One of the worst examples I've heard is when a SyntaxError was raised in
# production as a side effect of a dynamic import (see Item 52: "Know how to
# break circular dependencies"). The programmer I know who was hit by this
# surprising occurrence has since ruled out using Python ever again.

# But I have to wonder, why wasn't the code tested before the program was
# deployed to production? Type safety isn't everything. You should always test
# your code, regardless of what language it's written in. However, I'll admit
# that the big difference between Python and many other languages is that the
# only way to have any confidence in a Python program is by writing tests.
# There is no veil of static type checking to make you feel safe.

# Luckily, the same dynamic features that prevent static type checking in
# Python also make it extremely easy to write tests for your code. You can use
# Python's dynamic nature and easily overridable behaviors to implement tests
# and ensure that your programs work as expected.

# You should think of tests as an insurance policy on your code. Good tests
# give you confidence that your code is correct. If you refactor or expand
# your code, tests make it easy to identify how behaviors have changed. It
# sounds counter-intuitive, but having good tests actually makes it easier to
# modify Python code, not harder.

# The simplest way to write tests is to use the unittest built-in module. For
# example, say you have the following utility function defined in utils.py:


def to_str(data):
    if isinstance(data, str):
        return data
    elif isinstance(data, bytes):
        return data.decode('utf-8')
    else:
        raise TypeError('Must supply str or bytes, '
                        'found: %r' % data)


# To define tests, I create a second file named test_utils.py or utils_test.py


class UtilsTestCase(TestCase):
    def test_to_str_bytes(self):
        self.assertEqual('hello', to_str(b'hello'))

    def test_to_str_str(self):
        self.assertEqual('hello', to_str('hello'))

    def test_to_str_bad(self):
        self.assertRaises(TypeError, to_str, object())


if __name__ == '__main__':
    main()


# Tests are organized into TestCase classes. Each test is a method beginning
# with the word test. If a test method runs without raising any kind of
# Exception (including AssertionError from assert statements), then the test
# is considered to have passed successfully.

# The TestCase class provides helper methods for making assertions in your
# tests, such as assertEqual for verifying equality, assertTrue for verifying
# Boolean expressions, and assertRaises for verifying that exceptions are
# raised when appropriate (see help(TestCase) for more). You can define your
# own helper methods in TestCase subclasses to make your tests more readable;
# just ensure that your method names don't begin with the word test.

# Note
# Another common practice when writing tests is to use mock functions and
# classes to stub out certain behaviors. For this purpose, Python 3 provides
# the unittest.mock built-in module, which is available for Python 2 as an
# open source package.
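# As a brief, hedged illustration of that note (mine, not the book's): a Mock
# stands in for a collaborator and records how it was called.

from unittest.mock import Mock

database = Mock()                        # stands in for a real DB object
database.fetch.return_value = b'hello'   # stub out the method's result
assert to_str(database.fetch()) == 'hello'
database.fetch.assert_called_once_with()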
# Sometimes, your TestCase classes need to set up the test environment before
# running test methods. To do this, you can override the setUp and tearDown
# methods. These methods are called before and after each test method,
# respectively, and they let you ensure that each test runs in isolation (an
# important best practice of proper testing). For example, here I define a
# TestCase that creates a temporary directory before each test and deletes its
# contents after each test finishes:

from tempfile import TemporaryDirectory  # needed by setUp below


class MyTest(TestCase):
    def setUp(self):
        self.test_dir = TemporaryDirectory()

    def tearDown(self):
        self.test_dir.cleanup()

    # def test_to_str_bytes(self):
    #     self.assertEqual('hello', to_str(b'hello'))
    #
    # def test_to_str_str(self):
    #     self.assertEqual('hello', to_str('hello'))
    #
    # def test_to_str_bad(self):
    #     self.assertRaises(TypeError, to_str, object())

# I usually define one TestCase for each set of related tests. Sometimes I
# have one TestCase for each function that has many edge cases. Other times,
# a TestCase spans all functions in a single module. I'll also create one
# TestCase for testing a single class and all of its methods.

# When programs get complicated, you'll want additional tests for verifying
# the interactions between your modules, instead of only testing code in
# isolation. This is the difference between unit tests and integration tests.
# In Python, it's important to write both types of tests for exactly the same
# reason: You have no guarantee that your modules will actually work together
# unless you prove it.

# Note
# Depending on your project, it can also be useful to define data-driven tests
# or organize tests into different suites of related functionality. For these
# purposes, for code coverage reports, and for other advanced use cases, the
# nose (http://nose.readthedocs.org/) and pytest (http://pytest.org/) open
# source packages can be especially helpful.


# Things to remember

# 1. The only way to have confidence in a Python program is to write tests.
# 2. The unittest built-in module provides most of the facilities you'll need
# to write good tests.
# 3. You can define tests by subclassing TestCase and defining one method per
# behavior you'd like to test. Test methods on TestCase classes must start
# with the word test.
# 4. It's important to write both unit tests (for isolated functionality) and
# integration tests (for modules that interact).

--------------------------------------------------------------------------------
/item_56_utils.py:
--------------------------------------------------------------------------------
# utils.py


def to_str(data):
    if isinstance(data, str):
        return data
    elif isinstance(data, bytes):
        return data.decode('utf-8')
    else:
        raise TypeError('Must supply str or bytes, '
                        'found: %r' % data)

--------------------------------------------------------------------------------
/item_57_pdb.py:
--------------------------------------------------------------------------------
# Item 57: Consider interactive debugging with pdb


# Everyone encounters bugs in their code while developing programs.
# Using the print function can help you track down the source of many issues
# (see Item 55: "Use repr strings for debugging output"). Writing tests for
# specific cases that cause trouble is another great way to isolate problems
# (see Item 56: "Test everything with unittest").

# But these tools aren't enough to find every root cause. When you need
# something more powerful, it's time to try Python's built-in interactive
# debugger. The debugger lets you inspect program state, print local
# variables, and step through a Python program one statement at a time.

# In most other programming languages, you use a debugger by specifying what
# line of a source file you'd like to stop on, then execute the program. In
# contrast, with Python the easiest way to use the debugger is by modifying
# your program to directly initiate the debugger just before you think you'll
# have an issue worth investigating. There is no difference between running a
# Python program under a debugger and running it normally.

# To initiate the debugger, all you have to do is import the pdb built-in
# module and run its set_trace function. You'll often see this done in a
# single line so programmers can comment it out with a single # character.


def complex_func(a, b, c):
    # ...
    import pdb
    pdb.set_trace()


# As soon as this statement runs, the program will pause its execution. The
# terminal that started your program will turn into an interactive Python
# shell.

# -> import pdb; pdb.set_trace()
# (Pdb)

# At the (Pdb) prompt, you can type in the names of local variables to see
# their values printed out. You can see a list of all local variables by
# calling the locals built-in function. You can import modules, inspect global
# state, construct new objects, run the help built-in function, and even
# modify parts of the program--whatever you need to do to aid in your
# debugging. In addition, the debugger has three commands that make inspecting
# the running program easier.
# 1. bt: Print the traceback of the current execution call stack. This lets
# you figure out where you are in your program and how you arrived at the
# pdb.set_trace trigger point.
# 2. up: Move your scope up the function call stack to the caller of the
# current function. This allows you to inspect the local variables in
# higher levels of the call stack.
# 3. down: Move your scope back down the function call stack one level.

# Once you're done inspecting the current state, you can use debugger commands
# to resume the program's execution under precise control.
# 1. step: Run the program until the next line of execution in the program,
# then return control back to the debugger. If the next line of execution
# includes calling a function, the debugger will stop in the function that
# was called.
# 2. next: Run the program until the next line of execution in the current
# function, then return control back to the debugger. If the next line of
# execution includes calling a function, the debugger will not stop until
# the called function has returned.
# 3. return: Run the program until the current function returns, then return
# control back to the debugger.
# 4. continue: Continue running the program until the next breakpoint (or
# set_trace is called again). A sketch of a typical session follows below.
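# A compact, hedged sketch of a typical session with complex_func above
# (the commands are real pdb commands; the values shown are illustrative):

# >>> complex_func(1, 2, 3)
# -> pdb.set_trace()
# (Pdb) bt          # where am I, and how did I get here?
# (Pdb) print(a)    # any Python expression works at the prompt
# 1
# (Pdb) up          # inspect the caller's stack frame
# (Pdb) down        # and move back down again
# (Pdb) continue    # resume normal execution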
# Things to remember

# 1. You can initiate the Python interactive debugger at a point of interest
# directly in your program with the import pdb; pdb.set_trace() statement.
# 2. The Python debugger prompt is a full Python shell that lets you inspect
# and modify the state of a running program.
# 3. pdb shell commands let you precisely control program execution, allowing
# you to alternate between inspecting program state and progressing program
# execution.

--------------------------------------------------------------------------------
/item_59_use_tracemalloc.py:
--------------------------------------------------------------------------------
# Item 59: Use tracemalloc to understand memory usage and leaks


# Memory management in the default implementation of Python, CPython, uses
# reference counting. This ensures that as soon as all references to an
# object have expired, the referenced object is also cleared. CPython also
# has a built-in cycle detector to ensure that self-referencing objects are
# eventually garbage collected.

# In theory, this means that most Python programmers don't have to worry about
# allocating or deallocating memory in their programs. It's taken care of
# automatically by the language and the CPython runtime. However, in practice,
# programs eventually do run out of memory due to held references. Figuring
# out where your Python programs are using or leaking memory proves to be a
# challenge.

# The first way to debug memory usage is to ask the gc built-in module to list
# every object currently known by the garbage collector. Although it's quite
# a blunt tool, this approach does let you quickly get a sense of where your
# program's memory is being used.

# Here, I run a program that wastes memory by keeping references. It prints
# out how many objects were created during execution and a small sample of
# allocated objects.

# item_59_use_tracemalloc_using_gc.py
import item_59_use_tracemalloc_using_gc
# 4944 objects before
# 4955 objects after
# {'_loaders': [('.cpython-35m-x86_64-linux-gnu.so', ...  (sample truncated)

# The problem with gc.get_objects is that it doesn't tell you anything about
# how the objects were allocated. In complicated programs, a specific class
# of object could be allocated many different ways. The overall number of
# objects isn't nearly as important as identifying the code responsible for
# allocating the objects that are leaking memory.

# Python 3.4 introduces a new tracemalloc built-in module for solving this
# problem. tracemalloc makes it possible to connect an object back to where
# it was allocated. Here, I print out the top three memory usage offenders
# in a program using tracemalloc:

# item_59_use_tracemalloc_top_n.py
import item_59_use_tracemalloc_top_n
# /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py:7: size=3539 KiB (+3539 KiB), count=100000 (+100000), average=36 B
# /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_top_n.py:6: size=1264 B (+1264 B), count=2 (+2), average=632 B
# <frozen importlib._bootstrap>:476: size=485 B (+485 B), count=6 (+6), average=81 B

# It's immediately clear which objects are dominating my program's memory
# usage and where in the source code they were allocated.

# The tracemalloc module can also print out the full stack trace of each
# allocation (up to the number of frames passed to the start method). Here, I
# print out the stack trace of the biggest source of memory usage in the
# program:

# item_59_use_tracemalloc_with_trace.py
import item_59_use_tracemalloc_with_trace
# File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py", line 7
#     a.append(10 * 230 * i)
# File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_with_trace.py", line 6
#     x = waste_memory.run()

# A stack trace like this is most valuable for figuring out which particular
# usage of a common function is responsible for memory consumption in a
# program.

# Unfortunately, Python 2 doesn't provide the tracemalloc built-in module.
# There are open source packages for tracking memory usage in Python 2 (such
# as heapy), though they do not fully replicate the functionality of
# tracemalloc.


# Things to remember

# 1. It can be difficult to understand how Python programs use and leak
# memory.
# 2. The gc module can help you understand which objects exist, but it has no
# information about how they were allocated.
# 3. The tracemalloc built-in module provides powerful tools for understanding
# the source of memory usage.
# 4. tracemalloc is only available in Python 3.4 and above.

--------------------------------------------------------------------------------
/item_59_use_tracemalloc_top_n.py:
--------------------------------------------------------------------------------
import tracemalloc
tracemalloc.start(10)  # Save up to 10 stack frames

time1 = tracemalloc.take_snapshot()
import item_59_use_tracemalloc_waste_memory as waste_memory
x = waste_memory.run()
time2 = tracemalloc.take_snapshot()

stats = time2.compare_to(time1, 'lineno')
for stat in stats[:3]:
    print(stat)

# /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py:7: size=3539 KiB (+3539 KiB), count=100000 (+100000), average=36 B
# /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_top_n.py:6: size=1264 B (+1264 B), count=2 (+2), average=632 B
# <frozen importlib._bootstrap>:476: size=485 B (+485 B), count=6 (+6), average=81 B

--------------------------------------------------------------------------------
/item_59_use_tracemalloc_using_gc.py:
--------------------------------------------------------------------------------
import gc
found_objects = gc.get_objects()
print('%d objects before' % len(found_objects))


import item_59_use_tracemalloc_waste_memory as waste_memory
x = waste_memory.run()
found_objects = gc.get_objects()
print('%d objects after' % len(found_objects))
for obj in found_objects[:3]:
    print(repr(obj)[:100])

# 4916 objects before
# 5446 objects after
#
#
# {'_fields': ('context_expr', 'optional_vars'), '__doc__': None, '__module__': '_ast', '__weakref__':

--------------------------------------------------------------------------------
/item_59_use_tracemalloc_waste_memory.py:
--------------------------------------------------------------------------------


def run():
    a = []
    for i in range(100000):
        c = i**2 + 1            # busywork; the result is discarded
        a.append(10 * 230 * i)  # keep a reference so memory can't be freed
    return a

--------------------------------------------------------------------------------
/item_59_use_tracemalloc_with_trace.py:
--------------------------------------------------------------------------------
import tracemalloc
tracemalloc.start(10)  # Save up to 10 stack frames

time1 = tracemalloc.take_snapshot()
import item_59_use_tracemalloc_waste_memory as waste_memory
x = waste_memory.run()
time2 = tracemalloc.take_snapshot()

stats = time2.compare_to(time1, 'traceback')
top = stats[0]
print('\n'.join(top.traceback.format()))

# File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py", line 7
#     a.append(10 * 230 * i)
# File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_with_trace.py", line 6
#     x = waste_memory.run()
--------------------------------------------------------------------------------