├── README.md
├── item_01_version_of_python.py
├── item_02_PEP8Style.py
├── item_03_Difference_bytes_str_unicode.py
├── item_04_helper_function.py
├── item_05_slice_sequence.py
├── item_06_avoid_using.py
├── item_07_list_not_map_filter.py
├── item_08_no_more_than_2_expressions.py
├── item_09_generator_expressions.py
├── item_10_prefer_enumerate.py
├── item_11_use_zip.py
├── item_12_avoid_else.py
├── item_13_try_except_else_finally.py
├── item_14_prefer_exceptions.py
├── item_15_closure_variable_scope.py
├── item_16_address.txt
├── item_16_generators_instead_of_lists.py
├── item_17_be_defensive.py
├── item_17_my_numbers.txt
├── item_18_reduce_visual_noise.py
├── item_19_provide_optimal_behavior.py
├── item_20_use_none_and_docstrings.py
├── item_21_enforce_clarity.py
├── item_22_prefer_helper_classes.py
├── item_23_accepts_functions_4_interfaces.py
├── item_24_use_classmethod.py
├── item_25_init_parent_classes_with_super.py
├── item_26_when_use_multiple_inheritance.py
├── item_27_prefer_public_attributes.py
├── item_28_inherit_from_collections_abc.py
├── item_29_use_plain_attributes.py
├── item_30_consider_property.py
├── item_31_use_descriptors.py
├── item_32_use_getattr.py
├── item_33_validate_subclass.py
├── item_34_register_class_existence.py
├── item_35_annotate_class_attributes.py
├── item_36_use_subprocess.py
├── item_37_use_threads.py
├── item_38_use_lock.py
├── item_39_use_queue.py
├── item_40_consider_coroutines.py
├── item_41_consider_concurrent_futures.py
├── item_42_define_function_decorators.py
├── item_43_consider_contexlib.py
├── item_44_make_pickle_reliable.py
├── item_45_use_datetime.py
├── item_46_use_built_in_algorithm.py
├── item_47_use_decimal.py
├── item_48_community_built_modules.py
├── item_49_write_docstrings_4_everything.py
├── item_50_use_packages.py
├── item_51_define_a_root_exception.py
├── item_52_break_circular_dependencies.py
├── item_53_use_virtual_environments.py
├── item_54_consider_module_scoped_code.py
├── item_55_use_repr_strings.py
├── item_56_test_utils.py
├── item_56_unittest.py
├── item_56_utils.py
├── item_57_pdb.py
├── item_58_profile_before_optimizing.py
├── item_59_use_tracemalloc.py
├── item_59_use_tracemalloc_top_n.py
├── item_59_use_tracemalloc_using_gc.py
├── item_59_use_tracemalloc_waste_memory.py
└── item_59_use_tracemalloc_with_trace.py
/item_01_version_of_python.py:
--------------------------------------------------------------------------------
1 | # Chapter 1: Pythonic thinking
2 |
3 |
4 | # The idioms of a programming language are defined by its users. Over the
5 | # years, the Python community has come to use the adjective Pythonic to
6 | # describe code that follows a particular style. The Pythonic style isn't
7 | # regimented or enforced by the compiler. It has emerged over time through
8 | # experience using the language and working with others. Python programmers
9 | # prefer to be explicit, to choose simple over complex, and to maximize
10 | # readability (type import this).
11 |
12 | # Programmers familiar with other languages may try to write Python as if it's
13 | # C++, Java, or whatever language they know best. New programmers may still be
14 | # getting comfortable with the vast range of concepts expressible in Python.
15 | # It's important for everyone to know the best--the Pythonic--way to do the most
16 | # common things in Python. These patterns will affect every program you write.
17 |
18 |
19 | # Item 1: Know which version of python you're using
20 |
21 |
22 | # $ python --version
23 | # Python 2.7.12
24 | #
25 | # $ python3 --version
26 | # Python 3.5.2
27 |
28 |
29 | import sys
30 | print(sys.version_info)
31 | # sys.version_info(major=2, minor=7, micro=12, releaselevel='final', serial=0)
32 |
33 | print(sys.version)
34 | # 2.7.12 (default, Nov 19 2016, 06:48:10)
35 | # [GCC 5.4.0 20160609]
36 |
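
# A small sketch (my own addition, not from the book): if your program
# depends on Python 3, you can enforce the expected version at startup.
assert sys.version_info >= (3,), 'This program requires Python 3'
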
37 |
38 | # Things to Remember
39 |
40 | # 1. There are two major versions of Python still in active use: Python 2 and
41 | # Python 3.
42 | # 2. There are multiple popular runtimes for Python: CPython, Jython,
43 | # IronPython, PyPy, etc.
44 | # 3. Be sure that the command-line for running Python on your system is the
45 | # version you expect it to be.
46 | # 4. Prefer Python 3 for your next project because that is the primary focus
47 | # of the Python community.
48 |
--------------------------------------------------------------------------------
/item_02_PEP8Style.py:
--------------------------------------------------------------------------------
1 | # Item 2: Follow the PEP 8 style guide
2 |
3 |
4 | # Whitespace: In Python, whitespace is syntactically significant. Python
5 | # programmers are especially sensitive to the effects of whitespace on
6 | # code clarity.
7 |
8 | # 1. Use spaces instead of tabs for indentation.
9 | # 2. Use four spaces for each level of syntactically significant indenting.
10 | # 3. Lines should be 79 characters in length or less.
11 | # 4. Continuations of long expressions onto additional lines should be
12 | # indented by four extra spaces from their normal indentation level.
13 | # 5. In a file, functions and classes should be separated by two blank lines.
14 | # 6. In a class, methods should be separated by one blank line.
15 | # 7. Don't put spaces around list indexes, function calls, or keyword
16 | # argument assignments.
17 | # 8. Put one, and only one, space before and after variable assignments.
18 |
19 |
20 | # Naming: PEP 8 suggests unique styles of naming for different parts of the
21 | # language. (The rules below are sketched in code after this list.)
22 |
23 | # 1. Functions, variables, and attributes should be in lowercase_underscore
24 | # format.
25 | # 2. Protected instance attributes should be in _leading_underscore format.
26 | # 3. Private instance attributes should be in __double_leading_underscore
27 | # format.
28 | # 4. Classes and exceptions should be in CapitalizedWord format.
29 | # 5. Module-level constants should be in ALL_CAPS format.
30 | # 6. Instance methods in classes should use self as the name of the first
31 | # parameter (which refers to the object).
32 | # 7. Class methods should use cls as the name of the first parameter (which
33 | # refers to the class).
34 |
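# A minimal sketch of the naming rules above (my own illustration; the names
# are hypothetical):

MAX_SIZE = 10                    # 5. module-level constant: ALL_CAPS


class MyContainer:               # 4. class: CapitalizedWord
    def __init__(self, value):
        self._value = value      # 2. protected attribute: _leading_underscore

    def current_size(self):      # 1. method: lowercase_underscore; 6. self first
        return len(self._value)

    @classmethod
    def empty(cls):              # 7. class method: cls as first parameter
        return cls([])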
35 |
36 | # Expressions and Statements: The Zen of Python states: "There should be one--
37 | # and preferably only one --obvious way to do it."
38 |
39 | # 1. Use inline negation (if a is not b) instead of negation of positive
40 | # expressions (if not a is b). (Rules 1-3 are sketched in code after this list.)
41 | # 2. Don't check for empty values (like [] or '') by checking the length
42 | # (if len(somelist) == 0). Use if not somelist and assume empty values
43 | # implicitly evaluate to False.
44 | # 3. The same thing goes for non-empty values (like [1] or 'hi'). The statement
45 | # if somelist is implicitly True for non-empty values.
46 | # 4. Avoid single-line if statements, for and while loops, and except compound
47 | # statements. Spread these over multiple lines for clarity.
48 | # 5. Always put import statements at the top of a file.
49 | # 6. Always use absolute names for modules when importing them, not names
50 | # relative to the current module's own path. For example, to import the foo
51 | # module from the bar package, you should do from bar import foo, not just
52 | # import foo.
53 | # 7. If you must do relative imports, use the explicit syntax from . import foo.
54 | # 8. Imports should be in sections in the following order: standard library
55 | # modules, third-party modules, your own modules. Each subsection should
56 | # have imports in alphabetical order.
57 |
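# A quick sketch of rules 1-3 above (my own illustration, not from the book):

a, b = [], ''
assert a is not b    # 1. inline negation reads better than "not a is b"
if not a:            # 2. test emptiness with truthiness, not len(a) == 0
    print('a is empty')
if 'hi':             # 3. non-empty values are implicitly True
    print('non-empty')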
58 |
59 | # Things to Remember
60 |
61 | # 1. Always follow the PEP 8 style guide when writing Python code.
62 | # 2. Sharing a common style with the larger Python community facilitates
63 | # collaboration with others.
64 | # 3. Using a consistent style makes it easier to modify your own code later.
65 |
--------------------------------------------------------------------------------
/item_03_Difference_bytes_str_unicode.py:
--------------------------------------------------------------------------------
1 | # Item 3: Know the difference between bytes, str, and unicode
2 | import os
3 |
4 | # In Python 3, there are two types that represent sequences of characters:
5 | # bytes and str. Instances of bytes contain raw 8-bit values. Instances of
6 | # str contain Unicode characters.
7 |
8 | # In Python 2, there are two types that represent sequences of characters: str
9 | # and unicode. In contrast to Python 3, instances of str contain raw 8-bit
10 | # values. Instances of unicode contain Unicode characters.
11 |
12 | # There are many ways to represent Unicode characters as binary data (raw
13 | # 8-bit values). The most common encoding is UTF-8. Importantly, str
14 | # instances in Python 3 and unicode instances in Python 2 do not have an
15 | # associated binary encoding. To convert Unicode characters to binary data,
16 | # you must use the encode method. To convert binary data to Unicode
17 | # characters, you must use the decode method.
18 |
19 | # When you're writing Python programs, it's important to do encoding and
20 | # decoding of Unicode at the furthest boundary of your interfaces. The core of
21 | # your program should use Unicode character types (str in Python 3, unicode in
22 | # Python 2) and should not assume anything about character encodings. This
23 | # approach allows you to be very accepting of alternative text encodings
24 | # (such as Latin-1, Shift JIS, and Big5) while being strict about your output
25 | # text encoding (ideally, UTF-8).
26 |
27 | # The split between character types leads to two common situations in Python
28 | # code:
29 | # 1. You want to operate on raw 8-bit values that are UTF-8-encoded characters
30 | # (or some other encoding).
31 | # 2. You want to operate on Unicode characters that have no specific encoding.
32 |
33 | # You'll often need two helper functions to convert between these two cases
34 | # and to ensure that the type of input values matches your code's
35 | # expectations.
36 |
37 | # In Python 3, you'll need one method that takes a str or bytes and always
38 | # returns a str.
39 |
40 |
41 | def to_str(bytes_or_str):
42 | if isinstance(bytes_or_str, bytes):
43 | value = bytes_or_str.decode('utf-8')
44 | else:
45 | value = bytes_or_str
46 | return value # Instance of str
47 |
48 |
49 | # You'll need another method that takes a str or bytes and always returns a
50 | # bytes.
51 |
52 |
53 | def to_bytes(bytes_or_str):
54 | if isinstance(bytes_or_str, str):
55 | value = bytes_or_str.encode('utf-8')
56 | else:
57 | value = bytes_or_str
58 |     return value  # Instance of bytes
59 |
60 |
61 | # In Python 2, you'll need one method that takes a str or unicode and always
62 | # returns a unicode.
63 |
64 |
65 | def to_unicode(unicode_or_str):
66 | if isinstance(unicode_or_str, str):
67 | value = unicode_or_str.decode('utf-8')
68 | else:
69 | value = unicode_or_str
70 | return value # Instance of unicode
71 |
72 |
73 | # You'll need another method that takes str or unicode and always returns a str.
74 |
75 |
76 | def to_str(unicode_or_str):
77 |     if isinstance(unicode_or_str, unicode):
78 | value = unicode_or_str.encode('utf-8')
79 | else:
80 | value = unicode_or_str
81 | return value # Instance of str
82 |
83 |
84 | # There are two big gotchas when dealing with raw 8-bit values and Unicode
85 | # characters in Python.
86 |
87 | # The first issue is that in Python 2, unicode and str instances seem to be
88 | # the same type when a str only contains 7-bit ASCII characters.
89 | # 1. You can combine such a str and unicode together using the + operator.
90 | # 2. You can compare such str and unicode instances using equality and
91 | # inequality operators.
92 |
93 | # All of this behavior means that you can often pass a str or unicode instance
94 | # to a function expecting one or the other and things will just work (as long
95 | # as you're only dealing with 7-bit ASCII). In Python 3, bytes and str
96 | # instances are never equivalent, not even the empty string, so you must be
97 | # more deliberate about the types of character sequences you're passing around.
98 |
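# For example, in Python 3 even empty instances differ by type (a small
# demonstration of the point above, not from the book):

assert b'' != ''   # bytes and str are never equivalent
# b'foo' + 'bar'   # TypeError: can't concat str to bytes
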
99 | # The second issue is that in Python 3, operations involving file handles
100 | # (returned by the open built-in function) default to UTF-8 encoding. In
101 | # Python 2, file operations default to binary encoding. This causes surprising
102 | # failures, especially for programmers accustomed to Python 2.
103 |
104 | # For example, say you want to write some random binary data to a file. In
105 | # Python 2, this works. In Python 3, this breaks.
106 |
107 |
108 | with open('random.bin', 'w') as f:
109 |     # f.write(os.urandom(10))  # Breaks in Python 3: text mode expects str
110 |     f.write("random")          # Writing a str works in text mode
111 |
112 | # TypeError: write() argument must be str, not bytes
113 |
114 |
115 | # The cause of this exception is the new encoding argument for open that was
116 | # added in Python 3. This parameter defaults to 'utf-8'. That makes read and
117 | # write operations on file handles expect str instances containing Unicode
118 | # characters instead of bytes instances containing binary data.
119 |
120 | # To make this work properly, you must indicate that the data is being
121 | # opened in write binary mode ('wb') instead of write character mode ('w').
122 | # Here, I use open in a way that works correctly in Python 2 and Python 3:
123 |
124 |
125 | with open('random.bin', 'wb') as f:
126 | f.write(os.urandom(10))
127 |
128 |
129 | # This problem also exists for reading data from files. The solution is the
130 | # same: Indicate binary mode by using 'rb' instead of 'r' when opening a file.
131 |
132 |
133 | # Things to Remember
134 |
135 | # 1. In Python 3, bytes contains sequences of 8-bit values, str contains
136 | # sequences of Unicode characters. bytes and str instances can't be
137 | # used together with operators (like > or +).
138 | # 2. In Python 2, str contains sequences of 8-bit values, unicode contains
139 | # sequences of Unicode characters. str and unicode can be used together
140 | # with operators if the str only contains 7-bit ASCII characters.
141 | # 3. Use helper functions to ensure that the inputs you operate on are the
142 | # type of character sequence you expect (8-bit values, UTF-8 encoded
143 | # characters, Unicode characters, etc.)
144 | # 4. If you want to read or write binary data to/from a file, always open the
145 | # file using a binary mode (like 'rb' or 'wb').
146 |
--------------------------------------------------------------------------------
/item_04_helper_function.py:
--------------------------------------------------------------------------------
1 | # Item 4: Write helper functions instead of complex expressions
2 |
3 |
4 | # Python's pithy syntax makes it easy to write single-line expressions that
5 | # implement a lot of logic. For example, say you want to decode the query
6 | # string from a URL. Here, each query string parameter represents an integer
7 | # value:
8 |
9 |
10 | from urllib.parse import parse_qs
11 | my_values = parse_qs('red=5&blue=0&green=', keep_blank_values=True)
12 | print(repr(my_values))
13 | # $ python3 item_04_helper_function.py
14 | # {'red': ['5'], 'green': [''], 'blue': ['0']}
15 |
16 |
17 | # Some query string parameters may have multiple values, some may have single
18 | # values, some may be present but have blank values, and some may be missing
19 | # entirely. Using the get method on the result dictionary will return
20 | # different values in each circumstance.
21 |
22 |
23 | print("Red: ", my_values.get('red'))
24 | print("Green: ", my_values.get('green'))
25 | print("Opacity: ", my_values.get('opacity'))
26 | # $ python3 item_04_helper_function.py
27 | # Red: ['5']
28 | # Green: ['']
29 | # Opacity: None
30 |
31 |
32 | # It'd be nice if a default value of 0 was assigned when a parameter isn't
33 | # supplied or is blank. You might choose to do this with Boolean expressions
34 | # because it feels like this logic doesn't merit a whole if statement or
35 | # helper function quite yet.
36 |
37 | # Python's syntax makes this choice all too easy. The trick here is that the
38 | # empty string, the empty list, and zero all evaluate to False implicitly.
39 | # Thus, the expressions below will evaluate to the subexpression after the or
40 | # operator when the first subexpression is False.
41 |
42 |
43 | # For query string 'red=5&blue=0&green='
44 | red = my_values.get('red', [''])[0] or 0
45 | green = my_values.get('green', [''])[0] or 0
46 | opacity = my_values.get('opacity', [''])[0] or 0
47 | print("Red: %r" % red)
48 | print("Green: %r" % green)
49 | print("Opacity: %r" % opacity)
50 | # Red: '5'
51 | # Green: 0
52 | # Opacity: 0
53 |
54 |
55 | # The red case works because the key is present in the my_values dictionary.
56 | # The value is a list with one member: the string '5'. This string implicitly
57 | # evaluates to True, so red is assigned to the first part of the or
58 | # expression.
59 |
60 | # The green case works because the value in the my_values dictionary is a list
61 | # with one member: an empty string. The empty string implicitly evaluates to
62 | # False, causing the or expression to evaluate to 0.
63 |
64 | # The opacity case works because the value in the my_values dictionary is
65 | # missing altogether. The behavior of the get method is to return its second
66 | # argument if the key doesn't exist in the dictionary. The default value in
67 | # this case is a list with one member, an empty string. When opacity isn't
68 | # found in the dictionary, this code does exactly the same thing as the green
69 | # case.
70 |
71 | # However, this expression is difficult to read and it still doesn't do
72 | # everything you need. You'd also want to ensure that all the parameter values
73 | # are integers so you can use them in mathematical expressions. To do that,
74 | # you'd wrap each expression with the int built-in function to parse the
75 | # string as an integer.
76 |
77 |
78 | red = int(my_values.get('red', [''])[0] or 0)
79 | print('red: ', red)
80 | # red: 5
81 |
82 |
83 | # This is now extremely hard to read. There's so much visual noise. The code
84 | # isn't approachable. A new reader of the code would have to spend too much
85 | # time picking apart the expression to figure out what it actually does. Even
86 | # though it's nice to keep things short, it's not worth trying to fit this all
87 | # on one line.
88 |
89 | # Python 2.5 added if/else conditional (or "ternary") expressions to make cases
90 | # like this clearer while keeping the code short.
91 |
92 |
93 | red = my_values.get('red', [''])
94 | red = int(red[0]) if red[0] else 0
95 | print('red: ', red)
96 | # red: 5
97 |
98 |
99 | # This is better. For less complicated situations, if/else conditional
100 | # expressions can make things very clear. But the example above is still not
101 | # as clear as the alternative of a full if/else statement over multiple lines.
102 | # Seeing all of the logic spread out like this makes the dense version seem
103 | # even more complex.
104 |
105 |
106 | green = my_values.get('green', [''])
107 | if green[0]:
108 | green = int(green[0])
109 | else:
110 | green = 0
111 |
112 |
113 | # Writing a helper function is the way to go, especially if you need to use
114 | # this logic repeatedly.
115 |
116 |
117 | def get_first_int(values, key, default=0):
118 | found = values.get(key, [''])
119 | if found[0]:
120 | found = int(found[0])
121 | else:
122 | found = default
123 | return found
124 |
125 |
126 | # The calling code is much clearer than the complex expression using or and the
127 | # two-line version using the if/else expression.
128 |
129 |
130 | green = get_first_int(my_values, 'green')
131 | print('green: ', green)
132 | # green: 0
133 |
134 |
135 | # As soon as your expressions get complicated, it's time to consider
136 | # splitting them into smaller pieces and moving logic into helper functions.
137 | # What you gain in readability always outweighs what brevity may have
138 | # afforded you. Don't let Python's pithy syntax for complex expressions get
139 | # you into a mess like this.
140 |
141 |
142 | # Things to remember
143 |
144 | # 1. Python's syntax makes it all too easy to write single-line expressions
145 | # that are overly complicated and difficult to read.
146 | # 2. Move complex expressions into helper functions, especially if you need to
147 | # use the same logic repeatedly.
148 | # 3. The if/else expression provides a more readable alternative to using
149 | # Boolean operators like or and and in expressions.
150 |
--------------------------------------------------------------------------------
/item_05_slice_sequence.py:
--------------------------------------------------------------------------------
1 | # Item 5: Know how to slice sequences
2 |
3 |
4 | # Python includes syntax for slicing sequences into pieces. Slicing lets you
5 | # access a subset of a sequence's items with minimal effort. The simplest uses
6 | # for slicing are the built-in types list, str, and bytes. Slicing can be
7 | # extended to any Python class that implements the __getitem__ and __setitem__
8 | # special methods (see Item 28: Inherit from collections.abc for custom
9 | # container types).
10 |
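# For example, here is a minimal class that supports slicing through
# __getitem__ (my own sketch of the point above; Deck is a hypothetical name):

class Deck:
    def __init__(self, cards):
        self._cards = list(cards)

    def __getitem__(self, index):  # index may be an int or a slice object
        return self._cards[index]

deck = Deck('AKQJ')
print(deck[1:3])
# ['K', 'Q']
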
11 | # The basic form of the slicing syntax is somelist[start:end], where start is
12 | # inclusive and end is exclusive.
13 |
14 |
15 | a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
16 | print('First four: ', a[:4])
17 | print('Last four: ', a[-4:])
18 | print('Middle two: ', a[3:-3])
19 | # First four: ['a', 'b', 'c', 'd']
20 | # Last four: ['e', 'f', 'g', 'h']
21 | # Middle two: ['d', 'e']
22 |
23 |
24 | # When slicing from the start of a list, you should leave out the zero index
25 | # to reduce visual noise.
26 |
27 |
28 | assert a[:5] == a[0:5]
29 |
30 |
31 | # When slicing to the end of a list, you should leave out the final index
32 | # because it's redundant.
33 |
34 |
35 | assert a[5:] == a[5:len(a)]
36 |
37 |
38 | # Using negative numbers for slicing is helpful for doing offsets relative
39 | # to the end of a list. All of these forms of slicing would be clear to a new
40 | # reader of your code. There are no surprises, and I encourage you to use
41 | # these variations.
42 |
43 |
44 | print(a[:])
45 | print(a[:5])
46 | print(a[:-1])
47 | print(a[4:])
48 | print(a[-3:])
49 | print(a[2:5])
50 | print(a[2:-1])
51 | print(a[-3:-1])
52 | # ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
53 | # ['a', 'b', 'c', 'd', 'e']
54 | # ['a', 'b', 'c', 'd', 'e', 'f', 'g']
55 | # ['e', 'f', 'g', 'h']
56 | # ['f', 'g', 'h']
57 | # ['c', 'd', 'e']
58 | # ['c', 'd', 'e', 'f', 'g']
59 | # ['f', 'g']
60 |
61 |
62 | # Slicing deals properly with start and end indexes that are beyond the
63 | # boundaries of the list. That makes it easy for your code to establish
64 | # a maximum length to consider for an input sequence.
65 |
66 |
67 | first_twenty_items = a[:20]
68 | last_twenty_items = a[-20:]
69 | print(first_twenty_items)
70 | print(last_twenty_items)
71 | # ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
72 | # ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
73 |
74 |
75 | # In contrast, accessing the same index directly causes an exception.
76 | # print(a[20])
77 | # IndexError: list index out of range
78 |
79 |
80 | # Note
81 | # Beware that indexing a list by a negative variable is one of the few
82 | # situations in which you can get surprising results from slicing. For
83 | # example, the expression somelist[-n:] will work fine when n is greater
84 | # than one (e.g. somelist[-3:]). However, when n is zero, the expression
85 | # somelist[-0:] will result in a copy of the original list.
86 |
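# For example, when n is zero the slice copies the whole list (a small
# demonstration of the note above):

n = 0
assert a[-n:] == a and a[-n:] is not a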
87 |
88 | # The result of slicing a list is a whole new list. References to the objects
89 | # from the original list are maintained. Modifying the result of slicing won't
90 | # affect the original list.
91 |
92 |
93 | b = a[4:]
94 | print('Before: ', b)
95 | b[1] = 99
96 | print('After: ', b)
97 | print('No change: ', a)
98 | # Before: ['e', 'f', 'g', 'h']
99 | # After: ['e', 99, 'g', 'h']
100 | # No change: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
101 |
102 |
103 | # When used in assignments, slices will replace the specified range in the
104 | # original list. Unlike tuple assignments (like a, b = c[:2]), the lengths of
105 | # slice assignments don't need to be the same. The values before and after
106 | # the assigned slice will be preserved. The list will grow or shrink to
107 | # accommodate the new values.
108 |
109 |
110 | print('Before: ', a)
111 | a[2:7] = [99, 22, 14]
112 | print('After: ', a)
113 | # Before: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
114 | # After: ['a', 'b', 99, 22, 14, 'h']
115 |
116 |
117 | # If you leave out both the start and the end indexes when slicing, you'll end
118 | # up with a copy of the original list.
119 |
120 |
121 | b = a[:]
122 | assert b == a and b is not a
123 |
124 |
125 | # If you assign a slice with no start or end indexes, you'll replace its
126 | # entire contents with a copy of what's referenced (instead of allocating a
127 | # new list).
128 |
129 |
130 | b = a
131 | print('Before: ', a)
132 | a[:] = [101, 102, 103]
133 | assert a is b
134 | print('After: ', a)
135 | # Before: ['a', 'b', 99, 22, 14, 'h']
136 | # After: [101, 102, 103]
137 |
138 |
139 | # Things to remember
140 |
141 | # 1. Avoid being verbose: Don't supply 0 for the start index or the length of
142 | # the sequence for the end index.
143 | # 2. Slicing is forgiving of start or end indexes that are out of bounds,
144 | # making it easy to express slices on the front or back boundaries of a
145 | # sequence (like a[:20] or a[-20:]).
146 | # 3. Assigning to a list slice will replace that range in the original
147 | # sequence with what's referenced even if their lengths are different.
148 |
--------------------------------------------------------------------------------
/item_06_avoid_using.py:
--------------------------------------------------------------------------------
1 | # Item 6: Avoid using start, end and stride in a single slice
2 |
3 |
4 | # In addition to basic slicing (see Item 5: Knowing how to slice sequences),
5 | # Python has special syntax for the stride of a slice in the form
6 | # somelist[start:end:stride]. This lets you take every n-th item when slicing
7 | # a sequence. For example, the stride makes it easy to group by even and odd
8 | # indexes in a list.
9 |
10 |
11 | a = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']
12 | odds = a[::2]
13 | evens = a[1::2]
14 | print(odds)
15 | print(evens)
16 | # ['red', 'yellow', 'blue']
17 | # ['orange', 'green', 'purple']
18 |
19 |
20 | # The problem is that the stride syntax often causes unexpected behavior that
21 | # can introduce bugs. For example, a common Python trick for reversing a byte
22 | # string is to slice the string with a stride of -1.
23 |
24 |
25 | x = b'mongoose'
26 | y = x[::-1]
27 | print(y)
28 | # b'esoognom'
29 |
30 |
31 | # That works well for byte strings and ASCII characters, but it will break for
32 | # Unicode characters encoded as UTF-8 byte strings.
33 |
34 |
35 | w = '谢谢谢谢'
36 | # x = w.encode('utf-8')
37 | # y = x[::-1]
38 | # z = y.decode('utf-8')  # Reversed UTF-8 bytes are no longer valid UTF-8
39 | # print(y)
40 | # print(z)
41 | # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa2 in position 0
42 |
43 |
44 | # Are negative strides besides -1 useful? Consider the following examples.
45 |
46 |
47 | a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
48 | print(a[::2])
49 | print(a[::-2])
50 | # ['a', 'c', 'e', 'g']
51 | # ['h', 'f', 'd', 'b']
52 |
53 |
54 | # Here, ::2 means select every second item starting at the beginning.
55 | # Trickier, ::-2 means select every second item starting at the end and moving
56 | # backwards.
57 |
58 |
59 | # What do you think 2::2 means? What about -2::-2 vs. -2:2:-2 vs. 2:2:-2?
60 | print(a[2::2])
61 | print(a[-2::-2])
62 | print(a[-2:2:-2])
63 | print(a[2:2:-2])
64 | # ['c', 'e', 'g']
65 | # ['g', 'e', 'c', 'a']
66 | # ['g', 'e']
67 | # []
68 |
69 |
70 | # The point is that the stride part of the slicing syntax can be extremely
71 | # confusing. Having three numbers within the brackets is hard enough to read
72 | # because of its density. Then it's not obvious when the start and end indexes
73 | # come into effect relative to the stride value, especially when stride is
74 | # negative.
75 |
76 |
77 | # To prevent problems, avoid using stride along with start and end indexes. If
78 | # you must use a stride, prefer making it a positive value and omit start and
79 | # end indexes. If you must use stride with start and end indexes, consider
80 | # using one assignment to stride and another to slice.
81 |
82 |
83 | b = a[::2]
84 | c = b[1:-1]
85 | print(b)
86 | print(c)
87 | # ['a', 'c', 'e', 'g']
88 | # ['c', 'e']
89 |
90 |
91 | # Striding and then slicing creates an extra shallow copy of the data.
92 | # The first operation should try to reduce the size of the resulting slice by
93 | # as much as possible. If your program can't afford the time or memory
94 | # required for two steps, consider using the itertools built-in module's
95 | # islice method (see Item 46: Use built-in algorithms and data structures),
96 | # which doesn't permit negative values for start, end or stride.
97 |
98 |
99 | # Things to remember
100 |
101 | # 1. Specifying start, end, and stride in a slice can be extremely confusing.
102 | # 2. Prefer using positive stride values in slices without start or end
103 | # indexes. Avoid negative stride values if possible.
104 | # 3. Avoid using start, end and stride together in a single slice. If you need
105 | # all three parameters, consider doing two assignments (one to slice,
106 | # another to stride) or using islice from the itertools built-in module.
107 |
--------------------------------------------------------------------------------
/item_07_list_not_map_filter.py:
--------------------------------------------------------------------------------
1 | # Item 7: Use list comprehensions instead of map and filter
2 |
3 |
4 | # Python provides compact syntax for deriving one list from another. These
5 | # expressions are called list comprehensions. For example, say you want to
6 | # compute the square of each number in a list. You can do this by providing
7 | # the expression for your computation and the input sequence to loop over.
8 |
9 |
10 | a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
11 | squares = [x**2 for x in a]
12 | print(squares)
13 | # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
14 |
15 |
16 | # Unless you're applying a single-argument function, list comprehensions are
17 | # also clearer than the map built-in function for simple cases. map requires
18 | # creating a lambda function for the computation, which is visually noisy.
19 |
20 |
21 | squares = map(lambda x: x**2, a)
22 | print(squares)
23 | # Python 2
24 | # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
25 | # Python 3
26 | print(list(squares))
27 | # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
28 |
29 |
30 | # Unlike map, list comprehensions let you easily filter items from the input
31 | # list, removing corresponding outputs from the result. For example, say you
32 | # only want to compute the squares of the numbers that are divisible by 2.
33 | # Here, I do this by adding a conditional expression to the list
34 | # comprehension after the loop:
35 |
36 |
37 | even_squares = [x**2 for x in a if x % 2 == 0]
38 | print(even_squares)
39 | # [4, 16, 36, 64, 100]
40 |
41 |
42 | # The filter built-in function can be used along with map to achieve the same
43 | # outcome, but it is much harder to read.
44 |
45 |
46 | alt = map(lambda x: x**2, filter(lambda x: x % 2 == 0, a))
47 | assert even_squares == list(alt)
48 |
49 |
50 | # Dictionaries and sets have their own equivalents of list comprehensions.
51 | # These make it easy to create derivative data structures when writing
52 | # algorithms.
53 |
54 |
55 | chile_ranks = {'ghost': 1, 'habanero': 2, 'cayenne': 3}
56 | rank_dict = {rank: name for name, rank in chile_ranks.items()}
57 | chile_len_set = {len(name) for name in rank_dict.values()}
58 | print(rank_dict)
59 | print(chile_len_set)
60 | # {1: 'ghost', 2: 'habanero', 3: 'cayenne'}
61 | # {8, 5, 7}
62 |
63 |
64 | # Things to remember
65 |
66 | # 1. List comprehensions are clearer than the map and filter built-in
67 | # functions because they don't require extra lambda expressions.
68 | # 2. List comprehensions allow you to easily skip items from the input list, a
69 | # behavior map doesn't support without help from filter.
70 | # 3. Dictionaries and sets also support comprehension expressions.
71 |
--------------------------------------------------------------------------------
/item_08_no_more_than_2_expressions.py:
--------------------------------------------------------------------------------
1 | # Item 8: Avoid more than two expressions in list comprehensions
2 |
3 |
4 | # Beyond basic usage (see Item 7: Use list comprehensions instead of map and
5 | # filter), list comprehensions also support multiple levels of looping. For
6 | # example, say you want to simplify a matrix (a list containing other lists)
7 | # into one flat list of all cells. Here, I do this with a list comprehension
8 | # by including two for expressions. These expressions run in the order
9 | # provided from left to right.
10 |
11 |
12 | matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
13 | flat = [x for row in matrix for x in row]
14 | print(flat)
15 | # [1, 2, 3, 4, 5, 6, 7, 8, 9]
16 |
17 |
18 | # The example above is simple, readable, and a reasonable usage of multiple
19 | # loops. Another reasonable usage of multiple loops is replicating the
20 | # two-level deep layout of the input list. For example, say you want to square
21 | # the value in each cell of a two-dimensional matrix. This expression is
22 | # noisier because of the extra [] characters, but it's still easy to read.
23 |
24 |
25 | squared = [[x**2 for x in row] for row in matrix]
26 | print(squared)
27 | # [[1, 4, 9], [16, 25, 36], [49, 64, 81]]
28 |
29 |
30 | # If this expression included another loop, the list comprehension would get
31 | # so long that you'd have to split it over multiple lines.
32 |
33 | my_lists = [
34 | [[1, 2, 3], [4, 5, 6]],
35 | # ...
36 | [[11, 22, 33], [44, 55, 66]]
37 | ]
38 | flat = [x for sublist1 in my_lists
39 | for sublist2 in sublist1
40 | for x in sublist2]
41 | print(flat)
42 | # [1, 2, 3, 4, 5, 6, 11, 22, 33, 44, 55, 66]
43 |
44 |
45 | # At this point, the multiline comprehension isn't much shorter than the
46 | # alternative. Here, I produce the same result using normal loop statements.
47 | # indentation of this version makes the looping clearer than the list
48 | # comprehension.
49 |
50 |
51 | flat = []
52 | for sublist1 in my_lists:
53 | for sublist2 in sublist1:
54 | flat.extend(sublist2)
55 | print(flat)
56 | # [1, 2, 3, 4, 5, 6, 11, 22, 33, 44, 55, 66]
57 |
58 |
59 | # List comprehensions also support multiple if conditions. Multiple
60 | # conditions at the same loop level are an implicit and expression. For
61 | # example, say you want to filter a list of numbers to only even values
62 | # greater than four. These two list comprehensions are equivalent.
63 |
64 |
65 | a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
66 | b = [x for x in a if x > 4 if x % 2 == 0]
67 | c = [x for x in a if x > 4 and x % 2 == 0]
68 | print(b)
69 | print(c)
70 | # [6, 8, 10]
71 | # [6, 8, 10]
72 |
73 |
74 | # Conditions can be specified at each level of looping after the for
75 | # expression. For example, say you want to filter a matrix so the only cells
76 | # remaining are those divisible by 3 in rows that sum to 10 or higher.
77 | # Expressing this with list comprehensions is short, but extremely difficult
78 | # to read.
79 |
80 |
81 | matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
82 | filtered = [[x for x in row if x % 3 == 0]
83 | for row in matrix if sum(row) >= 10]
84 | print(filtered)
85 | # [[6], [9]]
86 |
87 |
88 | # Though this example is a bit convoluted, in practice you'll see situations
89 | # arise where such expressions seem like a good fit. I strongly encourage you
90 | # to avoid using list comprehensions that look like this. The resulting code
91 | # is very difficult for others to comprehend. What you save in the number of
92 | # lines doesn't outweigh the difficulties it could cause later.
93 |
94 | # The rule of thumb is to avoid using more than two expressions in a list
95 | # comprehension. This could be two conditions, two loops, or one condition
96 | # and one loop. As soon as it gets more complicated than that, you should
97 | # use normal if and for statements and write a helper function (see Item 16:
98 | # Consider generators instead of returning lists).
99 |
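# For example, the matrix filter above reads more clearly as a helper
# function (my own sketch of the advice above; filter_matrix is hypothetical):

def filter_matrix(matrix):
    result = []
    for row in matrix:
        if sum(row) >= 10:
            result.append([x for x in row if x % 3 == 0])
    return result

assert filter_matrix(matrix) == [[6], [9]]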
100 |
101 | # Things to remember
102 |
103 | # 1. List comprehensions support multiple levels of loops and multiple
104 | # conditions per loop level.
105 | # 2. List comprehensions with more than two expressions are very difficult to
106 | # read and should be avoided.
107 |
--------------------------------------------------------------------------------
/item_09_generator_expressions.py:
--------------------------------------------------------------------------------
1 | # Item 9: Consider generator expressions for large comprehensions
2 |
3 |
4 | # The problem with list comprehensions (see Item 7: Use list comprehensions
5 | # instead of map and filter) is that they may create a whole new list
6 | # containing one item for each value in the input sequence. This is fine for
7 | # small inputs, but for large inputs this could consume significant amounts of
8 | # memory and cause your program to crash.
9 |
10 | # For example, say you want to read a file and return the number of
11 | # characters on each line. Doing this with a list comprehension would require
12 | # holding the length of every line of the file in memory. If the file is
13 | # absolutely enormous or perhaps a never-ending network socket, list
14 | # comprehensions are problematic. Here, I use a list comprehension in a way
15 | # that can only handle small input values.
16 |
17 |
18 | value = [len(x) for x in open('item_09_generator_expressions.py')]
19 | print(value)
20 | # [66, 1, 1, 76, 70, 77, 79, 42, 1, 68, 78, 73, 69, 76, 43, 1, 1, 46, 12]
21 | print("line: %d, max length: %d\n" % (len(value), max(value)))
22 | # line: 39, max length: 79
23 |
24 |
25 | # To solve this, Python provides generator expressions, a generalization of
26 | # list comprehensions and generators. Generator expressions don't materialize
27 | # the whole output sequence when they're run. Instead, generator expressions
28 | # evaluate to an iterator that yields one item at a time from the expression.
29 |
30 | # A generator expression is created by putting list-comprehension-like syntax
31 | # between () characters. Here, I use a generator expression that is equivalent
32 | # to the code above. However, the generator expression immediately evaluates
33 | # to an iterator and doesn't make any forward progress.
34 |
35 |
36 | it = (len(x) for x in open('item_09_generator_expressions.py'))
37 | print(it)
38 | # <generator object <genexpr> at 0x7f5f396eaa40>
39 |
40 |
41 | # The returned iterator can be advanced one step at a time to produce the next
42 | # output from the generator expression as needed (using the next built-in
43 | # function). Your code can consume as much of the generator expression as you
44 | # want without risking a blowup in memory usage.
45 |
46 |
47 | print(next(it))
48 | print(next(it))
49 | # 66
50 | # 1
51 |
52 |
53 | # Another powerful outcome of generator expressions is that they can be
54 | # composed together. Here, I take the iterator returned by the generator
55 | # expression above and use it as the input for another generator expression.
56 |
57 |
58 | roots = ((x, x**0.5) for x in it)
59 | print(next(roots))
60 | print(next(roots))
61 | # (1, 1.0)
62 | # (76, 8.717797887081348)
63 |
64 |
65 | # Each time I advance this iterator, it will also advance the interior
66 | # iterator, creating a domino effect of looping, evaluating
67 | # conditional expressions, and passing around inputs and outputs.
68 |
69 |
70 | print(next(roots))
71 | # (70, 8.366600265340756)
72 |
73 |
74 | # Chaining generators like this executes very quickly in Python. When you're
75 | # looking for a way to compose functionality that's operating on a large
76 | # stream of input, generator expressions are the best tool for the job.
77 | # The only gotcha is that the iterators returned by generator expressions are
78 | # stateful, so you must be careful not to use them more than once (see Item
79 | # 17: Be defensive when iterating over arguments).
80 |
81 |
82 | # Things to remember
83 |
84 | # 1. List comprehensions can cause problems for large inputs by using too much
85 | # memory.
86 | # 2. Generator expressions avoid memory issues by producing outputs one at a
87 | # time as an iterator.
88 | # 3. Generator expressions can be composed by passing the iterator from one
89 | # generator expression into the for subexpression of another.
90 | # 4. Generator expressions execute very quickly when chained together.
91 |
--------------------------------------------------------------------------------
/item_10_prefer_enumerate.py:
--------------------------------------------------------------------------------
1 | # Item 10: Prefer enumerate over range
2 | import random
3 |
4 |
5 | # The range built-in function is useful for loops that iterate over a set of
6 | # integers.
7 |
8 | random_bits = 0
9 | for i in range(64):
10 | if random.randint(0, 1):
11 | random_bits |= 1 << i
12 |
13 |
14 | # When you have a data structure to iterate over, like a list of strings, you
15 | # can loop directly over the sequence.
16 |
17 |
18 | flavor_list = ['vanilla', 'chocolate', 'pecan', 'strawberry']
19 | for flavor in flavor_list:
20 | print('%s is delicious' % flavor)
21 | # vanilla is delicious
22 | # chocolate is delicious
23 | # pecan is delicious
24 | # strawberry is delicious
25 |
26 |
27 | # Often, you'll want to iterate over a list and also know the index of the
28 | # current item in the list. For example, say you want to print the ranking of
29 | # your favorite ice cream flavors. One way to do it is using range.
30 |
31 |
32 | for i in range(len(flavor_list)):
33 | flavor = flavor_list[i]
34 | print('%d: %s' % (i+1, flavor))
35 | # 1: vanilla
36 | # 2: chocolate
37 | # 3: pecan
38 | # 4: strawberry
39 |
40 |
41 | # This looks clumsy compared with the other examples of iterating over
42 | # flavor_list or range. You have to get the length of the list. You have to
43 | # index into the array. It's harder to read.
44 |
45 | # Python provides the enumerate built-in function for addressing this
46 | # situation. enumerate wraps any iterator with a lazy generator. This
47 | # generator yields pairs of the loop index and the next value from the
48 | # iterator. The resulting code is much clearer.
49 |
50 |
51 | for i, flavor in enumerate(flavor_list):
52 | print('%d: %s' % (i + 1, flavor))
53 | # 1: vanilla
54 | # 2: chocolate
55 | # 3: pecan
56 | # 4: strawberry
57 |
58 |
59 | # You can make this even shorter by specifying the number from which enumerate
60 | # should begin counting (1 in this case).
61 |
62 |
63 | for i, flavor in enumerate(flavor_list, 1):
64 | print('%d: %s' % (i, flavor))
65 | # 1: vanilla
66 | # 2: chocolate
67 | # 3: pecan
68 | # 4: strawberry
69 |
70 |
71 | # Things to remember
72 |
73 | # 1. enumerate provides concise syntax for looping over an iterator and
74 | # getting the index of each item from the iterator as you go.
75 | # 2. Prefer enumerate instead of looping over a range and indexing into a
76 | # sequence.
77 | # 3. You can supply a second parameter to enumerate to specify the number from
78 | # which to begin counting (zero is the default).
79 |
--------------------------------------------------------------------------------
/item_11_use_zip.py:
--------------------------------------------------------------------------------
1 | # Item 11: Use zip to process iterators in parallel
2 |
3 |
4 | # Often in Python you find yourself with many lists of related objects. List
5 | # comprehensions make it easy to take a source list and get a derived list by
6 | # applying an expression (see Item 7: Use list comprehensions instead of map
7 | # and filter).
8 |
9 |
10 | names = ['Cecilia', 'Lise', 'Marie']
11 | letters = [len(n) for n in names]
12 |
13 |
14 | # The items in the derived list are related to the items in the source list by
15 | # their indexes. To iterate over both lists in parallel, you can iterate over
16 | # the length of the names source list.
17 |
18 |
19 | longest_name = None
20 | max_letters = 0
21 |
22 | for i in range(len(names)):
23 | count = letters[i]
24 | if count > max_letters:
25 | longest_name = names[i]
26 | max_letters = count
27 |
28 | print(longest_name)
29 | # Cecilia
30 |
31 |
32 | # The problem is that this whole loop statement is visually noisy. The indexes
33 | # into names and letters make the code hard to read. Indexing into the arrays
34 | # by the loop index i happens twice. Using enumerate (see Item 10: Prefer
35 | # enumerate over range) improves this slightly, but it's still not ideal.
36 |
37 |
38 | for i, name in enumerate(names):
39 | count = letters[i]
40 | if count > max_letters:
41 | longest_name = name
42 | max_letters = count
43 |
44 |
45 | # To make this code clearer, Python provides the zip built-in function. In
46 | # Python 3, zip wraps two or more iterators with a lazy generator. The zip
47 | # generator yields tuples containing the next value from each iterator. The
48 | # resulting code is much cleaner than indexing into multiple lists.
49 |
50 |
51 | for name, count in zip(names, letters):
52 | if count > max_letters:
53 | longest_name = name
54 | max_letters = count
55 |
56 |
57 | # There are two problems with the zip built-in.
58 |
59 | # The first issue is that in Python 2 zip is not a generator; it will fully
60 | # exhaust the supplied iterators and return a list of all the tuples it
61 | # creates. This could potentially use a lot of memory and cause your program
62 | # to crash. If you want to zip very large iterators in Python 2, you should
63 | # use izip from the itertools built-in module (see Item 46: Use built-in
64 | # algorithms and data structures).
65 |
66 | # The second issue is that zip behaves strangely if the input iterators are of
67 | # different lengths. For example, say you add another name to the list above but
68 | # forget to update the letter counts. Running zip on the two input lists will
69 | # have an unexpected result.
70 |
71 |
72 | names.append('Rosalind')
73 | for name, count in zip(names, letters):
74 | print(name)
75 | # Cecilia
76 | # Lise
77 | # Marie
78 |
79 |
80 | # The new item for 'Rosalind' isn't there. This is just how zip works. It
81 | # keeps yielding tuples until a wrapped iterator is exhausted. This approach
82 | # works fine when you know that the iterators are of the same length, which is
83 | # often the case for derived lists created by list comprehensions. In many
84 | # other cases, the truncating behavior of zip is surprising and bad. If you
85 | # aren't confident that the lengths of the list you want to zip are equal,
86 | # consider using the zip_longest function from the itertools built-in module
87 | # instead (also called izip_longest in Python 2).
88 |
89 |
90 | # Things to remember
91 |
92 | # 1. The zip built-in function can be used to iterate over multiple iterators
93 | # in parallel.
94 | # 2. In Python 3, zip is a lazy generator that produces tuples. In Python 2,
95 | # zip returns the full result as a list of tuples.
96 | # 3. zip truncates its outputs silently if you supply it with iterators of
97 | # different lengths.
98 | # 4. The zip_longest function from the itertools built-in module lets you
99 | # iterate over multiple iterators in parallel regardless of their
100 | # lengths (see Item 46: Use built-in algorithms and data structures).
101 |
--------------------------------------------------------------------------------
/item_12_avoid_else.py:
--------------------------------------------------------------------------------
1 | # Item 12: Avoid else blocks after for and while loops
2 |
3 |
4 | # Python loops have an extra feature that is not available in most other
5 | # programming languages: you can put an else block immediately after a loop's
6 | # repeated interior block.
7 |
8 |
9 | for i in range(3):
10 | print('Loop %d' % i)
11 | else:
12 | print('Else block!')
13 | # Loop 0
14 | # Loop 1
15 | # Loop 2
16 | # Else block!
17 |
18 |
19 | # Surprisingly, the else block runs immediately after the loop finishes. Why
20 | # is the clause called "else"? Why not "and"? In an if/else statement, else
21 | # means, "Do this if the block before this doesn't happen." In a try/except
22 | # statement, except has the definition: "Do this if trying the block before
23 | # this failed."
24 |
25 |
26 | # Similarly, else from try/except/else follows this pattern (see item 13: Take
27 | # advantage of each block in try/except/else/finally) because it means, "Do
28 | # this if the block before did not fail". try/finally is also intuitive
29 | # because it means, "Always do what is final after trying the block before."
30 |
31 | # Given all of the uses of else, except, and finally in Python, a new
32 | # programmer might assume that the else part of for/else means, "Do this if
33 | # the loop wasn't completed". In reality, it does exactly the opposite. Using
34 | # a break statement in a loop will actually skip the else block.
35 |
36 |
37 | for i in range(3):
38 | print('Loop %d' % i)
39 | if i == 1:
40 | break
41 | else:
42 | print('Else block!')
43 | # Loop 0
44 | # Loop 1
45 |
46 |
47 | # Another surprise is that the else block will run immediately if you loop
48 | # over an empty sequence.
49 |
50 |
51 | for x in []:
52 | print('Never runs')
53 | else:
54 | print('For Else block!')
55 | # For Else block!
56 |
57 |
58 | # The else block also runs when while loops are initially false.
59 |
60 |
61 | while False:
62 | print('Never runs!')
63 | else:
64 | print('While Else block!')
65 | # While Else block!
66 |
67 |
68 | # The rationale for these behaviors is that else blocks after loops are useful
69 | # when you're using loops to search for something. For example, say you want
70 | # to determine whether two numbers are coprime (their only common divisor is
71 | # 1). Here, I iterate through every possible common divisor and test the
72 | # numbers. After every option has been tried, the loop ends. The else block
73 | # runs when the numbers are coprime because the loop doesn't encounter a
74 | # break.
75 |
76 |
77 | a = 4
78 | b = 9
79 | for i in range(2, min(a, b) + 1):
80 | print('Testing', i)
81 | if a % i == 0 and b % i == 0:
82 | print('Not coprime')
83 | break
84 | else:
85 | print('Coprime')
86 | # Testing 2
87 | # Testing 3
88 | # Testing 4
89 | # Coprime
90 |
91 |
92 | # In practice, you wouldn't write the code this way. Instead, you'd write a
93 | # helper function to do the calculation. Such a helper function is written in
94 | # two common styles.
95 |
96 | # The first approach is to return early when you find the condition you're
97 | # looking for. You return the default outcome if you fall through the loop.
98 |
99 |
100 | def coprime(a, b):
101 | for i in range(2, min(a, b) + 1):
102 | if a % i == 0 and b % i == 0:
103 | return False
104 | return True
105 |
106 |
107 | # The second way is to have a result variable that indicates whether you've
108 | # found what you're looking for in the loop. You break out of the loop as soon
109 | # as you find something.
110 |
111 |
112 | def coprime2(a, b):
113 | is_coprime = True
114 | for i in range(2, min(a, b) + 1):
115 | if a % i == 0 and b % i == 0:
116 | is_coprime = False
117 | break
118 | return is_coprime
119 |
120 |
121 | # Both of these approaches are so much clearer to readers of unfamiliar code.
122 | # The expressivity you gain from the else block doesn't outweigh the burden
123 | # you put on people (including yourself) who want to understand your code in
124 | # the future. Simple constructs like loops should be self-evident in Python.
125 | # You should avoid using else blocks after loops entirely.
126 |
127 |
128 | # Things to remember
129 |
130 | # 1. Python has special syntax that allows else blocks to immediately follow
131 | # for and while loop interior blocks.
132 | # 2. The else block after a loop only runs if the loop body did not encounter
133 | # a break statement.
134 | # 3. Avoid using else blocks after loops because their behavior isn't
135 | # intuitive and can be confusing.
136 |
--------------------------------------------------------------------------------
/item_13_try_except_else_finally.py:
--------------------------------------------------------------------------------
1 | # Item 13: Take advantage of each block in try/except/else/finally
2 | import json
3 |
4 |
5 | # There are four distinct times that you may want to take action during
6 | # exception handling in Python. These are captured in the functionality of
7 | # try, except, else, and finally blocks. Each block serves a unique purpose in
8 | # the compound statement, and their various combinations are useful (see Item
9 | # 51).
10 |
11 |
12 | # Finally Blocks
13 |
14 | # Use try/finally when you want exceptions to propagate up, but you also want
15 | # to run cleanup code when exceptions occur. One common usage of try/finally
16 | # is reliably closing file handles (see Item 43: "Consider contextlib and with
17 | # statements for reusable try/finally behavior" for another approach).
18 |
19 |
20 | handle = open('item_13_try_except_else_finally.py') # May raise IOError
21 | # handle = open('item_13_try_except_else_finally_.py') # May raise IOError
22 | # FileNotFoundError: [Errno 2] No such file or directory:
23 | # 'item_13_try_except_else_finally_.py'
24 | try:
25 | data = handle.read() # May raise UnicodeDecodeError
26 | finally:
27 | handle.close() # Always runs after try:
28 |
29 |
30 | # Any exception raised by the read method will always propagate up to the
31 | # calling code, yet the close method of handle is also guaranteed to run in
32 | # the finally block.
33 |
34 |
35 | # Else Blocks
36 |
37 | # Use try/except/else to make it clear which exceptions will be handled by
38 | # your code and which exceptions will propagate up. When the try block doesn't
39 | # raise an exception, the else block will run. The else block helps you
40 | # minimize the amount of code in the try block and improves readability. For
41 | # example, say you want to load JSON dictionary data from a string and return
42 | # the value of a key it contains.
43 |
44 |
45 | def load_json_key(data, key):
46 | try:
47 | result_dict = json.loads(data) # May raise ValueError
48 | except ValueError as e:
49 | raise KeyError from e
50 | else:
51 | return result_dict[key] # May raise KeyError
52 |
53 |
54 | # If the data isn't valid JSON, then decoding with json.loads will raise a
55 | # ValueError. The exception is caught by the except block and handled. If
56 | # decoding is successful, then the key lookup will occur in the else block. If
57 | # the key lookup raises any exceptions, they will propagate up to the caller
58 | # because they are outside the try block. The else clause ensures that what
59 | # follows the try/except is visually distinguished from the except block. This
60 | # makes the exception propagation behavior clear.
61 |
62 |
63 | # Everything together
64 |
65 | # Use try/except/else/finally when you want to do it all in one compound
66 | # statement. For example, say you want to read a description of work to do
67 | # from a file, process it, and then update the file in place. Here, the try
68 | # block is used to read the file and process it. The except block is used to
69 | # handle exceptions from the try block that are expected. The else block is
70 | # used to update the file in place and to allow related exceptions to
71 | # propagate up. The finally block cleans up the file handle.
72 |
73 |
74 | UNDEFINED = object()
75 |
76 |
77 | def divide_json(path):
78 | handle = open(path, 'r+') # May raise IOError
79 | try:
80 | data = handle.read() # May raise UnicodeDecodeError
81 | op = json.loads(data) # May raise ValueError
82 | value = ( # May raise ZeroDivisionError
83 | op['numerator']/op['denominator'])
84 | except ZeroDivisionError as e:
85 | return UNDEFINED
86 | else:
87 | op['result'] = value
88 | result = json.dumps(op)
89 | handle.seek(0)
90 | handle.write(result) # May raise IOError
91 | return value
92 | finally:
93 | handle.close() # Always runs
94 |
95 |
96 | # This layout is especially useful because all of the blocks work together in
97 | # intuitive ways. For example, if an exception gets raised in the else block
98 | # while rewriting the result data, the finally block will still run and close
99 | # the file handle.
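
# A minimal usage sketch with a hypothetical file name (and assuming
# json is imported at the top of this file): write a small JSON work
# description, then process it in place with divide_json.
with open('item_13_operation.json', 'w') as f:
    f.write('{"numerator": 1, "denominator": 10}')
print(divide_json('item_13_operation.json'))
# 0.1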
100 |
101 |
102 | # Things to remember
103 |
104 | # 1. The try/finally compound statement lets you run cleanup code regardless
105 | # of whether exceptions were raised in the try block.
106 | # 2. The else block helps you minimize the amount of code in try blocks and
107 | # visually distinguish the success case from the try/except blocks.
108 | # 3. An else block can be used to perform additional actions after a
109 | # successful try block but before common cleanup in a finally block.
110 |
--------------------------------------------------------------------------------
/item_14_prefer_exceptions.py:
--------------------------------------------------------------------------------
1 | # Chapter 2: Functions
2 |
3 |
4 | # The first organizational tool programmers use in Python is the function. As
5 | # in other programming language, functions enable you to break large programs
6 | # into smaller pieces. They improve read-ability and make code more
7 | # approachable. They allow for reuse and refactoring.
8 |
9 | # Functions in Python have a variety of extra features that make the
10 | # programmer's life easier. Some are similar to capabilities in other
11 | # programming languages, but many are unique to Python. These extras can
12 | # eliminate noise and clarify the intention of callers. They can significantly
13 | # reduce subtle bugs that are difficult to find.
14 |
15 |
16 | # Item 14: Prefer exceptions to returning None
17 |
18 |
19 | # When writing utility functions, there's a draw for Python programmers to
20 | # give special meaning to the return value of None. It seems to make sense
21 | # in some cases. For example, say you want a helper function that divides one
22 | # number by another. In the case of dividing by zero, returning None seems
23 | # natural because the result is undefined.
24 |
25 |
26 | def divide(a, b):
27 | try:
28 | return a / b
29 | except ZeroDivisionError:
30 | return None
31 |
32 |
33 | # Code using this function can interpret the return value accordingly.
34 |
35 |
36 | x, y = 1, 2
37 | # Result is not None
38 | # x, y = 1, 0
39 | # Invalid inputs
40 | result = divide(x, y)
41 | if result is None:
42 | print('Invalid inputs')
43 | else:
44 | print('Result is not None')
45 |
46 |
47 | # What happens when the numerator is zero? That will cause the return value
48 | # to be zero (if the denominator is non-zero). This can cause problems when
49 | # you evaluate the result in a condition like an if statement. You may
50 | # accidentally look for any False equivalent value to indicate errors instead
51 | # of only looking for None (see Item 4: "Write helper functions instead of
52 | # complex expressions" for a similar situation).
53 |
54 |
55 | x, y = 0, 5
56 | result = divide(x, y)
57 | if not result:
58 | print('Invalid inputs') # This is wrong!
59 | # Invalid inputs
60 |
61 |
62 | # This is a common mistake in Python code when None has special meaning. This
63 | # is why returning None from a function is error prone. There are two ways to
64 | # reduce the chance of such errors.
65 |
66 | # The first way is to split the return value into a two-tuple. The first part
67 | # of the tuple indicates whether the operation was a success or a failure. The
68 | # second part is the actual result that was computed.
69 |
70 |
71 | def divide(a, b):
72 | try:
73 | return True, a / b
74 | except ZeroDivisionError:
75 | return False, None
76 |
77 |
78 | # Callers of this function have to unpack the tuple. That forces them to
79 | # consider the status part of the tuple instead of just looking at the
80 | # result of division.
81 |
82 |
83 | success, result = divide(x, y)
84 | if not success:
85 | print('Invalid inputs')
86 | else:
87 | print('Success')
88 | # Success
89 |
90 |
91 | # The problem is that callers can easily ignore the first part of the tuple
92 | # (using the underscore variable name, a Python convention for unused
93 | # variables). The resulting code doesn't look wrong at first glance. This
94 | # is as bad as just returning None.
95 |
96 |
97 | _, result = divide(x, y)
98 | if not result:
99 | print('Invalid inputs')
100 | else:
101 | print('Get result')
102 | # Invalid inputs
103 |
104 |
105 | # The second, better way to reduce these errors is to never return None at all.
106 | # Instead, raise an exception up to the caller and make them deal with it.
107 | # Here, I turn a ZeroDivisionError into a ValueError to indicate to the caller
108 | # the input values are bad:
109 |
110 |
111 | def divide(a, b):
112 | try:
113 | return a / b
114 | except ZeroDivisionError as e:
115 | raise ValueError('Invalid inputs') from e
116 |
117 |
118 | # Now the caller should handle the exception for the invalid input case (this
119 | # behavior should be documented; see Item 49: "Write docstrings for every
120 | # function, class and module"). The caller no longer requires a condition on
121 | # the return value of the function. If the function didn't raise an exception,
122 | # then the return value must be good. The outcome of exception handling is
123 | # clear.
124 |
125 |
126 | x, y = 5, 2
127 | try:
128 | result = divide(x, y)
129 | except ValueError:
130 | print('Invalid inputs')
131 | else:
132 | print('Result is %.2f' % result)
133 | # Result is 2.50
134 |
135 |
136 | # Things to remember
137 |
138 | # 1. Functions that return None to indicate special meaning are error prone
139 | # because None and other values (e.g., zero, the empty string) all
140 | # evaluate to False in conditional expressions.
141 | # 2. Raise exceptions to indicate special situations instead of returning
142 | # None. Expect the calling code to handle exceptions properly when they
143 | # are documented.
144 |
--------------------------------------------------------------------------------
/item_16_address.txt:
--------------------------------------------------------------------------------
1 | Four score and seven years ago...
--------------------------------------------------------------------------------
/item_16_generators_instead_of_lists.py:
--------------------------------------------------------------------------------
1 | # Item 16: Consider generators instead of returning lists
2 | import itertools
3 |
4 |
5 | # The simple choice for functions that produce a sequence of results is to
6 | # return a list of items. For example, say you want to find the index of every
7 | # word in a string. Here, I accumulate results in a list using the append method
8 | # and return it at the end of the function:
9 |
10 |
11 | def index_words(text):
12 | result = []
13 |     if text:
14 | result.append(0)
15 | for index, letter in enumerate(text):
16 | if letter == ' ':
17 | result.append(index + 1)
18 | return result
19 |
20 |
21 | # This works as expected for some sample input.
22 |
23 |
24 | address = 'Four score and seven years ago...'
25 | result = index_words(address)
26 | print(result[:3])
27 | # [0, 5, 11]
28 |
29 |
30 | # There are two problems with index_words function.
31 |
32 | # The first problem is that the code is a bit dense and noisy. Each time a
33 | # new result is found, I call the append method. The method call's bulk (
34 | # result.append) deemphasizes the value being added to the list (index + 1).
35 | # There is one line for creating the result list and another for returning it.
36 | # While the function body contains ~130 characters (without whitespace), only
37 | # ~75 characters are important.
38 |
39 | # A better way to write this function is using a generator. Generators are
40 | # functions that use yield expressions. When called, generator functions do
41 | # not actually run but instead immediately return an iterator. With each call
42 | # to the next built-in function, the iterator will advance the generator to
43 | # its next yield expression. Each value passed to yield by the generator will
44 | # be returned by the iterator to the caller.
45 |
46 | # Here, I define a generator function that produces the same results as
47 | # before:
48 |
49 |
50 | def index_words_iter(text):
51 | if text:
52 | yield 0
53 | for index, letter in enumerate(text):
54 | if letter == ' ':
55 | yield index + 1
56 |
57 |
58 | # It's significantly easier to read because all interactions with the result
59 | # list have been eliminated. Results are passed to yield expressions instead.
60 | # The iterator returned by the generator call can easily be converted to a
61 | # list by passing it to the list built-in function (see Item 9: "Consider
62 | # generator expressions for large comprehensions" for how this works).
63 |
64 |
65 | result = list(index_words_iter(address))
66 | print(result)
67 | # [0, 5, 11, 15, 21, 27]
68 |
69 |
70 | # The second problem with index_words is that it requires all results to be
71 | # stored in the list before being returned. For huge inputs, this can cause
72 | # your program to run out of memory and crash. In contrast, a generator
73 | # version of this function can easily be adapted to take inputs of arbitrary
74 | # length.
75 |
76 | # Here, I define a generator that streams input from a file one line at a time
77 | # and yields outputs one word at a time. The working memory for this function
78 | # is bounded to the maximum length of one line of input.
79 |
80 |
81 | def index_file(handle):
82 | offset = 0
83 | for line in handle:
84 | if line:
85 | yield offset
86 | for letter in line:
87 | offset += 1
88 | if letter == ' ':
89 | yield offset
90 |
91 |
92 | # Running the generator produces the same results.
93 |
94 |
95 | with open('item_16_address.txt', 'r') as f:
96 | it = index_file(f)
97 | results = itertools.islice(it, 0, 3)
98 | print(list(results))
99 | # [0, 5, 11]
100 |
101 |
102 | # The only gotcha of defining generators like this is that the callers must be
103 | # aware that the iterators returned are stateful and can't be reused (see
104 | # Item 17: "Be defensive when iterating over arguments").
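
# A short demonstration of that gotcha: the iterator is exhausted after
# one full pass, so a second pass silently produces nothing.
it = index_words_iter(address)
print(list(it))
print(list(it))
# [0, 5, 11, 15, 21, 27]
# []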
105 |
106 |
107 | # Things to remember
108 |
109 | # 1. Using generators can be clearer than the alternative of returning lists
110 | # of accumulated results.
111 | # 2. The iterator returned by a generator produces the set of values passed to
112 | # yield expressions within the generator function's body.
113 | # 3. Generators can produce a sequence of outputs for arbitrarily large inputs
114 | # because their working memory doesn't include all inputs and outputs.
115 |
--------------------------------------------------------------------------------
/item_17_my_numbers.txt:
--------------------------------------------------------------------------------
1 | 15
2 | 35
3 | 80
--------------------------------------------------------------------------------
/item_18_reduce_visual_noise.py:
--------------------------------------------------------------------------------
1 | # Item 18: Reduce visual noise with variable positional arguments
2 |
3 |
4 | # Accepting optional positional arguments (often called star args in reference
5 | # to the conventional name for the parameter, *args) can make a function call
6 | # more clear and remove visual noise.
7 |
8 | # For example, say you want to log some debug information. With a fixed
9 | # number of arguments, you would need a function that takes a message and a
10 | # list of values.
11 |
12 |
13 | def log(message, values):
14 | if not values:
15 | print(message)
16 | else:
17 |         values_str = ', '.join(str(x) for x in values)
18 |         print('%s: %s' % (message, values_str))
19 |
20 | log("My numbers are", [1, 2])
21 | log("Hi there", [])
22 | # My numbers are: 1, 2
23 | # Hi there
24 |
25 | # Having to pass an empty list when you have no values to log is cumbersome
26 | # and noisy. It'd be better to leave out the second argument entirely. You can
27 | # do this in Python by prefixing the last positional parameter with *. The
28 | # first parameter for the log message is required, whereas any number of
29 | # subsequent positional arguments are optional. The function body doesn't
30 | # need to change, only the callers do.
31 |
32 |
33 | def log(message, *values): # The only difference
34 | if not values:
35 | print(message)
36 | else:
37 |         values_str = ', '.join(str(x) for x in values)
38 |         print('%s: %s' % (message, values_str))
39 |
40 | log("My numbers are", 1, 2)
41 | log("Hi there") # Much better
42 | # My numbers are: 1, 2
43 | # Hi there
44 |
45 |
46 | # If you already have a list and want to call a variable argument function
47 | # like log, you can do this by using the * operator. This instructs Python to
48 | # pass items from the sequence as positional arguments.
49 |
50 |
51 | favorites = [7, 33, 99]
52 | log('Favorite colors', *favorites)
53 | # Favorite colors: 7, 33, 99
54 |
55 |
56 | # There are two problems with accepting a variable number of positional
57 | # arguments.
58 |
59 |
60 | # 1. The first issue is that the variable arguments are always turned into
61 | # a tuple before they are passed to your function. This means that if the
62 | # caller of your function uses the * operator on a generator, it will be
63 | # iterated until it's exhausted. The resulting tuple will include every value
64 | # from the generator, which could consume a lot of memory and cause your
65 | # program to crash.
66 |
67 |
68 | def my_generator():
69 | for i in range(10):
70 | yield i
71 |
72 |
73 | def my_func(*args):
74 | print(args)
75 |
76 |
77 | it = my_generator()
78 | my_func(*it)
79 | # (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
80 |
81 |
82 | # Functions that accept *args are best for situations where you know the number
83 | # of inputs in the argument list will be reasonably small. It's ideal for
84 | # function calls that pass many literals or variable names together. It's
85 | # primarily for the convenience of the programmer and the readability of the
86 | # code.
87 |
88 | # 2. The second issue with *args is that you can't add new positional
89 | # arguments to your function in the future without migrating every caller. If
90 | # you try to add a positional argument in the front of the argument list,
91 | # existing callers will subtly break if they aren't updated.
92 |
93 |
94 | def log(sequence, message, *values):
95 | if not values:
96 | print('%s: %s' % (sequence, message))
97 | else:
98 | values_str = ', '.join(str(x) for x in values)
99 | print('%s: %s: %s' % (sequence, message, values_str))
100 |
101 | log(1, 'Favorites', 7, 33) # New usage is OK
102 | log('Favorite numbers', 7, 33) # Old usage breaks
103 | # 1: Favorites: 7, 33
104 | # Favorite numbers: 7: 33
105 |
106 |
107 | # The problem here is that the second call to log used 7 as the message
108 | # parameter because a sequence argument wasn't given. Bugs like this are
109 | # hard to track down because the code still runs without raising any
110 | # exceptions. To avoid this possibility entirely, you should use
111 | # keyword-only arguments when you want to extend functions that accept *args
112 | # (see Item 21: "Enforce clarity with keyword-only arguments").
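
# Below is a sketch of that keyword-only fix (hypothetical name log_seq,
# Python 3 syntax): the new sequence parameter comes after *values, so
# it can only be passed by keyword, and existing positional callers
# keep working.


def log_seq(message, *values, sequence=None):
    prefix = '%s: ' % sequence if sequence is not None else ''
    if not values:
        print('%s%s' % (prefix, message))
    else:
        values_str = ', '.join(str(x) for x in values)
        print('%s%s: %s' % (prefix, message, values_str))

log_seq('Favorite numbers', 7, 33)       # Old-style call still works
log_seq('Favorites', 7, 33, sequence=1)  # New behavior must be explicit
# Favorite numbers: 7, 33
# 1: Favorites: 7, 33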
113 |
114 |
115 | # Things to remember
116 |
117 | # 1. Functions can accept a variable number of positional arguments by using
118 | # *args in the def statement.
119 | # 2. You can use the items from a sequence as the positional arguments for a
120 | # function with the * operator.
121 | # 3. Using the * operator with a generator may cause your program to run out
122 | # of memory and crash.
123 | # 4. Adding new positional parameters to functions that accept *args can
124 | # introduce hard-to-find bugs.
125 |
--------------------------------------------------------------------------------
/item_19_provide_optimal_behavior.py:
--------------------------------------------------------------------------------
1 | # Item 19: Provide optimal behavior with keyword arguments
2 |
3 |
4 | # Like most other programming languages, calling a function in Python allows
5 | # for passing arguments by position.
6 |
7 |
8 | def remainder(number, divisor):
9 | return number % divisor
10 |
11 |
12 | assert remainder(20, 7) == 6
13 |
14 |
15 | # All positional arguments to Python functions can also be passed by keyword,
16 | # where the name of the argument is used in an assignment within the
17 | # parentheses of a function call. The keyword arguments can be passed in any
18 | # order as long as all of the required positional arguments are specified.
19 | # You can mix and match keyword and positional arguments. These calls are
20 | # equivalent.
21 |
22 |
23 | print(remainder(20, 7))
24 | print(remainder(20, divisor=7))
25 | print(remainder(number=20, divisor=7))
26 | print(remainder(divisor=7, number=20))
27 | # 6
28 | # 6
29 | # 6
30 | # 6
31 |
32 |
33 | # Positional arguments must be specified before keyword arguments.
34 |
35 |
36 | # remainder(number=20, 7)
37 | # line 36
38 | # remainder(number=20, 7)
39 | # SyntaxError: non-keyword arg after keyword arg
40 |
41 |
42 | # Each argument can only be specified once.
43 |
44 |
45 | # remainder(20, number=7)
46 | # line 45, in
47 | # remainder(20, number=7)
48 | # TypeError: remainder() got multiple values for keyword argument 'number'
49 |
50 |
51 | # The flexibility of keyword arguments provides three significant benefits.
52 |
53 | # The first advantage is that keyword arguments make the function call clearer
54 | # to new readers of the code. With the call remainder(20, 7), it's not evident
55 | # which argument is which without looking at the implementation. In the call
56 | # with keyword arguments, number=20 and divisor=7 make it immediately obvious
57 | # which parameter is being used for each purpose.
58 |
59 | # The second benefit of keyword arguments is that they can have default values
60 | # specified in the function definition. This allows a function to provide
61 | # additional capabilities when you need them but lets you accept the default
62 | # behavior most of the time. This can eliminate repetitive code and reduce
63 | # noise.
64 |
65 | # For example, say you want to compute the rate of fluid flowing into a vat.
66 | # If the vat is also on a scale, then you could use the difference between two
67 | # weight measurements at two different times to determine the flow rate.
68 |
69 |
70 | def flow_rate(weight_diff, time_diff):
71 | return weight_diff / time_diff
72 |
73 | weight_diff = 0.5
74 | time_diff = 3
75 | flow = flow_rate(weight_diff, time_diff)
76 | print('%.3f kg per second' % flow)
77 | # 0.167 kg per second
78 |
79 |
80 | # In the typical case, it's useful to know the flow rate in kilograms per
81 | # second. Other times, it'd be helpful to use the last sensor measurements
82 | # to approximate larger time scales, like hours or days. You can provide this
83 | # in the same function by adding an argument for the time period scaling
84 | # factor.
85 |
86 |
87 | def flow_rate(weight_diff, time_diff, period):
88 | return (weight_diff / time_diff) * period
89 |
90 |
91 | # The problem is that now you need to specify the period argument every time
92 | # you call the function, even in the common case of flow rate per second (
93 | # where the period is 1).
94 |
95 |
96 | flow_per_second = flow_rate(weight_diff, time_diff, 1)
97 |
98 |
99 | # To make this less noisy, I can give the period arguments a default value.
100 |
101 |
102 | def flow_rate(weight_diff, time_diff, period=1):
103 | return (weight_diff / time_diff) * period
104 |
105 |
106 | # The period argument is now optional.
107 |
108 |
109 | flow_per_second = flow_rate(weight_diff, time_diff)
110 | flow_per_hour = flow_rate(weight_diff, time_diff, period=3600)
111 | print(flow_per_second)
112 | print(flow_per_hour)
113 | # 0.166666666667
114 | # 600.0
115 |
116 |
117 | # This works well for simple default values (it gets tricky for complex
118 | # default values--see Item 20: "Use None and Docstrings to specify dynamic
119 | # default arguments").
120 |
121 | # The third reason to use keyword arguments is that they provide a powerful
122 | # way to extend a function's parameters while remaining backwards compatible
123 | # with existing callers. This lets you provide additional functionality
124 | # without having to migrate a lot of code, reducing the chance of introducing
125 | # bugs.
126 |
127 |
128 | def flow_rate(weight_diff, time_diff, period=1, units_per_kg=1):
129 | return ((weight_diff / units_per_kg) / time_diff) * period
130 |
131 |
132 | # The default argument value for units_per_kg is 1, which makes the return
133 | # weight units remain as kilograms. This means that all existing callers will
134 | # see no change in behavior. New callers to flow_rate can specify the new
135 | # keyword argument to see the new behavior.
136 |
137 |
138 | pounds_per_hour = flow_rate(
139 | weight_diff, time_diff, period=3600, units_per_kg=2.2)
140 | print(pounds_per_hour)
141 | # 272.727272727
142 |
143 |
144 | # The only problem with this approach is that optional keyword arguments like
145 | # period and units_per_kg may still be specified as positional arguments.
146 |
147 |
148 | pounds_per_hour = flow_rate(weight_diff, time_diff, 3600, 2.2)
149 | print(pounds_per_hour)
150 | # 272.727272727
151 |
152 |
153 | # Supplying optional arguments positionally can be confusing because it isn't
154 | # clear what the values 3600 and 2.2 correspond to. The best practice is to
155 | # always specify optional arguments using the keyword names and never pass
156 | # them as positional arguments.
157 |
158 | # Note:
159 | # Backwards compatibility using optional keyword arguments like this is
160 | # crucial for functions that accept *args (see Item 18: "Reduce visual noise
161 | # with variable positional arguments"). But an even better practice is to use
162 | # keyword-only arguments (see Item 21: "Enforce clarity with keyword-only
163 | # arguments").
164 |
165 |
166 | # Things to remember
167 |
168 | # 1. Function arguments can be specified by position or by keyword.
169 | # 2. Keywords make it clear what the purpose of each argument is when it
170 | #    would be confusing with only positional arguments.
171 | # 3. Keyword arguments with default values make it easy to add new behaviors
172 | # to a function, especially when the function has existing callers.
173 | # 4. Optional keyword arguments should always be passed by keyword instead of
174 | # by position.
175 |
--------------------------------------------------------------------------------
/item_20_use_none_and_docstrings.py:
--------------------------------------------------------------------------------
1 | # Item 20: Use None and Docstrings to specify dynamic default arguments
2 | import datetime
3 | import time
4 | import json
5 |
6 |
7 | # Sometimes you need to use a non-static type as a keyword argument's default
8 | # value. For example, say you want to print logging messages that are marked
9 | # with the time of the logged event. In the default case, you want the message
10 | # to include the time when the function was called. You might try the
11 | # following approach, assuming the default arguments are reevaluated each time
12 | # the function is called.
13 |
14 |
15 | def log(message, when=datetime.datetime.now()):
16 | print('%s: %s' % (when, message))
17 |
18 | log('Hi there!')
19 | time.sleep(0.1)
20 | log('Hi again!')
21 | # 2017-02-23 18:27:27.045710: Hi there!
22 | # 2017-02-23 18:27:27.045710: Hi again!
23 |
24 |
25 | # The timestamps are the same because datetime.now is only executed a single
26 | # time: when the function is defined. Default argument values
27 | # are evaluated only once per module load, which usually happens when a
28 | # program starts up. After the module containing this code is loaded, the
29 | # datetime.now default argument will never be evaluated again.
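
# A tiny sketch that makes the single evaluation visible: the default
# is captured once in the function's __defaults__ tuple, so every call
# sees the very same object.
def stamp(when=datetime.datetime.now()):
    return when

assert stamp() is stamp()  # Same datetime object on every call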
30 |
31 |
32 | # The convention for achieving the desired result in Python is to provide a
33 | # default value of None and to document the actual behavior in the docstring
34 | # (see Item 49: "Write Docstrings for every function, class, and module").
35 | # When your code sees an argument value of None, you allocate the default
36 | # value accordingly.
37 |
38 |
39 | def log(message, when=None):
40 | """Log a message with a timestamp.
41 | Args:
42 | message: Message to print.
43 | when: datetime of when the message occurred. Defaults to the present
44 | time.
45 | """
46 | when = datetime.datetime.now() if when is None else when
47 | print('%s: %s' % (when, message))
48 |
49 |
50 | # Now the timestamps will be different.
51 |
52 |
53 | log('Hi there!')
54 | time.sleep(0.1)
55 | log('Hi again!')
56 | # 2017-02-23 18:38:27.510581: Hi there!
57 | # 2017-02-23 18:38:27.610755: Hi again!
58 |
59 |
60 | # Using None for default argument values is especially important when the
61 | # arguments are mutable. For example, say you want to load a value encoded as
62 | # JSON data. If decoding the data fails, you want an empty dictionary to be
63 | # returned by default. You might try this approach.
64 |
65 |
66 | def decode(data, default={}):
67 | try:
68 | return json.loads(data)
69 | except ValueError:
70 | return default
71 |
72 |
73 | # The problem here is the same as the datetime.now example above. The
74 | # dictionary specified for default will be shared by all calls to decode
75 | # because default argument values are only evaluated once (at module load
76 | # time). This can cause extremely surprising behavior.
77 |
78 |
79 | foo = decode('bad data')
80 | foo['stuff'] = 5
81 | bar = decode('also bad')
82 | bar['meep'] = 1
83 | print('Foo:', foo)
84 | print('Bar:', bar)
85 | # Foo: {'stuff': 5, 'meep': 1}
86 | # Bar: {'stuff': 5, 'meep': 1}
87 |
88 |
89 | # You'd expect two different dictionaries, each with a single key and value.
90 | # But modifying one seems to also modify the other. The culprit is that foo
91 | # and bar are both equal to the default parameter. They are the same
92 | # dictionary object.
93 |
94 |
95 | assert foo is bar
96 |
97 |
98 | # The fix is to set the keyword argument default value to None and then
99 | # document the behavior in the function's docstring.
100 |
101 |
102 | def decode(data, default=None):
103 | """Load JSON data from a string.
104 | Args:
105 | data: JSON data to decode.
106 | default: Value to return if decoding fails. Defaults to an empty
107 | dictionary.
108 | """
109 | if default is None:
110 | default = {}
111 | try:
112 | return json.loads(data)
113 | except ValueError:
114 | return default
115 |
116 |
117 | # Now running the same data test code as before produces the expected result.
118 |
119 |
120 | foo = decode('bad data')
121 | foo['stuff'] = 5
122 | bar = decode('also bad')
123 | bar['meep'] = 1
124 | print('Foo:', foo)
125 | print('Bar:', bar)
126 | # Foo: {'stuff': 5}
127 | # Bar: {'meep': 1}
128 |
129 |
130 | # Things to remember
131 |
132 | # 1. Default arguments are only evaluated once: during function definition at
133 | # module load time. This can cause odd behavior for dynamic values
134 | #    (like {} or []).
135 | # 2. Use None as the default value for keyword arguments that have a
136 | # dynamic value. Document the actual default behavior in the function's
137 | # docstring.
138 |
--------------------------------------------------------------------------------
/item_21_enforce_clarity.py:
--------------------------------------------------------------------------------
1 | # Item 21: Enforce clarity with keyword-only arguments
2 |
3 |
4 | # Passing arguments by keyword is a powerful feature of Python functions (see
5 | # Item 19: "Provide optimal behavior with keyword arguments"). The flexibility
6 | # of keyword arguments enables you to write code that will be clear for your
7 | # use cases.
8 |
9 | # For example, say you want to divide one number by another but be very
10 | # careful about special cases. Sometimes you want to ignore ZeroDivisionError
11 | # exceptions and return infinity instead. Other times, you want to ignore
12 | # OverflowError exceptions and return zero instead.
13 |
14 |
15 | def safe_division(number, divisor, ignore_overflow, ignore_zero_division):
16 | try:
17 | return number / divisor
18 | except OverflowError:
19 | if ignore_overflow:
20 | return 0
21 | else:
22 | raise
23 | except ZeroDivisionError:
24 | if ignore_zero_division:
25 | return float('inf')
26 | else:
27 | raise
28 |
29 |
30 | # Using this function is straightforward. This call will ignore the float
31 | # overflow from division and will return zero.
32 |
33 |
34 | result = safe_division(1, 100**500, True, False)
35 | print(result)
36 | # 0.0
37 |
38 |
39 | # This call will ignore the error from dividing by zero and will return
40 | # infinity.
41 |
42 |
43 | result = safe_division(1, 0, False, True)
44 | print(result)
45 | # inf
46 |
47 |
48 | # The problem is that it's easy to confuse the position of the two Boolean
49 | # arguments that control the exception-ignoring behavior. This can easily
50 | # cause bugs that are hard to track down. One way to improve the readability
51 | # of this code is to use keyword arguments. By default, the function can be
52 | # overly cautious and can always re-raise exceptions.
53 |
54 |
55 | def safe_division_b(number, divisor,
56 | ignore_overflow=False,
57 | ignore_zero_division=False):
58 | try:
59 | return number / divisor
60 | except OverflowError:
61 | if ignore_overflow:
62 | return 0
63 | else:
64 | raise
65 | except ZeroDivisionError:
66 | if ignore_zero_division:
67 | return float('inf')
68 | else:
69 | raise
70 |
71 |
72 | # Then callers can use keyword arguments to specify which of the ignore flags
73 | # they want to flip for specific operations, overriding the default behavior.
74 |
75 |
76 | print(safe_division_b(1, 10**500, ignore_overflow=True))
77 | print(safe_division_b(1, 0, ignore_zero_division=True))
78 | # 0.0
79 | # inf
80 |
81 |
82 | # The problem is, since these keyword arguments are optional behavior, there's
83 | # nothing forcing callers of your functions to use keyword arguments for
84 | # clarity. Even with the new definition of safe_division_b, you can still
85 | # call it the old way with positional arguments.
86 |
87 |
88 | print(safe_division_b(1, 10**500, True, False))
89 | # 0.0
90 |
91 |
92 | # With complex functions like this, it's better to require that callers are
93 | # clear about their intentions. In Python 3, you can demand clarity by
94 | # defining your functions with keyword-only arguments. These arguments can
95 | # only be supplied by keyword, never by position.
96 |
97 | # Here, I redefine the safe_division function to accept keyword-only
98 | # arguments. The * symbol in the argument list indicates the end of positional
99 | # arguments and the beginning of the keyword-only arguments.
100 |
101 |
102 | def safe_division_c(number, divisor, *,
103 | ignore_overflow=False,
104 | ignore_zero_division=False):
105 | try:
106 | return number / divisor
107 | except OverflowError:
108 | if ignore_overflow:
109 | return 0
110 | else:
111 | raise
112 | except ZeroDivisionError:
113 | if ignore_zero_division:
114 | return float('inf')
115 | else:
116 | raise
117 |
118 |
119 | # Now, calling the function with positional arguments for the keyword argument
120 | # won't work.
121 |
122 |
123 | # result = safe_division_c(1, 10**500, True, False)
124 | # line 123, in
125 | # result = safe_division_c(1, 10**500, True, False)
126 | # TypeError: safe_division_c() takes 2 positional arguments but 4 were given
127 |
128 |
129 | # Keyword arguments and their default values work as expected.
130 |
131 |
132 | result = safe_division_c(1, 0, ignore_zero_division=True) # OK
133 | print(result)
134 | # inf
135 |
136 | try:
137 | result = safe_division_c(1, 0)
138 | print(result)
139 | except ZeroDivisionError:
140 | print("Exception ZeroDivisionError")
141 | pass # Expected
142 | # Exception ZeroDivisionError
143 |
144 |
145 | # Keyword-only arguments in Python 2
146 |
147 | # Unfortunately, Python 2 doesn't have explicit syntax for specifying
148 | # keyword-only arguments like Python 3. But you can achieve the same behavior
149 | # of raising TypeErrors for invalid function calls by using the ** operator in
150 | # the argument list. The ** operator is similar to the * operator (see Item 18:
151 | # "Reduce visual noise with variable positional arguments"), except that
152 | # instead of accepting a variable number of positional arguments, it accepts
153 | # any number of keyword arguments, even when they're not defined.
154 |
155 |
156 | # Python 2
157 | def print_args(*args, **kwargs):
158 | print('Positional:', args)
159 | print('Keyword: ', kwargs)
160 |
161 | print_args(1, 2, foo='bar', stuff='meep')
162 | # ('Positional:', (1, 2))
163 | # ('Keyword: ', {'foo': 'bar', 'stuff': 'meep'})
164 |
165 |
166 | # To make safe_division take keyword-only arguments in Python 2, you have the
167 | # function accept **kwargs. Then you pop keyword arguments that you expect out
168 | # of the kwargs dictionary, using the pop method's second argument to specify
169 | # the default value when the key is missing. Finally, you make sure there are
170 | # no more keyword arguments left in kwargs to prevent callers from supplying
171 | # arguments that are invalid.
172 |
173 |
174 | # Python 2
175 | def safe_division_d(number, divisor, **kwargs):
176 | ignore_overflow = kwargs.pop('ignore_overflow', False)
177 | ignore_zero_div = kwargs.pop('ignore_zero_division', False)
178 | if kwargs:
179 | raise TypeError('Unexpected **kwargs: %r' % kwargs)
180 |
181 | try:
182 | return number / divisor
183 | except OverflowError:
184 | if ignore_overflow:
185 | return 0
186 | else:
187 | raise
188 | except ZeroDivisionError:
189 | if ignore_zero_div:
190 | return float('inf')
191 | else:
192 | raise
193 |
194 |
195 | # Now you can call the function with or without keyword arguments.
196 |
197 |
198 | print(safe_division_d(1, 10.0))
199 | print(safe_division_d(1, 0, ignore_zero_division=True))
200 | print(safe_division_d(1, 10**500, ignore_overflow=True))
201 | # 0.1
202 | # inf
203 | # 0.0
204 |
205 |
206 | # Trying to pass keyword-only arguments by position won't work, just like in Python 3.
207 |
208 |
209 | # safe_division_d(1, 0, False, True)
210 | # line 209, in
211 | # safe_division_d(1, 0, False, True)
212 | # TypeError: safe_division_d() takes 2 positional arguments but 4 were given
213 |
214 |
215 | # Trying to pass unexpected keyword arguments also won't work.
216 |
217 |
218 | # safe_division_d(0, 0, unexpected=True)
219 | # line 179, in safe_division_d
220 | # raise TypeError('Unexpected **kwargs: %r' % kwargs)
221 | # TypeError: Unexpected **kwargs: {'unexpected': True}
222 |
223 |
224 | # Things to remember
225 |
226 | # 1. Keyword arguments make the intention of a function call more clear.
227 | # 2. Use keyword-only arguments to force callers to supply keyword arguments
228 | # for potentially confusing functions, especially those that accept
229 | # multiple Boolean flags.
230 | # 3. Python 3 supports explicit syntax for keyword-only arguments in
231 | # functions.
232 | # 4. Python 2 can emulate keyword-only arguments for functions by using
233 | # **kwargs and manually raising TypeError exceptions.
234 |
--------------------------------------------------------------------------------
/item_23_accepts_functions_4_interfaces.py:
--------------------------------------------------------------------------------
1 | # Item 23: Accept functions for simple interfaces instead of classes
2 | from collections import defaultdict
3 |
4 |
5 | # Many of Python's built-in APIs allow you to customize behavior by passing
6 | # in a function. These hooks are used by APIs to call back your code while
7 | # they execute. For example, the list type's sort method takes an optional key
8 | # argument that's used to determine each index's value for sorting. Here, I
9 | # sort a list of names based on their lengths by providing a lambda expression
10 | # as the key hook:
11 |
12 |
13 | names = ['Socrates', 'Archimedes', 'Plato', 'Aristotle']
14 | names.sort(key=lambda x: len(x))
15 | print(names)
16 | # ['Plato', 'Socrates', 'Aristotle', 'Archimedes']
17 |
18 |
19 | # In other languages, you might expect hooks to be defined by an abstract
20 | # class. In Python, many hooks are just stateless functions with well-
21 | # defined arguments and return values. Functions are ideal for hooks because
22 | # they are easier to describe and simpler to define than classes. Functions
23 | # work as hooks because Python has first-class functions: Functions and
24 | # methods can be passed around and referenced like any other value in the
25 | # language.
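
# A tiny illustration of first-class functions (hypothetical names): a
# function object can be assigned to a variable and called through it
# like any other value.
def shout(text):
    return text.upper()

speak = shout          # A reference to the function, not a call
print(speak('hello'))
# HELLO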
26 |
27 | # For example, say you want to customize the behavior of the defaultdict class
28 | # (see Item 46: "Use built-in algorithms and data structures" for details).
29 | # This data structure allows you to supply a function that will be called each
30 | # time a missing key is accessed. The function must return the default value
31 | # the missing key should have in the dictionary. Here, I define a hook that
32 | # logs each time a key is missing and returns 0 for the default value:
33 |
34 |
35 | def log_missing():
36 | print('Key added')
37 |     return 0
38 |
39 |
40 | # Given an initial dictionary and a set of desired increments, I can cause the
41 | # log_missing function to run and print twice (for 'red' and 'orange').
42 |
43 |
44 | current = {'green': 12, 'blue': 3}
45 | increments = [
46 | ('red', 5),
47 | ('blue', 17),
48 | ('orange', 9),
49 | ]
50 | result = defaultdict(log_missing, current)
51 | print('Before:', dict(result))
52 | for key, amount in increments:
53 |     result[key] += amount
54 | print('After:', dict(result))
55 | # Before: {'blue': 3, 'green': 12}
56 | # Key added
57 | # Key added
58 | # After: {'blue': 20, 'green': 12, 'red': 5, 'orange': 9}
61 |
62 |
63 | # Supplying functions like log_missing makes APIs easy to build and test
64 | # because it separates side effects from deterministic behavior. For example,
65 | # say you now want the default value hook passed to defaultdict to count the
66 | # total number of keys that were missing. One way to achieve this is using
67 | # a stateful closure (see Item 15: "Know how closures interact with
68 | # variable scope" for details). Here, I define a helper function that uses
69 | # such a closure as the default value hook:
70 |
71 |
72 | def increment_with_report(current, increments):
73 | added_count = 0
74 |
75 | def missing():
76 | nonlocal added_count # Stateful closure
77 | added_count += 1
78 | return 0
79 |
80 | result = defaultdict(missing, current)
81 | for key, amount in increments:
82 | result[key] += amount
83 |
84 | return result, added_count
85 |
86 |
87 | # Running this function produces the expected results (2), even though the
88 | # defaultdict has no idea that the missing hook maintains state. This is
89 | # another benefit of accepting simple functions for interfaces. It's easy to
90 | # add functionality later by hiding state in a closure.
91 |
92 |
93 | result, count = increment_with_report(current, increments)
94 | assert count == 2
95 | print('After:', dict(result))
96 | # After: {'orange': 9, 'blue': 20, 'green': 12, 'red': 5}
97 |
98 |
99 | # The problem with defining a closure for stateful hooks is that it's harder
100 | # to read than the stateless function example. Another approach is to define
101 | # a small class that encapsulates the state you want to track.
102 |
103 |
104 | class CountMissing(object):
105 | def __init__(self):
106 | self.added = 0
107 |
108 | def missing(self):
109 | self.added += 1
110 | return 0
111 |
112 |
113 | # In other languages, you might expect that now defaultdict would have to be
114 | # modified to accommodate the interface of CountMissing. But in Python, thanks
115 | # to first-class functions, you can reference the CountMissing.missing method
116 | # directly on an object and pass it to defaultdict as the default value hook.
117 | # It's trivial to have a method satisfy a function interface.
118 |
119 |
120 | counter = CountMissing()
121 | result = defaultdict(counter.missing, current) # Method ref
122 |
123 | for key, amount in increments:
124 | result[key] += amount
125 | assert counter.added == 2
126 | print('After:', dict(result))
127 | # After: {'orange': 9, 'blue': 20, 'green': 12, 'red': 5}
128 |
129 |
130 | # Using a helper class like this to provide the behavior of a stateful closure
131 | # is clearer than increment_with_report function above. However, in isolation
132 | # it's still not immediately obvious that the purpose of the CountMissing
133 | # class is. Who constructs a CountMissing object? Who calls the missing
134 | # method? Will the class need other public methods to be added in the future?
135 | # Until you see its usage with defaultdict, the class is a mystery.
136 |
137 | # To clarify this situation, Python allows classes to define the __call__
138 | # special method. __call__ allows an object to be called just like a function.
139 | # It also causes the callable built-in function to return True for such an
140 | # instance.
141 |
142 |
143 | class BetterCountMissing(object):
144 | def __init__(self):
145 | self.added = 0
146 |
147 | def __call__(self):
148 | self.added += 1
149 | return 0
150 |
151 | counter = BetterCountMissing()
152 | counter()
153 | assert callable(counter)
154 |
155 |
156 | # Here, I use a BetterCountMissing instance as the default value hook for a
157 | # defaultdict to track the number of missing keys that were added:
158 |
159 |
160 | counter = BetterCountMissing()
161 | result = defaultdict(counter, current) # Relies on __call__
162 | for key, amount in increments:
163 | result[key] += amount
164 | assert counter.added == 2
165 | print('After:', dict(result))
166 | # After: {'orange': 9, 'blue': 20, 'green': 12, 'red': 5}
167 |
168 |
169 | # This is much clearer than the CountMissing.missing example. The __call__
170 | # method indicates that a class's instances will be used somewhere a function
171 | # argument would also be suitable (like API hooks). It directs new readers of
172 | # the code to the entry point that's responsible for the class's primary
173 | # behavior. It provides a strong hint that the goal of the class is to act as
174 | # a stateful closure.
175 |
176 | # Best of all, defaultdict still has no view into what's going on when you
177 | # use __call__. All that defaultdict requires is a function for the default
178 | # value hook. Python provides many different ways to satisfy a simple function
179 | # interface depending on what you need to accomplish.
180 |
181 |
182 | # Things to remember
183 |
184 | # 1. Instead of defining and instantiating classes, functions are often all
185 | # you need for simple interfaces between components in Python.
186 | # 2. References to functions and methods in Python are first class, meaning
187 | # they can be used in expressions like any other type.
188 | # 3. The __call__ special method enables instances of a class to be called
189 | # like plain Python functions.
190 | # 4. When you need a function to maintain state, consider defining a class
191 | # that provides the __call__ method instead of defining a stateful closure
192 | # (see Item 15: "Know how closures interact with variable scope").
193 |
--------------------------------------------------------------------------------
/item_25_init_parent_classes_with_super.py:
--------------------------------------------------------------------------------
1 | # Item 25: Initialize parent classes with super
2 | from pprint import pprint
3 |
4 |
5 | # The old way to initialize a parent class from a child class is to directly
6 | # call the parent class's __init__ method with the child instance.
7 |
8 |
9 | class MyBaseClass(object):
10 | def __init__(self, value):
11 | self.value = value
12 |
13 |
14 | class MyChildClass(MyBaseClass):
15 | def __init__(self):
16 | MyBaseClass.__init__(self, 5)
17 |
18 |
19 | # This approach works fine for simple hierarchies but breaks down in many
20 | # cases.
21 |
22 | # If your class is affected by multiple inheritance (something to avoid in
23 | # general; see Item 26: "Use multiple inheritance only for mix-in utility
24 | # classes"), calling the superclass' __init__ methods directly can lead to
25 | # unpredictable behavior. One problem is that the __init__ call order isn't
26 | # specified across all subclasses. For example, here I define two parent classes
27 | # that operate on the instance's value field:
28 |
29 |
30 | class TimesTwo(object):
31 | def __init__(self):
32 | self.value *= 2
33 |
34 |
35 | class PlusFive(object):
36 | def __init__(self):
37 | self.value += 5
38 |
39 |
40 | # This class defines its parent classes in one ordering.
41 |
42 |
43 | class OneWay(MyBaseClass, TimesTwo, PlusFive):
44 | def __init__(self, value):
45 | MyBaseClass.__init__(self, value)
46 | TimesTwo.__init__(self)
47 | PlusFive.__init__(self)
48 |
49 |
50 | # And constructing it produces a result that matches the parent class ordering.
51 |
52 |
53 | foo = OneWay(5)
54 | print("First ordering is (5*2)+5=", foo.value)
55 | # First ordering is (5*2)+5= 15
56 |
57 |
58 | # Here's another class that defines the same parent classes but in a different
59 | # ordering:
60 |
61 |
62 | class AnotherWay(MyBaseClass, PlusFive, TimesTwo):
63 | def __init__(self, value):
64 | MyBaseClass.__init__(self, value)
65 | TimesTwo.__init__(self)
66 | PlusFive.__init__(self)
67 |
68 |
69 | # However, I left the calls to the parent class constructors PlusFive.__init__
70 | # and TimesTwo.__init__ in the same order as before, causing this class's
71 | # behavior not to match the order of the parent classes in its definition.
72 |
73 |
74 | bar = AnotherWay(5)
75 | print("Second ordering still is ", bar.value)
76 | # Second ordering still is 15
77 |
78 |
79 | # Another problem occurs with diamond inheritance. Diamond inheritance happens
80 | # when a subclass inherits from two separate classes that have the same
81 | # superclass somewhere in the hierarchy. Diamond inheritance causes
82 | # the common superclass's __init__ method to run multiple times, causing
83 | # unexpected behavior. For example, here I define two child classes that
84 | # inherit from MyBaseClass.
85 |
86 |
87 | class TimesFive(MyBaseClass):
88 | def __init__(self, value):
89 | MyBaseClass.__init__(self, value)
90 | self.value *= 5
91 |
92 |
93 | class PlusTwo(MyBaseClass):
94 | def __init__(self, value):
95 | MyBaseClass.__init__(self, value)
96 | self.value += 2
97 |
98 |
99 | # Then, I define a child class that inherits from both of these classes, making
100 | # MyBaseClass the top of the diamond.
101 |
102 |
103 | class ThisWay(TimesFive, PlusTwo):
104 | def __init__(self, value):
105 | TimesFive.__init__(self, value)
106 | PlusTwo.__init__(self, value)
107 |
108 |
109 | foo = ThisWay(5)
110 | print("Should be (5*5)+2 = 27 but is ", foo.value)
111 | # Should be (5*5)+2 = 27 but is 7
112 |
113 |
114 | # The output should be 27 because (5*5)+2 = 27. But the call to the second
115 | # parent class's constructor, PlusTwo.__init__, causes self.value to be reset
116 | # back to 5 when MyBaseClass.__init__ gets called a second time.
117 |
118 | # To solve these problems, Python 2.2 added the super built-in function and
119 | # defined the method resolution order (MRO). The MRO standardizes which
120 | # superclasses are initialized before others (e.g. depth-first,
121 | # left-to-right). It also ensures that common superclasses in diamond
122 | # hierarchies are only run once.
123 |
124 | # Here, I create a diamond-shaped class hierarchy again, but this time I use
125 | # super (in Python 2 style) to initialize the parent class:
126 |
127 |
128 | # Python 2
129 | class TimesFiveCorrect(MyBaseClass):
130 | def __init__(self, value):
131 | super(TimesFiveCorrect, self).__init__(value)
132 | self.value *= 5
133 |
134 |
135 | class PlusTwoCorrect(MyBaseClass):
136 | def __init__(self, value):
137 | super(PlusTwoCorrect, self).__init__(value)
138 | self.value += 2
139 |
140 |
141 | # Now the top part of the diamond, MyBaseClass.__init__, is only run a single
142 | # time. The other parent classes are run in the order specified in the class
143 | # statement.
144 |
145 |
146 | # Python 2
147 | class GoodWay(TimesFiveCorrect, PlusTwoCorrect):
148 | def __init__(self, value):
149 | super(GoodWay, self).__init__(value)
150 |
151 |
152 | foo = GoodWay(5)
153 | print("Should be 5*(5+2) = 35 and is ", foo.value)
154 | # Should be 5*(5+2) = 35 and is 35
155 |
156 |
157 | # This order may seem backwards at first. Shouldn't TimesFiveCorrect.__init__
158 | # have run first? Shouldn't the result be (5*5)+2 = 27? The answer is no.
159 | # This ordering matches what the MRO defines for this class. The MRO ordering
160 | # is available via a class method called mro.
161 |
162 |
163 | pprint(GoodWay.mro())
164 | # [<class '__main__.GoodWay'>,
165 | #  <class '__main__.TimesFiveCorrect'>,
166 | #  <class '__main__.PlusTwoCorrect'>,
167 | #  <class '__main__.MyBaseClass'>,
168 | #  <class 'object'>]
169 |
170 |
171 | # When I call GoodWay(5), it in turn calls TimesFiveCorrect.__init__, which
172 | # calls PlusTwoCorrect.__init__, which calls MyBaseClass.__init__. Once this
173 | # reaches the top of the diamond, all of the initialization methods
174 | # actually do their work in the opposite order from how their __init__
175 | # functions were called. MyBaseClass.__init__ assigns the value to 5.
176 | # PlusTwoCorrect.__init__ adds 2 to make value equal 7.
177 | # TimesFiveCorrect.__init__ multiplies it by 5 to make value equal 35.
178 |
179 | # The super built-in function works well, but it still has two noticeable
180 | # problems in Python 2:
181 |
182 | # - Its syntax is a bit verbose. You have to specify the class you're in,
183 | # the self object, the method name (usually __init__), and all the
184 | # arguments. This construction can be confusing to new Python programmers.
185 |
186 | # - You have to specify the current class by name in the call to super. If you
187 | # ever change the class's name--a very common activity when improving a
188 | # class hierarchy--you also need to update every call to super.
189 |
190 | # Thankfully, Python 3 fixes these issues by making calls to super with no
191 | # arguments equivalent to calling super with __class__ and self specified. In
192 | # Python 3, you should always use super because it's clear, concise, and
193 | # always does the right things.
194 |
195 |
196 | class Explicit(MyBaseClass):
197 | def __init__(self, value):
198 | super(__class__, self).__init__(value * 2)
199 |
200 |
201 | class Implicit(MyBaseClass):
202 | def __init__(self, value):
203 | super().__init__(value * 2)
204 |
205 |
206 | assert Explicit(10).value == Implicit(10).value
207 |
208 |
209 | # This works because Python 3 lets you reliably reference the current class
210 | # in methods using the __class__ variable. This doesn't work in Python 2
211 | # because __class__ isn't defined. You may guess that you could use
212 | # self.__class__ as an argument to super, but this breaks because of the way
213 | # super is implemented in Python 2.
214 |
215 |
216 | # Things to remember
217 |
218 | # 1. Python's standard method resolution order (MRO) solves the problems of
219 | # superclass initialization order and diamond inheritance.
220 | # 2. Always use the super built-in function to initialize parent classes.
221 |
--------------------------------------------------------------------------------
/item_28_inherit_from_collections_abc.py:
--------------------------------------------------------------------------------
1 | # Item 28: Inherit from collections.abc for custom container types
2 |
3 |
4 |
5 | # Much of programming in Python is defining classes that contain data and
6 | # describing how such objects relate to each other. Every Python class is a
7 | # container of some kind, encapsulating attributes and functionality together.
8 | # Python also provides built-in container types for managing data: lists,
9 | # tuples, sets, and dictionaries.
10 |
11 | # When you're designing classes for simple use cases like sequences, it's
12 | # natural that you'd want to subclass Python's built-in list type directly.
13 | # For example, say you want to create your own custom list type that has
14 | # additional methods for counting the frequency of its members.
15 |
16 |
17 | class FrequencyList(list):
18 | def __init__(self, members):
19 | super().__init__(members)
20 |
21 | def frequency(self):
22 | counts = {}
23 | for item in self:
24 | counts.setdefault(item, 0)
25 | counts[item] += 1
26 | return counts
27 |
28 |
29 | # By subclassing list, you get all of list's standard functionality and
30 | # preserve the semantics familiar to all Python programmers. Your additional
31 | # methods can add any custom behaviors you need.
32 |
33 |
34 | foo = FrequencyList(['a', 'b', 'a', 'c', 'b', 'a', 'd'])
35 | print('Length is', len(foo))
36 | foo.pop()
37 | print('After pop:', repr(foo))
38 | print('Frequency:', foo.frequency())
39 | # Length is 7
40 | # After pop: ['a', 'b', 'a', 'c', 'b', 'a']
41 | # Frequency: {'a': 3, 'b': 2, 'c': 1}
42 |
43 |
44 | # Now imagine you want to provide an object that feels like a list, allowing
45 | # indexing, but isn't a list subclass. For example, say you want to provide
46 | # sequence semantics (like list or tuple) for a binary tree class.
47 |
48 |
49 | class BinaryNode(object):
50 | def __init__(self, value, left=None, right=None):
51 | self.value = value
52 | self.left = left
53 | self.right = right
54 |
55 |
56 | # How do you make this act like a sequence type? Python implements its
57 | # container behaviors with instance methods that have special names. When you
58 | # access a sequence item by index.
59 |
60 | bar = [1, 2, 3]
61 | print(bar[0])
62 | # 1
63 |
64 |
65 | # it will be interpreted as:
66 |
67 |
68 | print(bar.__getitem__(0))
69 | # 1
70 |
71 |
72 | # To make the BinaryNode class act like a sequence, you can provide a custom
73 | # implementation of __getitem__ that traverses the object tree depth first.
74 |
75 |
76 | class IndexableNode(BinaryNode):
77 | def __init__(self, value, left=None, right=None):
78 | self.value = value
79 | self.left = left
80 | self.right = right
81 |
82 | def _search(self, count, index):
83 |         # One possible implementation of the search the book elides: a
84 |         # depth-first, in-order traversal. Returns a two-tuple of
85 |         # (matching node or None, number of nodes visited so far).
86 |         found = None
87 |         if self.left:
88 |             found, count = self.left._search(count, index)
89 |         if found is None:
90 |             if count == index:
91 |                 found = self
92 |             count += 1
93 |         if found is None and self.right:
94 |             found, count = self.right._search(count, index)
95 |         return (found, count)
87 |
88 | def __getitem__(self, index):
89 | found, _ = self._search(0, index)
90 | if not found:
91 | raise IndexError("Index out of range")
92 | return found.value
93 |
94 |
95 | # You can construct your binary tree as usual.
96 |
97 |
98 | tree = IndexableNode(
99 | 10,
100 | left=IndexableNode(
101 | 5,
102 | left=IndexableNode(2),
103 | right=IndexableNode(
104 | 6,
105 | right=IndexableNode(7)
106 | )
107 | ),
108 | right=IndexableNode(
109 | 15, left=IndexableNode(11)
110 | )
111 | )
112 |
113 |
114 | # But you can also access it like a list in addition to tree traversal.
115 |
116 |
117 | print('LRR', tree.left.right.right.value)
118 | print('Index 0 =', tree[0])
119 | print('Index 1 =', tree[1])
120 | print('11 in the tree?', 11 in tree)
121 | print('17 in the tree?', 17 in tree)
122 | print('Tree is', list(tree))
123 | # LRR 7
124 | # Index 0 = 2
125 | # Index 1 = 5
126 | # 11 in the tree? True
127 | # 17 in the tree? False
128 | # Tree is [2, 5, 6, 7, 10, 11, 15]
123 |
124 |
125 | # The problem is that implementing __getitem__ isn't enough to provide all of
126 | # the sequence semantics you'd expect.
127 |
128 |
129 | # len(tree)
130 | # TypeError: object of type 'IndexableNode' has no len()
131 |
132 |
133 | # The len built-in function requires another special method named __len__ that
134 | # must have an implementation for your custom sequence type.
135 |
136 |
137 | class SequenceNode(IndexableNode):
138 | def __len__(self):
139 | _, count = self._search(0, None)
140 | return count
141 |
142 | tree = SequenceNode(
143 | 10,
144 | left=IndexableNode(
145 | 5,
146 | left=IndexableNode(2),
147 | right=IndexableNode(
148 | 6,
149 | right=IndexableNode(7)
150 | )
151 | ),
152 | right=IndexableNode(
153 | 15, left=IndexableNode(11)
154 | )
155 | )
156 |
157 | print('Tree has %d nodes' % len(tree))
158 | # Tree has 7 nodes
159 |
160 | # Unfortunately, this still isn't enough. Also missing are the count and
161 | # index methods that a Python programmer would expect to see on a sequence
162 | # like list or tuple. Defining your own container types is much harder than
163 | # it looks.
164 |
165 | # To avoid this difficulty throughout the Python universe, the built-in
166 | # collections.abc module defines a set of abstract base classes that provide
167 | # all of the typical methods for each container type. When you subclass from
168 | # these abstract base classes and forget to implement required methods, the
169 | # module will tell you something is wrong.
170 |
171 |
172 | # (Importing Sequence from collections rather than collections.abc is
173 | # deprecated and was removed in Python 3.10.)
174 | from collections.abc import Sequence
174 |
175 |
176 | class BadType(Sequence):
177 | pass
178 |
179 | # foo = BadType()
180 | # TypeError: Can't instantiate abstract class BadType with abstract methods __getitem__, __len__
181 |
182 |
183 | # When you do implement all of the methods required by an abstract base class,
184 | # as I did above with SequenceNode, it will provide all of the additional
185 | # methods like index and count for free.
186 |
187 |
188 | class BetterNode(SequenceNode, Sequence):
189 | pass
190 |
191 | tree = BetterNode(
192 |     10,
193 |     left=BetterNode(
194 |         5,
195 |         left=BetterNode(2),
196 |         right=BetterNode(
197 |             6,
198 |             right=BetterNode(7)
199 |         )
200 |     ),
201 |     right=BetterNode(
202 |         15, left=BetterNode(11)
203 |     )
204 | )
205 |
206 | print('Index of 7 is', tree.index(7))
207 | print('Count of 10 is', tree.count(10))
208 | # Index of 7 is 3
209 | # Count of 10 is 1
208 |
209 |
210 | # The benefit of using these abstract base classes is even greater for more
211 | # complex types like Set and MutableMapping, which have a large number of
212 | # special methods that need to be implemented to match Python conventions.
213 |
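# A minimal sketch of that payoff (an illustration, not from the original
# text; ListBasedSet is a hypothetical name): implement only the three
# abstract methods of Set, and the full set API -- including operators
# like & and | -- comes for free.
from collections.abc import Set


class ListBasedSet(Set):
    def __init__(self, iterable):
        # Store unique elements in insertion order.
        self.elements = []
        for value in iterable:
            if value not in self.elements:
                self.elements.append(value)

    def __iter__(self):
        return iter(self.elements)

    def __contains__(self, value):
        return value in self.elements

    def __len__(self):
        return len(self.elements)


s1 = ListBasedSet('abcdef')
s2 = ListBasedSet('defghi')
print(list(s1 & s2))  # intersection supplied by the Set ABC
# ['d', 'e', 'f']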
214 |
215 | # Things to remember
216 |
217 | # 1. Inherit directly from Python's container types (like list or dict) for
218 | # simple use cases.
219 | # 2. Beware of the large number of methods required to implement custom
220 | # container types correctly.
221 | # 3. Have your custom container types inherit from the interface defined in
222 | # collections.abc to ensure that your classes match required interfaces
223 | # and behaviors.
224 |
--------------------------------------------------------------------------------
/item_29_use_plain_attributes.py:
--------------------------------------------------------------------------------
1 | # Chapter 4: Metaclasses and Attributes
2 |
3 |
4 | # Metaclasses are often mentioned in lists of Python's features, but few
5 | # understand what they accomplish in practice. The name metaclass vaguely
6 | # implies a concept above and beyond a class. Simply put, metaclasses let you
7 | # intercept Python's class statement and provide special behavior each time a
8 | # class is defined.
9 |
10 |
11 | # Similarly mysterious and powerful are Python's built-in features for
12 | # dynamically customizing attribute accesses. Along with Python's
13 | # object-oriented constructs, these facilities provide wonderful tools to ease
14 | # the transition from simple classes to complex ones.
15 |
16 |
17 | # However, with these powers come many pitfalls. Dynamic attributes enable you
18 | # to override objects and cause unexpected side effects. Metaclasses can
19 | # create extremely bizarre behaviors that are unapproachable to newcomers. It's
20 | # important that you follow the rule of least surprise and only use these
21 | # mechanisms to implement well understood idioms.
22 |
23 |
24 | # Item 29: Use plain attributes instead of get and set methods
25 |
26 |
27 | # Programmers coming to Python from other languages may naturally try to
28 | # implement explicit getter and setter methods in their classes.
29 |
30 |
31 | class OldResistor(object):
32 | def __init__(self, ohms):
33 | self._ohms = ohms
34 |
35 | def get_ohms(self):
36 | return self._ohms
37 |
38 | def set_ohms(self, ohms):
39 | self._ohms = ohms
40 |
41 |
42 | # Using these setters and getters is simple, but it's not Pythonic.
43 |
44 |
45 | r0 = OldResistor(50e3)
46 | print('Before: %5r' % r0.get_ohms())
47 | r0.set_ohms(10e3)
48 | print('After: %5r' % r0.get_ohms())
49 | # Before: 50000.0
50 | # After: 10000.0
51 |
52 |
53 | # Such methods are especially clumsy for operations like incrementing in
54 | # place.
55 |
56 |
57 | r0.set_ohms(r0.get_ohms() + 5e3)
58 | print('Add 5e3: %5r' % r0.get_ohms())
59 | # Add 5e3: 15000.0
60 |
61 |
62 | # These utility methods do help define the interface for your class, making it
63 | # easier to encapsulate functionality, validate usage, and define boundaries.
64 | # Those are important goals when designing a class to ensure you don't break
65 | # callers as your class evolves over time.
66 |
67 | # In Python, however, you almost never need to implement explicit setter or
68 | # getter methods. Instead, you should always start your implementations with
69 | # simple public attributes.
70 |
71 |
72 | class Resistor(object):
73 | def __init__(self, ohms):
74 | self.ohms = ohms
75 | self.voltage = 0
76 | self.current = 0
77 |
78 | r1 = Resistor(50e3)
79 | print('Before: %5r' % r1.ohms)
80 | # These make operations like incrementing in place natural and clear.
81 | r1.ohms = 10e3
82 | print('After: %5r' % r1.ohms)
83 | r1.ohms += 5e3
84 | print('Add 5e3: %5r' % r1.ohms)
85 | # Before: 50000.0
86 | # After: 10000.0
87 | # Add 5e3: 15000.0
88 |
89 |
90 | # Later, if you decide you need special behavior when an attribute is set, you
91 | # can migrate to the @property decorator and its corresponding setter
92 | # attribute. Here, I define a new subclass of Resistor that lets me vary the
93 | # current by assigning the voltage property. Note that in order to work
94 | # properly, the names of both the setter and getter methods must match the
95 | # intended property name.
96 |
97 |
98 | class VoltageResistance(Resistor):
99 | def __init__(self, ohms):
100 | super().__init__(ohms)
101 | self._voltage = 0
102 |
103 |
104 | @property
105 | def voltage(self):
106 | return self._voltage
107 |
108 | @voltage.setter
109 | def voltage(self, voltage):
110 | self._voltage = voltage
111 | self.current = self._voltage / self.ohms
112 |
113 |
114 | # Now, assigning the voltage property will run the voltage setter method,
115 | # updating the current property of the object to match.
116 |
117 |
118 | r2 = VoltageResistance(1e3)
119 | print('Before: %5r amps' % r2.current)
120 | r2.voltage = 10
121 | print('After: %5r amps' % r2.current)
122 | # Before: 0 amps
123 | # After: 0.01 amps
124 |
125 |
126 | # Specifying a setter on a property also lets you perform type checking and
127 | # validation on values passed to your class. Here, I define a class that
128 | # ensures all resistance values are above zero ohms:
129 |
130 |
131 | class BoundedResistance(Resistor):
132 | def __init__(self, ohms):
133 | super().__init__(ohms)
134 |
135 | @property
136 | def ohms(self):
137 | return self._ohms
138 |
139 | @ohms.setter
140 | def ohms(self, ohms):
141 | if ohms <= 0:
142 | raise ValueError('%f ohms must be > 0' % ohms)
143 | self._ohms = ohms
144 |
145 |
146 | # Assigning an invalid resistance to the attribute raises an exception.
147 |
148 |
149 | r3 = BoundedResistance(1e3)
150 | # r3.ohms = 0
151 | # ValueError: 0.000000 ohms must be > 0
152 |
153 |
154 | # An exception will also be raised if you pass an invalid value to the
155 | # constructor.
156 |
157 |
158 | # BoundedResistance(-5)
159 | # ValueError: -5.000000 ohms must be > 0
160 |
161 |
162 | # This happens because BoundedResistance.__init__ calls Resistor.__init__,
163 | # which assigns self.ohms = -5. That assignment causes the @ohms.setter method
164 | # from BoundedResistance to be called, immediately running the validation code
165 | # before object construction has completed.
166 |
167 | # You can even use @property to make attributes from parent classes immutable.
168 |
169 |
170 | class FixedResistance(Resistor):
171 | def __init__(self, ohms):
172 | super().__init__(ohms)
173 |
174 | @property
175 | def ohms(self):
176 | return self._ohms
177 |
178 | @ohms.setter
179 | def ohms(self, ohms):
180 | if hasattr(self, '_ohms'):
181 | raise AttributeError("Can't set attribute")
182 | self._ohms = ohms
183 |
184 |
185 | # Trying to assign to the property after construction raises an exception.
186 |
187 | r4 = FixedResistance(1e3)
188 | # r4.ohms = 2e3
189 | # AttributeError: Can't set attribute
190 |
191 |
192 | # The biggest shortcoming of @property is that the methods for an attribute
193 | # can only be shared by subclasses. Unrelated classes can't share the same
194 | # implementation. However, Python also supports descriptors (see Item 31: Use
195 | # descriptors for reusable @property methods) that enable reusable property
196 | # logic and many other use cases.
197 |
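# A minimal sketch of that reuse (an illustration under the approach of
# Item 31, not code from the original text; Positive and Circuit are
# hypothetical names). One descriptor class supplies the same validation
# logic to any number of attributes on unrelated classes.
class Positive(object):
    def __init__(self, name):
        # The attribute name is passed in explicitly.
        self.internal_name = '_' + name

    def __get__(self, instance, instance_type):
        if instance is None:
            return self
        return getattr(instance, self.internal_name)

    def __set__(self, instance, value):
        if value <= 0:
            raise ValueError('%f must be > 0' % value)
        setattr(instance, self.internal_name, value)


class Circuit(object):
    ohms = Positive('ohms')    # the same logic reused...
    volts = Positive('volts')  # ...without rewriting @property methods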
198 | # Finally, when you use @property methods to implement setters and getters, be
199 | # sure that the behavior you implement is not surprising. For example, don't
200 | # set other attributes in getter property methods.
201 |
202 | class MysteriousResistor(Resistor):
203 | def __init__(self, ohms):
204 | super().__init__(ohms)
205 |
206 | @property
207 | def ohms(self):
208 | self.voltage = self._ohms * self.current
209 | return self._ohms
210 |
211 | @ohms.setter
212 | def ohms(self, ohms):
215 | self._ohms = ohms
216 |
217 |
218 | # This leads to extremely bizarre behavior.
219 |
220 |
221 | r7 = MysteriousResistor(10)
222 | r7.current = 0.01
223 | print('Before: %5r' % r7.voltage)
224 | r7.ohms
225 | print('After: %5r' % r7.voltage)
226 | # Before: 0
227 | # After: 0.1
228 |
229 |
230 | # The best policy is to only modify related object state in @property.setter
231 | # methods. Be sure to avoid any other side effects the caller may not expect
232 | # beyond the object, such as importing modules dynamically, running slow
233 | # helper functions, or making expensive database queries. Users of your class
234 | # will expect its attributes to be like any other Python object: quick and
235 | # easy. Use normal methods to do anything more complex or slow.
236 |
237 |
238 | # Things to remember
239 |
240 | # 1. Define new class interfaces using simple public attributes, and avoid set
241 | # and get methods.
242 | # 2. Use @property to define special behavior when attributes are accessed on
243 | # your objects, if necessary.
244 | # 3. Follow the rule of least surprise and avoid weird side effects in your
245 | # @property methods.
246 | # 4. Ensure that @property methods are fast; do slow or complex work using
247 | # normal methods.
248 |
--------------------------------------------------------------------------------
/item_30_consider_property.py:
--------------------------------------------------------------------------------
1 | # Item 30: Consider @property instead of refactoring attributes
2 | from datetime import timedelta
3 | import datetime
4 |
5 |
6 | # The built-in @property decorator makes it easy for simple accesses of an
7 | # instance's attributes to act smarter (see Item 29: "Use plain attributes
8 | # instead of get and set methods"). One advanced but common use of @property
9 | # is transitioning what was once a simple numerical attribute into an
10 | # on-the-fly calculation. This is extremely helpful because it lets you
11 | # migrate all existing usage of a class to have new behaviors without
12 | # rewriting any of the call sites. It also provides an important stopgap for
13 | # improving your interfaces over time.
14 |
15 | # For example, say you want to implement a leaky bucket quota using plain
16 | # Python objects. Here, the Bucket class represents how much quota remains
17 | # and the duration for which the quota will be available:
18 |
19 |
20 | class Bucket(object):
21 | def __init__(self, period):
22 | self.period_delta = timedelta(seconds=period)
23 | self.reset_time = datetime.datetime.now()
24 | self.quota = 0
25 |
26 | def __repr__(self):
27 | return 'Bucket(quota=%d)' % self.quota
28 |
29 |
30 | # The leaky bucket algorithm works by ensuring that, whenever the bucket is
31 | # filled, the amount of quota does not carry over from one period to the next.
32 |
33 |
34 | def fill(bucket, amount):
35 | now = datetime.datetime.now()
36 | if now - bucket.reset_time > bucket.period_delta:
37 | bucket.quota = 0
38 | bucket.reset_time = now
39 | bucket.quota += amount
40 |
41 |
42 | # Each time a quota consumer wants to do something, it first must ensure that
43 | # it can deduct the amount of quota it needs to use.
44 |
45 |
46 | def deduct(bucket, amount):
47 | now = datetime.datetime.now()
48 | if now - bucket.reset_time > bucket.period_delta:
49 | return False
50 | if bucket.quota - amount < 0:
51 | return False
52 | bucket.quota -= amount
53 | return True
54 |
55 |
56 | # To use this class, first I fill the bucket.
57 |
58 | bucket = Bucket(60)
59 | fill(bucket, 100)
60 | print(bucket)
61 | # Bucket(quota=100)
62 |
63 | # Then, I deduct the quota that I need.
64 |
65 |
66 | if deduct(bucket, 99):
67 | print('Had 99 quota')
68 | else:
69 | print('Not enough for 99 quota')
70 | print(bucket)
71 | # Had 99 quota
72 | # Bucket(quota=1)
73 |
74 | # Eventually, I'm prevented from making progress because I try to deduct more
75 | # quota than is available. In this case, the bucket's quota level remains
76 | # unchanged.
77 |
78 |
79 | if deduct(bucket, 3):
80 | print('Had 3 quota')
81 | else:
82 | print('Not enough for 3 quota')
83 | print(bucket)
84 | # Not enough for 3 quota
85 | # Bucket(quota=1)
86 |
87 |
88 | # The problem with this implementation is that I never know what quota level
89 | # the bucket started with. The quota is deducted over the course of the period
90 | # until it reaches zero. At that point, deduct will always return False. When
91 | # that happens, it would be useful to know whether callers to deduct are being
92 | # blocked because the Bucket ran out of quota or because the Bucket never had
93 | # quota in the first place.
94 |
95 | # To fix this, I can change the class to keep track of the max_quota issued in
96 | # the period and the quota_consumed in the period.
97 |
98 |
99 | class Bucket(object):
100 | def __init__(self, period):
101 | self.period_delta = timedelta(seconds=period)
102 | self.reset_time = datetime.datetime.now()
103 | self.max_quota = 0
104 | self.quota_consumed = 0
105 |
106 | def __repr__(self):
107 | return ('Bucket(max_quota=%d, quota_consumed=%d)' %
108 | (self.max_quota, self.quota_consumed))
109 |
110 | # I use a @property method to compute the current level of quota on-the-fly
111 | # using these new attributes.
112 | @property
113 | def quota(self):
114 | return self.max_quota - self.quota_consumed
115 |
116 | # When the quota attribute is assigned, I take special action matching the
117 | # current interface of the class used by fill and deduct.
118 | @quota.setter
119 | def quota(self, amount):
120 | delta = self.max_quota - amount
121 | if amount == 0:
122 | # Quota being reset for a new period
123 | self.quota_consumed = 0
124 | self.max_quota = 0
125 | elif delta < 0:
126 | # Quota being filled for the new period
127 | assert self.quota_consumed == 0
128 | self.max_quota = amount
129 | else:
130 | # Quota being consumed during the period
131 | assert self.max_quota >= self.quota_consumed
132 | self.quota_consumed += delta
133 |
134 |
135 | # Rerunning the demo code from above produces the same results.
136 |
137 |
138 | bucket = Bucket(60)
139 | print('Initial', bucket)
140 | fill(bucket, 100)
141 | print('Filled', bucket)
142 |
143 | if deduct(bucket, 99):
144 | print('Had 99 quota')
145 | else:
146 | print('Not enough for 99 quota')
147 | print('Now', bucket)
148 |
149 | if deduct(bucket, 3):
150 | print('Had 3 quota')
151 | else:
152 | print('Not enough for 3 quota')
153 | print('Still', bucket)
154 | # Initial Bucket(max_quota=0, quota_consumed=0)
155 | # Filled Bucket(max_quota=100, quota_consumed=0)
156 | # Had 99 quota
157 | # Now Bucket(max_quota=100, quota_consumed=99)
158 | # Not enough for 3 quota
159 | # Still Bucket(max_quota=100, quota_consumed=99)
160 |
161 | # The best part is that the code using Bucket.quota doesn't have to change or
162 | # know that the class has changed. New usage of Bucket can do the right thing
163 | # and access max_quota and quota_consumed directly.
164 |
165 | # I especially like @property because it lets you make incremental progress
166 | # toward a better data model over time. Reading the Bucket example above, you
167 | # may have thought to yourself, "fill and deduct should have been implemented
168 | # as instance methods in the first place." Although you're probably right (see
169 | # Item 22: "Prefer helper classes over bookkeeping with dictionaries and
170 | # tuples"), in practice there are many situations in which objects start with
171 | # poorly defined interfaces or act as dumb data containers. This happens when
172 | # code grows over time, scope increases, multiple authors contribute without
173 | # any one considering long-term hygiene, etc.
174 |
175 | # @property is a tool to help you address problems you'll come across in real-
176 | # world code. Don't overuse it. When you find yourself repeatedly extending
177 | # @property methods, it's probably time to refactor your class instead of
178 | # further paving over your code's poor design.
179 |
180 |
181 | # Things to remember
182 |
183 | # 1. Use @property to give existing instance attributes new functionality.
184 | # 2. Make incremental progress toward better data models by using @property.
185 | # 3. Consider refactoring a class and all call sites when you find yourself
186 | # using @property too heavily.
187 |
--------------------------------------------------------------------------------
/item_33_validate_subclass.py:
--------------------------------------------------------------------------------
1 | # Item 33: Validate subclass with metaclass
2 |
3 |
4 | # One of the simplest applications of metaclasses is verifying that a class was
5 | # defined correctly. When you're building a complex class hierarchy, you may
6 | # want to enforce style, require overriding methods, or have strict
7 | # relationships between class attributes. Metaclasses enable these use cases by
8 | # providing a reliable way to run your validation code each time a new
9 | # subclass is defined.
10 |
11 | # Often a class's validation code runs in the __init__ method, when an object
12 | # of the class's type is constructed (see Item 28: "Inherit from
13 | # collections.abc for custom container types" for an example). Using
14 | # metaclasses for validation can raise errors much earlier.
15 |
16 | # Before I get into how to define a metaclass for validating subclasses, it's
17 | # important to understand the metaclass action for standard objects. A
18 | # metaclass is defined by inheriting from type. In the default case, a
19 | # metaclass receives the contents of associated class statements in its
20 | # __new__ method. Here, you can modify the class information before the type
21 | # is actually constructed:
22 |
23 |
24 | class Meta(type):
25 | def __new__(meta, name, bases, class_dict):
26 | print((meta, name, bases, class_dict))
27 | return type.__new__(meta, name, bases, class_dict)
28 |
29 |
30 | class MyClass(object, metaclass=Meta):
31 | stuff = 123
32 |
33 | def foo(self):
34 | pass
35 |
36 |
37 | # (<class '__main__.Meta'>,
38 | #  'MyClass',
39 | #  (<class 'object'>,),
40 | #  {'stuff': 123,
41 | #   'foo': <function MyClass.foo at 0x...>,
42 | #   '__qualname__': 'MyClass',
43 | #   '__module__': '__main__'})
44 |
45 |
46 | # The metaclass has access to the name of the class, the parent classes it
47 | # inherits from, and all of the class attributes that were defined in the
48 | # class's body.
49 |
50 |
51 | # Python 2 has slightly different syntax and specifies a metaclass using the
52 | # __metaclass__ class attribute. The Meta.__new__ interface is the same.
53 |
54 |
55 | # Python 2
56 | # class Meta(type):
57 | # def __new__(meta, name, bases, class_dict):
58 | # print((meta, name, bases, class_dict))
59 | # return type.__new__(meta, name, bases, class_dict)
60 | #
61 | #
62 | # class MyClassInPython2(object):
63 | # __metaclass__ = Meta
64 | # stuff = 123
65 | #
66 | # def foo(self):
67 | # pass
68 |
69 |
70 | # (<class '__main__.Meta'>,
71 | #  'MyClassInPython2',
72 | #  (<type 'object'>,),
73 | #  {'__module__': '__main__',
74 | #   'stuff': 123,
75 | #   '__metaclass__': <class '__main__.Meta'>,
76 | #   'foo': <function foo at 0x...>})
77 |
78 |
79 | # You can add functionality to the Meta.__new__ method in order to validate
80 | # all the parameters of a class before it's defined. For example, say you want
81 | # to represent any type of multi-sided polygon. You can do this by defining a
82 | # special validating metaclass and using it in the base class of your polygon
83 | # class hierarchy. Note that it's important not to apply the same validation
84 | # to the base class.
85 |
86 |
87 | class ValidatePolygon(type):
88 | def __new__(meta, name, bases, class_dict):
89 | '''Don't validate the abstract Polygon class'''
90 | if bases != (object,):
91 | if class_dict['sides'] < 3:
92 | raise ValueError('Polygons need 3+ sides')
93 | return type.__new__(meta, name, bases, class_dict)
94 |
95 |
96 | class Polygon(object, metaclass=ValidatePolygon):
97 | sides = None # Specified by subclass
98 |
99 | @classmethod
100 | def interior_angles(cls):
101 | return (cls.sides - 2) * 180
102 |
103 |
104 | class Triangle(Polygon):
105 | sides = 3
106 |
107 |
108 | # If you try to define a polygon with fewer than three sides, the validation
109 | # will cause the class statement to fail immediately after the class statement
110 | # body. This means your program will not even be able to start running when
111 | # you define such a class.
112 |
113 |
114 | print('Before class')
115 |
116 |
117 | class Line(Polygon):
118 | print('Before side')
119 | sides = 1
120 | print('After side')
121 | print('After class')
122 | # Before class
123 | # Before side
124 | # ValueError: Polygons need 3+ sides
125 |
126 |
127 | # Things to remember
128 |
129 | # 1. Use metaclasses to ensure that subclasses are well formed at the time they
130 | # are defined, before objects of their type are constructed.
131 | # 2. Metaclasses have slightly different syntax in Python 2 vs. Python 3.
132 | # 3. The __new__ method of metaclasses is run after the class statement's
133 | # entire body has been processed.
134 |
135 |
136 |
--------------------------------------------------------------------------------
/item_34_register_class_existence.py:
--------------------------------------------------------------------------------
1 | # Item 34: Register class existence with metaclass
2 | import json
3 |
4 |
5 | # Another common use of metaclasses is to automatically register types in your
6 | # program. Registration is useful for doing reverse lookups, where you need to
7 | # map a simple identifier back to a corresponding class.
8 |
9 | # For example, say you want to implement your own serialized representation of
10 | # a Python object using JSON. You need a way to take an object and turn it
11 | # into a JSON string. Here, I do this generically by defining a base class
12 | # that records the constructor parameters and turns them into a JSON
13 | # dictionary.
14 |
15 |
16 | class Serializable(object):
17 | def __init__(self, *args):
18 | self.args = args
19 |
20 | def serialize(self):
21 | return json.dumps({'args': self.args})
22 |
23 |
24 | # This class makes it easy to serialize simple, immutable data structures like
25 | # Point2D to a string.
26 |
27 |
28 | class Point2D(Serializable):
29 | def __init__(self, x, y):
30 | super().__init__(x, y)
31 | self.x = x
32 | self.y = y
33 |
34 | def __repr__(self):
35 | return 'Point2D(%d, %d)' % (self.x, self.y)
36 |
37 |
38 | point = Point2D(5, 3)
39 | print('Object: ', point)
40 | print('Serialized:', point.serialize())
41 | # Object: Point2D(5, 3)
42 | # Serialized: {"args": [5, 3]}
43 |
44 |
45 | # Now, I need to deserialize this JSON string and construct the Point2D
46 | # object it represents. Here, I define another class that can deserialize
47 | # the data from its Serializable parent class:
48 |
49 |
50 | class Deserializable(Serializable):
51 | @classmethod
52 | def deserialize(cls, json_data):
53 | params = json.loads(json_data)
54 | return cls(*params['args'])
55 |
56 |
57 | # Using Deserializable makes it easy to serialize and deserialize simple,
58 | # immutable objects in a generic way.
59 |
60 |
61 | class BetterPoint2D(Deserializable):
62 | def __init__(self, x, y):
63 | super().__init__(x, y)
64 | self.x = x
65 | self.y = y
66 |
67 | def __repr__(self):
68 | return 'Point2D(%d, %d)' % (self.x, self.y)
69 |
70 |
71 | point = BetterPoint2D(5, 3)
72 | print('Before: ', point)
73 | data = point.serialize()
74 | print('Serialized: ', data)
75 | after = BetterPoint2D.deserialize(data)
76 | print('After: ', after)
77 | # Before: Point2D(5, 3)
78 | # Serialized: {"args": [5, 3]}
79 | # After: Point2D(5, 3)
80 |
81 |
82 | # The problem with this approach is that it only works if you know the
83 | # intended type of the serialized data ahead of time (e.g., Point2D,
84 | # BetterPoint2D). Ideally, you'd have a large number of classes serializing to
85 | # JSON and one common function that could deserialize any of them back to a
86 | # corresponding Python object.
87 |
88 | # To do this, I include the serialized object's class name in the JSON data.
89 |
90 |
91 | class BetterSerializable(object):
92 | def __init__(self, *args):
93 | self.args = args
94 |
95 | def serialize(self):
96 | return json.dumps({
97 | 'class': self.__class__.__name__,
98 | 'args': self.args
99 | })
100 |
101 |     def __repr__(self):
102 |         # Generic repr; the base class has no x or y attributes of its own.
103 |         name = self.__class__.__name__
104 |         args = ', '.join(str(x) for x in self.args)
105 |         return '%s(%s)' % (name, args)
103 |
104 |
105 | # Then, I can maintain a mapping of class names back to constructors for those
106 | # objects. The general deserialize function will work for any class passed to
107 | # register_class.
108 |
109 |
110 | registry = {}
111 |
112 |
113 | def register_class(target_class):
114 | registry[target_class.__name__] = target_class
115 |
116 |
117 | def deserialize(data):
118 | params = json.loads(data)
119 | name = params['class']
120 | target_class = registry[name]
121 | return target_class(*params['args'])
122 |
123 |
124 | # To ensure that deserialize always works properly, I must call register_class
125 | # for every class I want to deserialize in the future.
126 |
127 |
128 | class EvenBetterPoint2D(BetterSerializable):
129 | def __init__(self, x, y):
130 | super().__init__(x, y)
131 | self.x = x
132 | self.y = y
133 |
134 | def __repr__(self):
135 | return 'EvenBetterPoint2D(%d, %d)' % (self.x, self.y)
136 |
137 |
138 | register_class(EvenBetterPoint2D)
139 |
140 |
141 | # Now, I can deserialize an arbitrary JSON string without having to know which
142 | # class it contains.
143 |
144 |
145 | point = EvenBetterPoint2D(5, 3)
146 | print('Before: ', point)
147 | data = point.serialize()
148 | print('Serialized: ', data)
149 | after = deserialize(data)
150 | print('After: ', after)
151 | # Before: EvenBetterPoint2D(5, 3)
152 | # Serialized: {"class": "EvenBetterPoint2D", "args": [5, 3]}
153 | # After: EvenBetterPoint2D(5, 3)
154 |
155 |
156 | # The problem with this approach is that you can forget to call
157 | # register_class.
158 |
159 |
160 | class Point3D(BetterSerializable):
161 | def __init__(self, x, y, z):
162 | super().__init__(x, y, z)
163 | self.x = x
164 | self.y = y
165 | self.z = z
166 |
167 | def __repr__(self):
168 | return 'Point3D(%d, %d, %d)' % (self.x, self.y, self.z)
169 |
170 | # Forgot to call register_class! Whoops!
171 |
172 |
173 | # This will cause your code to break at runtime, when you finally try to
174 | # deserialize an object of a class you forgot to register.
175 |
176 |
177 | point = Point3D(5, 9, -4)
178 | data = point.serialize()
179 | # deserialize(data)
180 | # KeyError: 'Point3D'
181 |
182 |
183 | # Even though you chose to subclass BetterSerializable, you won't actually get
184 | # all of its features if you forget to call register_class after your class
185 | # statement body. This approach is error prone and especially challenging for
186 | # beginners. The same omission can happen with class decorators in Python 3.
187 |
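# For comparison, a sketch of the class-decorator approach just mentioned
# (not from the original text; registered and Point4D are hypothetical
# names). It works, but forgetting the @registered line is exactly as easy
# as forgetting the register_class call.
def registered(cls):
    register_class(cls)
    return cls  # must return the class, or the name gets bound to None


@registered
class Point4D(BetterSerializable):
    def __init__(self, x, y, z, t):
        super().__init__(x, y, z, t)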
188 | # What if you could somehow act on the programmer's intent to use
189 | # BetterSerializable and ensure that register_class is called in all cases?
190 | # Metaclasses enable this by intercepting the class statement when subclasses
191 | # are defined (see Item 33: "Validate subclass with metaclass"). This lets
192 | # you register the new type immediately after the class's body.
193 |
194 |
195 | class Meta(type):
196 | def __new__(meta, name, bases, class_dict):
197 | cls = type.__new__(meta, name, bases, class_dict)
198 | register_class(cls)
199 | return cls
200 |
201 |
202 | class RegisteredSerializable(BetterSerializable, metaclass=Meta):
203 | pass
204 |
205 |
206 | # When I define a subclass of RegisteredSerializable, I can be confident that
207 | # the call to register_class happened and deserialize will always work as
208 | # expected.
209 |
210 |
211 | class Vector3D(RegisteredSerializable):
212 | def __init__(self, x, y, z):
213 | super().__init__(x, y, z)
214 | self.x, self.y, self.z = x, y, z
215 |
216 | def __repr__(self):
217 | return 'Vector3D(%d, %d, %d)' % (self.x, self.y, self.z)
218 |
219 |
220 | v3 = Vector3D(10, -7, 3)
221 | print('Before: ', v3)
222 | data = v3.serialize()
223 | print('Serialized: ', data)
224 | after = deserialize(data)
225 | print('After: ', after)
226 | # Before: Vector3D(10, -7, 3)
227 | # Serialized: {"class": "Vector3D", "args": [10, -7, 3]}
228 | # After: Vector3D(10, -7, 3)
229 |
230 | # Using metaclasses for class registration ensures that you'll never miss a
231 | # class as long as the inheritance tree is right. This works well for
232 | # serialization, as I've shown, and also applies to database
233 | # object-relationship mappings (ORMs), plug-in systems, and system hooks.
234 |
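# An aside beyond the original text: in Python 3.6+, the __init_subclass__
# hook provides the same guarantee without writing a metaclass (a minimal
# sketch; AutoRegistered is a hypothetical name).
class AutoRegistered(BetterSerializable):
    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        register_class(cls)  # runs automatically for every new subclass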
235 |
236 | # Things to remember
237 |
238 | # 1. Class registration is a helpful pattern for building modular Python
239 | # programs.
240 | # 2. Metaclasses let you run registration code automatically each time your
241 | # base class is subclassed in a program.
242 | # 3. Using metaclasses for class registration avoids errors by ensuring that
243 | # you never miss a registration call.
244 |
--------------------------------------------------------------------------------
/item_35_annotate_class_attributes.py:
--------------------------------------------------------------------------------
1 | # Item 35: Annotate class attributes with metaclass
2 |
3 |
4 | # One more useful feature enabled by metaclasses is the ability to modify or
5 | # annotate properties after a class is defined but before the class is
6 | # actually used. This approach is commonly used with descriptors (see Item 31:
7 | # "Use descriptors for reuseable @property methods") to give them more
8 | # introspection into how they're being used within their containing class.
9 |
10 | # For example, say you want to define a new class that represents a row in
11 | # your customer database. You'd like a corresponding property on the class
12 | # for each column in the database table. To do this, here I define a
13 | # descriptor class to connect attributes to column names.
14 |
15 |
16 | class Field(object):
17 | def __init__(self, name):
18 | self.name = name
19 | self.internal_name = '_' + self.name
20 |
21 | def __get__(self, instance, instance_type):
22 | if instance is None:
23 | return self
24 | return getattr(instance, self.internal_name)
25 |
26 | def __set__(self, instance, value):
27 | setattr(instance, self.internal_name, value)
28 |
29 |
30 | # With the column name stored in the Field descriptor, I can save all of the
31 | # per-instance state directly in the instance dictionary as protected fields
32 | # using the setattr and getattr built-in functions. At first, this seems to be
33 | # much more convenient than building descriptors with weakref to avoid memory
34 | # leaks.
35 |
36 | # Defining the class representing a row requires supplying the column name for
37 | # each class attribute.
38 |
39 |
40 | class Customer(object):
41 | first_name = Field('first_name')
42 | last_name = Field('last_name')
43 | prefix = Field('prefix')
44 | suffix = Field('suffix')
45 |
46 |
47 | # Using the class is simple. Here, you can see how the Field descriptors
48 | # modify the instance dictionary __dict__ as expected:
49 |
50 |
51 | foo = Customer()
52 | # print('Before: ', repr(foo.first_name), foo.__dict__)
53 | print('Before: ', foo.__dict__)
54 | foo.first_name = 'Euclid'
55 | print('After: ', repr(foo.first_name), foo.__dict__)
56 | # Before: {}
57 | # After: 'Euclid' {'_first_name': 'Euclid'}
58 |
59 |
60 | # But it seems redundant. I already declared the name of the field when I
61 | # assigned the constructed Field object to Customer.first_name in the class
62 | # statement body. Why do I also have to pass the field name ('first_name' in
63 | # this case) to the Field constructor?
64 |
65 | # The problem is that the order of operations in the Customer class definition
66 | # is the opposite of how it reads from left to right. First, the Field
67 | # constructor is called as Field('first_name'). Then, the return value of that
68 | # is assigned to Customer.first_name. There's no way for the Field to know
69 | # upfront which class attribute it will be assigned to.
70 |
71 | # To eliminate the redundancy, I can use a metaclass. Metaclasses let you hook
72 | # the class statement directly and take action as soon as a class body is
73 | # finished. In this case, I can use the metaclass to assign Field.name and
74 | # Field.internal_name on the descriptor automatically instead of manually
75 | # specifying the field name multiple times.
76 |
77 |
78 | class Meta(type):
79 | def __new__(meta, name, bases, class_dict):
80 | for key, value in class_dict.items():
81 | if isinstance(value, Field):
82 | value.name = key
83 | value.internal_name = '_' + key
84 | cls = type.__new__(meta, name, bases, class_dict)
85 | return cls
86 |
87 |
88 | # Here, I define a base class that uses the metaclass. All classes
89 | # representing database rows should inherit from this class to ensure that
90 | # they use the metaclass:
91 |
92 |
93 | class DatabaseRow(object, metaclass=Meta):
94 | pass
95 |
96 |
97 | # To work with the metaclass, the field descriptor is largely unchanged. The
98 | # only difference is that it no longer requires any arguments to be passed to
99 | # its constructor. Instead, its attributes are set by the Meta.__new__ method
100 | # above.
102 | class Field(object):
103 | def __init__(self):
104 | self.name = None
105 | self.internal_name = None
106 |
107 | def __get__(self, instance, instance_type):
108 | if instance is None:
109 | return self
110 | return getattr(instance, self.internal_name)
111 |
112 | def __set__(self, instance, value):
113 | setattr(instance, self.internal_name, value)
114 |
115 |
116 | # By using the metaclass, the new DatabaseRow base class, and the new Field
117 | # descriptor, the class definition for a database row no longer has the
118 | # redundancy from before.
119 |
120 |
121 | class BetterCustomer(DatabaseRow):
122 | first_name = Field()
123 | last_name = Field()
124 | prefix = Field()
125 | suffix = Field()
126 |
127 |
128 | # The behavior of the new class is identical to the old one.
129 |
130 |
131 | foo = BetterCustomer()
132 | # print('Before: ', repr(foo.first_name), foo.__dict__)
133 | print('Before: ', foo.__dict__)
134 | foo.first_name = 'Euler'
135 | print('After: ', repr(foo.first_name), foo.__dict__)
136 | # Before: {}
137 | # After: 'Euler' {'_first_name': 'Euler'}
138 |
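# An aside beyond the original text: Python 3.6+ added the __set_name__
# descriptor hook, which removes the same redundancy without a metaclass
# (a minimal sketch; NamedField is a hypothetical name).
class NamedField(object):
    def __set_name__(self, owner, name):
        # Called when the containing class body finishes, with the
        # attribute name this descriptor was assigned to.
        self.name = name
        self.internal_name = '_' + name

    def __get__(self, instance, instance_type):
        if instance is None:
            return self
        return getattr(instance, self.internal_name)

    def __set__(self, instance, value):
        setattr(instance, self.internal_name, value)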
139 |
140 | # Things to remember
141 |
142 | # 1. Metaclasses enable you to modify a class's attributes before the class is
143 | # fully defined.
144 | # 2. Descriptors and metaclasses make a powerful combination for declarative
145 | # behavior and runtime introspection.
146 | # 3. You can avoid both memory leaks and the weakref module by using
147 | # metaclasses along with descriptors.
148 |
--------------------------------------------------------------------------------
/item_37_use_threads.py:
--------------------------------------------------------------------------------
1 | # Item 37: Use threads for blocking I/O, avoid for parallelism
2 | import time
3 | from threading import Thread
4 | import select
5 |
6 |
7 | # The standard implementation of Python is called CPython. CPython runs a Python
8 | # program in two steps. First, it parses and compiles the source text into
9 | # bytecode. Then, it runs the bytecode using a stack-based interpreter. The
10 | # bytecode interpreter has state that must be maintained and kept coherent
11 | # while the Python program executes. Python enforces coherence with a mechanism
12 | # called the global interpreter lock (GIL).
13 |
14 | # Essentially, the GIL is a mutual-exclusion lock (mutex) that prevents
15 | # CPython from being affected by preemptive multi-threading, where one thread
16 | # takes control of a program by interrupting another thread. Such an
17 | # interruption could corrupt the interpreter state if it comes at an
18 | # unexpected time. The GIL prevents these interruptions and ensures that every
18 | # bytecode instruction works correctly with the CPython implementation and its
19 | # C-extension modules.
20 |
21 | # The GIL has an important negative side effect. With programs written in
22 | # languages like C++ or Java, having multiple threads of execution means your
23 | # program could utilize multiple CPU cores at the same time. Although Python
24 | # supports multiple threads of execution, the GIL causes only one of them to
25 | # make forward progress at a time. This means that when you reach for threads
26 | # to do parallel computation and speed up your Python programs, you will be
27 | # sorely disappointed.
28 |
29 | # For example, say you want to do something computationally intensive with
30 | # Python. I'll use a naive number factorization algorithm as a proxy.
31 |
32 |
33 | def factorize(number):
34 | for i in range(1, number + 1):
35 | if number % i == 0:
36 | yield i
37 |
38 | # Factoring a set of numbers in serial takes quite a long time.
39 |
40 | numbers = [2139079, 1214759, 1516637, 1852285]
41 | start = time.time()
42 | for number in numbers:
43 | list(factorize(number))
44 | end = time.time()
45 | print('Took %.3f seconds' % (end - start))
46 | # Took 0.624 seconds
47 |
48 |
49 | # Using multiple threads to do this computation would make sense in other
50 | # languages because you could take advantage of all the CPU cores of your
51 | # computer. Let me try that in Python. Here, I define a Python thread for
52 | # doing the same computation as before:
53 |
54 |
55 | class FactorizeThread(Thread):
56 | def __init__(self, number):
57 | super().__init__()
58 | self.number = number
59 | self.factors = []
60 |
61 | def run(self):
62 | self.factors = list(factorize(self.number))
63 |
64 |
65 | # Then, I start a thread for factorizing each number in parallel.
66 |
67 |
68 | start = time.time()
69 | threads = []
70 | for number in numbers:
71 | thread = FactorizeThread(number)
72 | thread.start()
73 | threads.append(thread)
74 |
75 | # Finally, I wait for all of the threads to finish.
76 |
77 | for thread in threads:
78 | thread.join()
79 | end = time.time()
80 | print('Took %.3f seconds' % (end - start))
81 | # Took 0.662 seconds
82 |
83 | # What's surprising is that this takes even longer than running factorize in
84 | # serial. With one thread per number, you might expect at most a 2x speedup
85 | # on the dual-core machine I used to run this code. But you would
86 | # never expect the performance of these threads to be worse when you have
87 | # multiple CPUs to utilize. This demonstrates the effect of the GIL on
88 | # programs running in the standard CPython interpreter.
89 |
90 | # There are ways to get CPython to utilize multiple cores, but they don't
91 | # work with the standard Thread class (see Item 41: "Consider
92 | # concurrent.futures for true parallelism", and the sketch below), and they
93 | # can require substantial effort. Knowing these limitations, you may wonder:
94 | # why does Python support threads at all? There are two good reasons.
95 |
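# A minimal sketch of that approach (assuming the technique of Item 41,
# not code from the original text). ProcessPoolExecutor sidesteps the GIL
# by farming work out to child processes, one per CPU core; the helper
# must be a top-level function so arguments and results can be pickled
# between processes.
from concurrent.futures import ProcessPoolExecutor


def factorize_all(number):
    return list(factorize(number))


if __name__ == '__main__':
    with ProcessPoolExecutor() as pool:  # defaults to one worker per core
        results = list(pool.map(factorize_all, numbers))
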
96 | # First, multiple threads make it easy for your program to seem like it's
97 | # doing multiple things at the same time. Managing the juggling act of
98 | # simultaneous tasks is difficult to implement yourself (see Item 40:
99 | # "Consider co-routines to run many functions concurrently" for an example).
100 | # With threads, you can leave it to Python to run your functions seemingly in
101 | # parallel. This works because CPython ensures a level of fairness between
102 | # Python threads of execution, even though only one of them makes forward
103 | # progress at a time due to the GIL.
104 |
105 | # The second reason Python supports threads is to deal with blocking I/O,
106 | # which happens when Python does certain types of system calls. System calls
107 | # are how your Python program asks your computer's operating system to
108 | # interact with the external environment on your behalf. Blocking I/O includes
109 | # things like reading and writing files, interacting with networks,
110 | # communicating with devices like displays, etc. Threads help you handle
111 | # blocking I/O by insulating your program from the time it takes for the
112 | # operating system to respond to your requests.
113 |
114 | # For example, say you want to send a signal to a remote-controlled helicopter
115 | # through a serial port. I'll use a slow system call (select) as a proxy for
116 | # this activity. This function asks the operating system to block for 0.1
117 | # second and then return control to my program, similar to what would happen
118 | # when using a synchronous serial port.
119 |
120 |
121 | def slow_systemcall():
122 | select.select([], [], [], 0.1)
123 |
124 |
125 | # Running this system call in serial requires a linearly increasing amount of
126 | # time.
127 |
128 | start = time.time()
129 | for _ in range(5):
130 | slow_systemcall()
131 | end = time.time()
132 | print('Took %.3f seconds' % (end - start))
133 | # Took 0.501 seconds
134 |
135 | # The problem is that while the slow_systemcall function is running, my
136 | # program can't make any other progress. My program's main thread of execution
137 | # is blocked on the select system call. This situation is awful in practice.
138 | # You need to be able to compute your helicopter's next move while you're
139 | # sending it a signal; otherwise, it'll crash. When you find yourself needing to
140 | # do blocking I/O and computation simultaneously, it's time to consider moving
141 | # your system calls to threads.
142 |
143 | # Here, I run multiple invocations of the slow_systemcall function in separate
144 | # threads. This would allow you to communicate with multiple serial ports (and
145 | # helicopters) at the same time, while leaving the main thread to do whatever
146 | # computation is required.
147 |
148 |
149 | start = time.time()
150 | threads = []
151 | for _ in range(5):
152 | thread = Thread(target=slow_systemcall)
153 | thread.start()
154 | threads.append(thread)
155 |
156 |
157 | # With the threads started, here I do some work to calculate the next
158 | # helicopter move before waiting for the system call threads to finish.
159 |
160 |
161 | def compute_helicopter_location(index):
162 | return index**2
163 |
164 | for i in range(5):
165 | compute_helicopter_location(i)
166 |
167 | for thread in threads:
168 | thread.join()
169 | end = time.time()
170 | print('Took %.3f seconds' % (end - start))
171 | # Took 0.101 seconds
172 |
173 | # The parallel time is about one fifth of the serial time. This shows that the
174 | # system calls all run in parallel from multiple Python threads even
175 | # though they're limited by the GIL. The GIL prevents my Python code from
176 | # running in parallel, but it has no negative effect on system calls. This
177 | # works because Python threads release the GIL just before they make system
178 | # calls and reacquire the GIL as soon as the system calls are done.
179 |
180 | # There are many other ways to deal with blocking I/O besides threads, such as
181 | # the asyncio built-in module, and these alternatives have important benefits.
182 | # But these options also require extra work in refactoring your code to fit a
183 | # different model of execution (see Item 40: "Consider coroutines to run many
184 | # functions concurrently"). Using threads is the simplest way to do blocking
185 | # I/O in parallel with minimal changes to your program.
186 |
187 |
188 | # Things to remember
189 |
190 | # 1. Python threads can't run bytecode in parallel on multiple CPU cores
191 | #    because of the global interpreter lock (GIL).
191 | # of the global interpreter lock (GIL).
192 | # 2. Python threads are still useful despite the GIL because they provide an
193 | # easy way to do multiple things at seemingly the same time.
194 | # 3. Use Python threads to make multiple system calls in parallel. This allows
195 | # you to do blocking I/O at the same time as computation.
196 |
--------------------------------------------------------------------------------
/item_38_use_lock.py:
--------------------------------------------------------------------------------
1 | # Item 38: Use lock to prevent data races in threads
2 | from threading import Thread
3 | from threading import Lock
4 |
5 |
6 | # After learning about the global interpreter lock (GIL) (see Item 37: "Use
7 | # threads for blocking I/O, Avoid for parallelism"), many new Python
8 | # programmers assume they can forgo using mutual-exclusion locks (mutexes) in
9 | # their code altogether. If the GIL is already preventing Python threads from
10 | # running on multiple CPU cores in parallel, it must also act as a lock for a
11 | # program's data structure, right? Some testing on types like lists and
12 | # dictionaries may even show that this assumption appears to hold.
13 |
14 | # But beware, this is truly not the case. The GIL will not protect you.
15 | # Although only one Python thread runs at a time, a thread's operations on
16 | # data structures can be interrupted between any two bytecode instructions in
17 | # the Python interpreter. This is dangerous if you access the same objects
18 | # from multiple threads simultaneously. The invariants of your data structures
19 | # could be violated at practically any time because of these interruptions,
20 | # leaving your program in a corrupted state.
21 |
22 | # For example, say you want to write a program that counts many things in
23 | # parallel, like sampling light levels from a whole network of sensors. If you
24 | # want to determine the total number of light samples over time, you can
25 | # aggregate them with a new class.
26 |
27 |
28 | class Counter(object):
29 | def __init__(self):
30 | self.count = 0
31 |
32 | def increment(self, offset):
33 | self.count += offset
34 |
35 |
36 | # Imagine that each sensor has its own worker thread because reading from the
37 | # sensor requires blocking I/O. After each sensor measurement, the worker
38 | # thread increments the counter up to a maximum number of desired readings.
39 |
40 |
41 | def worker(sensor_index, how_many, counter):
42 | for _ in range(how_many):
43 | # Read from the sensor
44 | counter.increment(1)
45 |
46 |
47 | # Here, I define a function that starts a worker thread for each sensor and
48 | # waits for them all to finish their readings:
49 |
50 |
51 | def run_threads(func, how_many, counter):
52 | threads = []
53 | for i in range(5):
54 | args = (i, how_many, counter)
55 | thread = Thread(target=func, args=args)
56 | threads.append(thread)
57 | thread.start()
58 |
59 | for thread in threads:
60 | thread.join()
61 |
62 |
63 | # Running five threads in parallel seems simple, and the outcome should be
64 | # obvious.
65 |
66 |
67 | how_many = 10**5
68 | counter = Counter()
69 | run_threads(worker, how_many, counter)
70 | print('Counter should be %d, found %d' % (5 * how_many, counter.count))
71 | # Counter should be 500000, found 468713
72 |
73 |
74 | # But this result is way off! What happened here? How could something so
75 | # simple go so wrong, especially since only one Python interpreter thread can
76 | # run at a time?
77 |
78 | # The Python interpreter enforces fairness between all of the threads that
79 | # are executing to ensure they get a roughly equal amount of processing time.
80 | # To do this, Python will suspend a thread as it's running and will resume
81 | # another thread in turn. The problem is that you don't know exactly when
82 | # Python will suspend your threads. A thread can even be paused seemingly
83 | # halfway through what looks like an atomic operation. That's what happened
84 | # in this case.
85 |
86 | # The Counter object's increment method looks simple.
87 | # counter.count += offset
88 | # But the += operator used on an object attribute actually instructs Python to
89 | # do three separate operations behind the scenes. The statement above is
90 | # equivalent to this:
91 | # value = getattr(counter, 'count')
92 | # result = value + offset
93 | # setattr(counter, 'count', result)
94 |
95 | # Python threads incrementing the counter can be suspended between any two of
96 | # these operations. This is problematic if the way the operations
97 | # interleave causes old versions of value to be assigned to the counter. Here
98 | # is an example of bad interaction between two threads, A and B:
99 |
100 |
101 | # Running in thread A
102 | value_a = getattr(counter, 'count')
103 | # context switch to thread B
104 | value_b = getattr(counter, 'count')
105 | result_b = value_b + 1
106 | setattr(counter, 'count', result_b)
107 | # context switch back to Thread A
108 | result_a = value_a + 1
109 | setattr(counter, 'count', result_a)
110 |
111 |
112 | # Thread A stomped on thread B, erasing all of its progress incrementing the
113 | # counter. This is exactly what happened in the light sensor example above.
114 |
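# You can see these separate steps directly with the dis built-in module
# (a quick sketch, not from the original text). The load, add, and store
# appear as distinct bytecode instructions, so a thread switch can land
# between any two of them.
import dis
dis.dis(Counter.increment)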
115 | # To prevent data races like these and other forms of data structure
116 | # corruption, Python includes a robust set of tools in the threading built-in
117 | # module. The simplest and most useful of them is the Lock class, a
118 | # mutual-exclusion lock (mutex).
119 |
120 | # By using a lock, I can have the Counter class protect its current value
121 | # against simultaneous access from multiple threads. Only one thread will be
122 | # able to acquire the lock at a time. Here, I use a with statement to acquire
123 | # and release the lock; this makes it easier to see which code is executing
124 | # while the lock is held (see Item 43: "Consider contextlib and with
125 | # statements for reusable try/finally behavior" for details):
126 |
127 |
128 | class LockingCounter(object):
129 | def __init__(self):
130 | self.lock = Lock()
131 | self.count = 0
132 |
133 | def increment(self, offset):
134 | with self.lock:
135 | self.count += offset
136 |
137 |
138 | # Now I run the worker threads as before, but use a LockingCounter instead.
139 |
140 | counter = LockingCounter()
141 | run_threads(worker, how_many, counter)
142 | print('Counter should be %d, found %d' % (5 * how_many, counter.count))
143 | # Counter should be 500000, found 500000
144 |
145 | # The result is exactly what I expect. The Lock solved the problem.
146 |
147 |
148 | # Things to remember
149 |
150 | # 1. Even though Python has a global interpreter lock, you're still
151 | #    responsible for protecting against data races between the threads in
152 | #    your programs.
152 | # 2. Your programs will corrupt their data structures if you allow multiple
153 | # threads to modify the same objects without locks.
154 | # 3. The Lock class in the threading built-in module is Python's standard
155 | # mutual exclusion lock implementation.
156 |
--------------------------------------------------------------------------------
/item_42_define_function_decorators.py:
--------------------------------------------------------------------------------
1 | # Chapter 6: Built-in Modules
2 |
3 |
4 | # Python takes a "batteries included" approach to the standard library. Many
5 | # other languages ship with a small number of common packages and require you
6 | # to look elsewhere for important functionality. Although Python also has an
7 | # impressive repository of community-built modules, it strives to provide, in
8 | # its default installation, the most important modules for common uses of the
9 | # language.
10 |
11 | # The full set of standard modules is too large to cover in this book. But
12 | # some of these built-in packages are so closely intertwined with idiomatic
13 | # Python that they may as well as be part of the language specification. These
14 | # essential build-in modules are especially important when writing the
15 | # intricate, error-prone parts of programs.
16 |
17 |
18 | # Item 42: Define function decorators with functools.wraps
19 | from functools import wraps
20 |
21 |
22 | # Python has special syntax for decorators that can be applied to functions.
23 | # Decorators have the ability to run additional code before and after any
24 | # calls to the functions they wrap. This allows them to access and modify
25 | # input arguments and return values. This functionality can be useful for
26 | # enforcing semantics, debugging, registering functions, and more.
27 |
28 | # For example, say you want to print the arguments and return value of a
29 | # function call. This is especially helpful when debugging a stack of function
30 | # calls from a recursive function. Here, I define such a decorator:
31 |
32 |
33 | def trace(func):
34 | def wrapper(*args, **kwargs):
35 | result = func(*args, **kwargs)
36 | print('%s(%r, %r) -> %r' %
37 | (func.__name__, args, kwargs, result))
38 | return result
39 | return wrapper
40 |
41 | # I can apply this to a function using the @ symbol.
42 |
43 |
44 | @trace
45 | def fibonacci(n):
46 | """Return the n-th Fibonacci number"""
47 | if n in (0, 1):
48 | return n
49 | return fibonacci(n - 2) + fibonacci(n - 1)
50 |
51 |
52 | # The @ symbol is equivalent to calling the decorator on the function it wraps
53 | # and assigning the return value to the original name in the same scope.
54 |
55 | # fibonacci = trace(fibonacci)
56 |
57 | # Calling this decorated function will run the wrapper code before and after
58 | # fibonacci runs, printing the arguments and return value at each level in the
59 | # recursive stack.
60 |
61 | fibonacci(3)
62 | # fibonacci((1,), {}) -> 1
63 | # fibonacci((0,), {}) -> 0
64 | # fibonacci((1,), {}) -> 1
65 | # fibonacci((2,), {}) -> 1
66 | # fibonacci((3,), {}) -> 2
67 |
68 | # This works well, but it has an unintended side effect. The value returned by
69 | # the decorator--the function that's called above--doesn't think it's named
70 | # fibonacci.
71 |
72 | print(fibonacci)
73 | # <function trace.<locals>.wrapper at 0x7fac26042a60>
74 |
75 | # The cause of this isn't hard to see. The trace function returns the wrapper
76 | # it defines. The wrapper function is what's assigned to the fibonacci name in
77 | # the containing module. This behavior is problematic because it undermines
78 | # tools that do introspection, such as debuggers (see Item 57: "Consider
79 | # interactive debugging with pdb") and object serializers (see Item 44: "Make
80 | # pickle reliable with copyreg").
80 |
81 | # For example, the help built-in function is useless on the decorated
82 | # fibonacci function.
83 |
84 | help(fibonacci)
85 | # Help on function wrapper in module __main__:
86 | #
87 | # wrapper(*args, **kwargs)
88 |
89 | # The solution is to use the wraps helper function from the functools built-in
90 | # module. This is a decorator that helps you write decorators. Applying it to
91 | # the wrapper function will copy all of the important meta-data about the
92 | # inner function to the outer function.
93 |
94 |
95 | def trace(func):
96 | @wraps(func)
97 | def wrapper(*args, **kwargs):
98 | result = func(*args, **kwargs)
99 | print('%s(%r, %r) -> %r' %
100 | (func.__name__, args, kwargs, result))
101 | return result
102 | return wrapper
103 |
104 | @trace
105 | def fibonacci(n):
106 | """Return the n-th Fibonacci number"""
107 | if n in (0, 1):
108 | return n
109 | return fibonacci(n - 2) + fibonacci(n - 1)
110 |
111 | # Now, running the help function produces the expected result, even though the
112 | # function is decorated.
113 |
114 | help(fibonacci)
115 | # Help on function fibonacci in module __main__:
116 | #
117 | # fibonacci(n)
118 | # Return the n-th Fibonacci number
119 |
120 | # Calling help is just one example of how decorators can subtly cause
121 | # problems. Python functions have many other standard attributes
122 | # (e.g. __name__, __module__) that must be preserved to maintain the
123 | # interface of functions in the language. Using wraps ensures that you'll
124 | # always get the correct behavior.
125 |
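# A quick check of that claim (an illustration, not from the original
# text): with @wraps applied, the standard attributes survive decoration.
print(fibonacci.__name__)  # 'fibonacci' (would be 'wrapper' without @wraps)
print(fibonacci.__module__)  # '__main__'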
126 |
127 | # Things to remember
128 |
129 | # 1. Decorators are Python syntax for allowing one function to modify another
130 | # function at runtime.
131 | # 2. Using decorators can cause strange behaviors in tools that do
132 | # introspection, such as debuggers.
133 | # 3. Use the wraps decorator from the functools built-in module when you
134 | # define your own decorators to avoid any issues.
135 |
--------------------------------------------------------------------------------
/item_43_consider_contexlib.py:
--------------------------------------------------------------------------------
1 | # Item 43: Consider contextlib and with statements for reusable try/finally
2 | # behavior
3 | from threading import Lock
4 | import logging
5 | from contextlib import contextmanager
6 |
7 |
8 | # The with statement in Python is used to indicate when code is running in a
9 | # special context. For example, mutual exclusion locks (see Item 38: "Use lock
10 | # to prevent data races in threads") can be used in with statements to
11 | # indicate that the indented code only runs while the lock is held.
12 |
13 | lock = Lock()
14 | with lock:
15 | print('Lock is held')
16 |
17 | # The example above is equivalent to this try/finally construction because the
18 | # Lock class properly enables the with statement.
19 |
20 | lock.acquire()
21 | try:
22 | print('Lock is held')
23 | finally:
24 | lock.release()
25 |
26 | # The with statement version of this is better because it eliminates the need
27 | # to write the repetitive code of the try/finally construction. It's easy to
28 | # make your objects and functions capable of use in with statements by using
29 | # the contextlib built-in module. This module contains the contextmanager
30 | # decorator, which lets a simple function be used in with statements. This is
31 | # much easier than defining a new class with the special methods __enter__ and
32 | # __exit__ (the standard way).
33 |
34 | # For example, say you want a region of your code to have more debug logging
35 | # sometimes. Here, I define a function that does logging at two severity
36 | # levels:
37 |
38 |
39 | def my_function():
40 | logging.debug('Some debug data')
41 | logging.error('Error log here')
42 | logging.debug('More debug data')
43 |
44 | # The default log level for my program is WARNING, so only the error message
45 | # will print to screen when I run the function.
46 |
47 | # my_function()
48 | # ERROR:root:Error log here
49 |
50 | # I can elevate the log level of this function temporarily by defining a
51 | # context manager. This helper function boosts the logging severity level
52 | # before running the code in the with block and reduces the logging severity
53 | # level afterward.
54 |
55 |
56 | @contextmanager
57 | def debug_logging(level):
58 | logger = logging.getLogger()
59 | old_level = logger.getEffectiveLevel()
60 | logger.setLevel(level)
61 | try:
62 | yield
63 | finally:
64 | logger.setLevel(old_level)
65 |
66 |
67 | # The yield expression is the point at which the with block's contents will
68 | # execute. Any exceptions that happen in the with block will be re-raised by
69 | # the yield expression for you to catch in the helper function (see Item 40:
70 | # "Consider coroutines to run many functions concurrently" for an explanation
71 | # of how that works).
72 |
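73 | # For comparison, here is a minimal sketch of the standard way to get the
74 | # same behavior by defining the __enter__ and __exit__ special methods
75 | # (DebugLoggingContext is a hypothetical name, not part of the original
76 | # example):
77 |
78 |
79 | class DebugLoggingContext(object):
80 |     def __init__(self, level):
81 |         self.level = level
82 |
83 |     def __enter__(self):
84 |         self.logger = logging.getLogger()
85 |         self.old_level = self.logger.getEffectiveLevel()
86 |         self.logger.setLevel(self.level)
87 |
88 |     def __exit__(self, exc_type, exc_value, traceback):
89 |         # Restore the previous level even if the with block raised.
90 |         self.logger.setLevel(self.old_level)
91 |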
73 | # Now, I can call the same logging function again, but in the debug_logging
74 | # context. This time, all of the debug messages are printed to the screen
75 | # during the with block. The same function running outside the with block
76 | # won't print debug messages.
77 |
78 | with debug_logging(logging.DEBUG):
79 | print('Inside:')
80 | my_function()
81 | print('After:')
82 | my_function()
83 | # Inside:
84 | # DEBUG:root:Some debug data
85 | # ERROR:root:Error log here
86 | # DEBUG:root:More debug data
87 | # After:
88 | # ERROR:root:Error log here
87 |
88 |
89 | # Using with Targets
90 |
91 | # The context manager passed to a with statement may also return an object.
92 | # This object is assigned to a local variable in the as part of the compound
93 | # statement. This gives the code running in the with block the ability to
94 | # directly interact with its context.
95 |
96 | # For example, say you want to write a file and ensure that it's always closed
97 | # correctly. You can do this by passing open to the with statement. open
98 | # returns a file handle for the as target of with and will close the handle
99 | # when the with block exits.
100 |
101 | with open('/tmp/my_output.txt', 'w') as handle:
102 | handle.write('This is some data!')
103 |
104 |
105 | # This approach is preferable to manually opening and closing the file handle
106 | # every time. It gives you confidence that the file is eventually closed when
107 | # execution leaves the with statement. It also encourages you to reduce the
108 | # amount of code that executes while the file handle is open, which is good
109 | # practice in general.
110 |
111 | # To enable your own functions to supply values for as targets, all you need
112 | # to do is yield a value from your context manager. For example, here I define
113 | # a context manager to fetch a Logger instance, set its level, and then yield
114 | # it for the as target.
115 |
116 |
117 | @contextmanager
118 | def log_level(level, name):
119 | logger = logging.getLogger(name)
120 | old_level = logger.getEffectiveLevel()
121 | logger.setLevel(level)
122 | try:
123 | yield logger
124 | finally:
125 | logger.setLevel(old_level)
126 |
127 |
128 | # Calling logging methods like debug on the as target will produce output
129 | # because the logging severity level is set low enough in the with block.
130 | # Using the logging module directly won't print anything because the default
131 | # logging severity level for the default program logger is WARNING.
132 |
133 |
134 | with log_level(logging.DEBUG, 'my_log') as logger:
135 | logger.debug('This is my message!')
136 | logging.debug('This will not print')
137 |
138 |
139 | # After the with statement exits, calling debug logging methods on the Logger
140 | # named 'my_log' will not print anything because the default logging severity
141 | # level has been restored. Error log messages will always print.
142 |
143 | logger = logging.getLogger('my_log')
144 | logger.debug('Debug will not print')
145 | logger.error('Error will print')
146 | # DEBUG:my_log:This is my message!   (printed earlier, inside the with block)
147 | # ERROR:my_log:Error will print
148 |
149 |
150 | # Things to remember
151 |
152 | # 1. The with statement allows you to reuse logic from try/finally blocks and
153 | # reduce visual noise.
154 | # 2. The contextlib built-in module provides a contextmanager decorator that
155 | # makes it easy to use your own functions in with statements.
156 | # 3. The value yielded by context managers is supplied to the as part of the
157 | # with statement. It's useful for letting your code directly access the
158 | # cause of the special context.
159 |
--------------------------------------------------------------------------------
/item_45_use_datetime.py:
--------------------------------------------------------------------------------
1 | # Item 45: Use datetime instead of time for local clocks
2 |
3 |
4 | # Coordinated Universal Time (UTC) is the standard, time-zone-independent
5 | # representation of time. UTC works great for computers that represent time as
6 | # seconds since the UNIX epoch. But UTC isn't ideal for humans. Humans
7 | # reference time relative to where they're currently located. People say
8 | # "noon" or "8 am" instead of "UTC 15:00 minus 7 hours." If your program
9 | # handles times, you'll probably find yourself converting time between UTC and
10 | # local clocks to make it easier for humans to understand.
11 |
12 | # Python provides two ways of accomplishing time zone conversion. The old
13 | # way, using the time built-in module, is disastrously error prone. The new
14 | # way, using the datetime built-in module, works great with some help from
15 | # the community-built package named pytz.
16 |
17 | # You should be acquainted with both time and datetime to thoroughly
18 | # understand why datetime is the best choice and time should be avoided.
19 |
20 |
21 | # The time Module
22 |
23 | # The localtime function from the time built-in module lets you convert a UNIX
24 | # timestamp (seconds since the UNIX epoch in UTC) to a local time that matches
25 | # the host computer's time zone (UTC+8 in the runs recorded below).
26 |
27 | from time import localtime, strftime
28 |
29 | now = 1407694710
30 | local_tuple = localtime(now)
31 | time_format = '%Y-%m-%d %H:%M:%S'
32 | time_str = strftime(time_format, local_tuple)
33 | print(time_str)
34 | # 2014-08-11 02:18:30
35 |
36 | # You'll often need to go the other way as well, starting with user input in
37 | # local time and converting it to UTC time. You can do this by using the
38 | # strptime function to parse the time string, then calling mktime to convert
39 | # local time to a UNIX timestamp.
40 |
41 | from time import mktime, strptime
42 |
43 | time_tuple = strptime(time_str, time_format)
44 | utc_now = mktime(time_tuple)
45 | print(utc_now)
46 | # 1407694710.0
47 |
48 | # How do you convert local time in one time zone to local time in another? For
49 | # example, say you are taking a flight between San Francisco and New York, and
50 | # want to know what time it will be in San Francisco once you've arrived in
51 | # New York.
52 |
53 | # Directly manipulating the return values from the time, localtime, and
54 | # strptime functions to do time zone conversions is a bad idea. Time zones
55 | # change all the time due to local laws. It's too complicated to manage
56 | # yourself, especially if you want to handle every global city for flight
57 | # departure and arrival.
58 |
59 | # Many operating systems have configuration files that keep up with the time
60 | # zone changes automatically. Python lets you use these time zones through the
61 | # time module. For example, here I parse the departure time from the San
62 | # Francisco time zone of Pacific Daylight Time:
63 |
64 | # The book parses the zone name too, using the format string
65 | # '%Y-%m-%d %H:%M:%S %Z' with '2014-05-01 15:45:16 PDT'. That only succeeds
66 | # on platforms whose C library recognizes PDT; elsewhere strptime raises:
67 | # ValueError: time data '2014-05-01 15:45:16 PDT' does not match format '%Y-%m-%d %H:%M:%S %Z'
68 | parse_format = '%Y-%m-%d %H:%M:%S'
69 | depart_sfo = '2014-05-01 15:45:16'
70 | time_tuple = strptime(depart_sfo, parse_format)
71 | time_str = strftime(time_format, time_tuple)
72 | print(time_str)
73 | # 2014-05-01 15:45:16
73 |
74 | # Even on a platform where PDT works with strptime, you might assume that
75 | # other time zones known to the computer will also work. Unfortunately, this
76 | # isn't the case. Instead, strptime raises an exception when it sees Eastern
77 | # Daylight Time (the time zone for New York).
78 |
79 | arrival_nyc = '2014-05-01 23:33:24 EDT'
80 | # time_tuple = strptime(arrival_nyc, time_format)
81 | # ValueError: unconverted data remains: EDT
82 |
83 | # The problem here is the platform-dependent nature of the time module. Its
84 | # actual behavior is determined by how the underlying C functions work with
85 | # the host operating system. This makes the functionality of the time module
86 | # unreliable in Python. The time module fails to consistently work properly
87 | # for multiple local times. Thus, you should avoid the time module for this
88 | # purpose. If you must use time, only use it to convert between UTC and the
89 | # host computer's local time. For all other types of conversions, use the
90 | # datetime module.
91 |
92 |
93 | # The datetime Module
94 |
95 | # The second option for representing times in Python is the datetime class
96 | # from the datetime built-in module. Like the time module, datetime can be
97 | # used to convert from the current time in UTC to local time.
98 |
99 | # Here, I take a time in UTC and convert it to the computer's local time
100 | # (Pacific Daylight Time in the book; the output below is from a UTC+8 host):
101 |
102 | from datetime import datetime, timezone
103 |
104 | now = datetime(2014, 8, 10, 18, 18, 30)
105 | now_utc = now.replace(tzinfo=timezone.utc)
106 | now_local = now_utc.astimezone()
107 | print(now_local)
108 | # 2014-08-11 02:18:30+08:00
109 |
110 | # The datetime module can also easily convert a local time back to a UNIX
111 | # timestamp in UTC.
112 |
113 | time_str = '2014-08-10 11:18:30'
114 | now = datetime.strptime(time_str, time_format)
115 | time_tuple = now.timetuple()
116 | utc_now = mktime(time_tuple)
117 | print(utc_now)
118 | # 1407640710.0
119 |
120 | # Unlike the time module, the datetime module has facilities for reliably
121 | # converting from one local time to another local time. However, datetime
122 | # only provides the machinery for time zone operations with its tzinfo class
123 | # and related methods. What's missing are the time zone definitions besides
124 | # UTC.
125 |
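126 | # Without third-party help, the only tzinfo instances the standard library
127 | # hands you are fixed offsets (a minimal sketch; the offset below is EDT's
128 | # UTC-4, with no daylight saving rules attached):
129 | from datetime import timedelta
130 | edt = timezone(timedelta(hours=-4))
131 | print(datetime(2014, 5, 1, 23, 33, 24, tzinfo=edt).astimezone(timezone.utc))
132 | # 2014-05-02 03:33:24+00:00
133 |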
126 | # Luckily, the Python community has addressed this gap with the pytz module
127 | # that's available for download from the Python Package Index
128 | # (https://pypi.python.org/pypi/pytz/). pytz contains a full database of every
129 | # time zone definition you might need.
130 |
131 | # To use pytz effectively, you should always convert local times to UTC first.
132 | # Perform any datetime operations you need on the UTC values (such as
133 | # offsetting). Then, convert to local times as a final step.
134 |
135 | # For example, here I convert an NYC flight arrival time to a UTC datetime.
136 | # Although some of these calls seem redundant, all of them are necessary when
137 | # using pytz.
138 |
139 | import pytz
140 |
141 | arrival_nyc = '2014-05-01 23:33:24'
142 | nyc_dt_naive = datetime.strptime(arrival_nyc, time_format)
143 | eastern = pytz.timezone('US/Eastern')
144 | nyc_dt = eastern.localize(nyc_dt_naive)
145 | utc_dt = pytz.utc.normalize(nyc_dt.astimezone(pytz.utc))
146 | print(utc_dt)
147 | # 2014-05-02 03:33:24+00:00
148 |
149 | # Once I have a UTC datetime, I can convert it to San Francisco local time.
150 |
151 | pacific = pytz.timezone('US/Pacific')
152 | sf_dt = pacific.normalize(utc_dt.astimezone(pacific))
153 | print(sf_dt)
154 | # 2014-05-01 20:33:24-07:00
155 |
156 | # Just as easily, I can convert it to the local time in Nepal.
157 |
158 | nepal = pytz.timezone('Asia/Katmandu')
159 | nepal_dt = nepal.normalize(utc_dt.astimezone(nepal))
160 | print(nepal_dt)
161 | # 2014-05-02 09:18:24+05:45
162 |
163 | # With datetime and pytz, these conversions are consistent across all
164 | # environments regardless of what operating system the host computer is
165 | # running.
166 |
167 |
168 | # Things to remember
169 |
170 | # 1. Avoid using the time module for translating between different time zones.
171 | # 2. Use the datetime built-in module along with the pytz module to reliably
172 | # convert between times in different time zones.
173 | # 3. Always represent time in UTC and do conversions to local time as the
174 | # final step before presentation.
175 |
--------------------------------------------------------------------------------
/item_47_use_decimal.py:
--------------------------------------------------------------------------------
1 | # Item 47: Use decimal when precision is paramount
2 | from decimal import Decimal
3 | from decimal import ROUND_UP
4 |
5 |
6 | # Python is an excellent language for writing code that interacts with numerical
7 | # data. Python's integer type can represent values of any practical size. Its
8 | # double-precision floating point type complies with the IEEE 754 standard.
9 | # The language also provides a standard complex number type for imaginary
10 | # values. However, these aren't enough for every situation.
11 |
12 | # For example, say you want to compute the amount to charge a customer for an
13 | # international phone call. You know the time in minutes and seconds that the
14 | # customer was on the phone (say, 3 minutes 42 seconds). You also have a set
15 | # rate for the cost of calling Antarctica from the United States
16 | # ($1.45/minute). What should the charge be?
17 |
18 | # With floating point math, the computed charge seems reasonable.
19 |
20 | rate = 1.45
21 | seconds = 3*60 + 42
22 | cost = rate * seconds / 60
23 | print(cost)
24 | # 5.364999999999999
25 |
26 | # But rounding it to the nearest whole cent rounds down when you want it to
27 | # round up to properly cover all costs incurred by the customer.
28 |
29 | print(round(cost, 2))
30 | # 5.36
31 |
32 | # Say you also want to support very short phone calls between places that are
33 | # much cheaper to connect. Here, I compute the charge for a phone call that
34 | # was 5 seconds long with a rate of $0.05/minute:
35 |
36 | rate = 0.05
37 | seconds = 5
38 | cost = rate * seconds / 60
39 | print(cost)
40 | # 0.004166666666666667
41 |
42 | # The resulting float is so low that it rounds down to zero. This won't do!
43 |
44 | print(round(cost, 2))
45 | # 0.0
46 |
47 | # The solution is to use the Decimal class from the decimal built-in module.
48 | # The Decimal class provides fixed point math of 28 decimal places by default.
49 | # It can go even higher if required. This works around the precision issues in
50 | # IEEE 754 floating point numbers. The class also gives you more control over
51 | # rounding behaviors.
52 |
53 | # For example, redoing the Antarctica calculation with Decimal results in an
54 | # exact charge instead of an approximation.
55 |
56 | rate = Decimal('1.45')
57 | seconds = Decimal('222') # 3*60 + 42
58 | cost = rate * seconds / Decimal('60')
59 | print(cost)
60 | # 5.365
61 |
62 | # The Decimal class has a built-in function for rounding to exactly the
63 | # decimal place you need with rounding behavior you want.
64 |
65 | rounded = cost.quantize(Decimal('0.01'), rounding=ROUND_UP)
66 | print(rounded)
67 | # 5.37
68 |
69 | # Using the quantize method this way also properly handles the small usage
70 | # case for short, cheap phone calls. Here, you can see the Decimal cost is
71 | # still less than 1 cent for the call:
72 |
73 | rate = Decimal('0.05')
74 | seconds = Decimal('5')
75 | cost = rate * seconds / Decimal('60')
76 | print(cost)
77 | # 0.004166666666666666666666666667
78 |
79 | # But the quantize behavior ensures that this is rounded up to one whole cent.
80 |
81 | rounded = cost.quantize(Decimal('0.01'), rounding=ROUND_UP)
82 | print(rounded)
83 | # 0.01
84 |
85 | # While Decimal works great for fixed point numbers, it still has limitations
86 | # in its precision (e.g. 1/3 will be an approximation). For representing
87 | # rational numbers with no limit to precision, consider using the Fraction
88 | # class from the fractions built-in module.
89 |
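90 | # A quick illustration of the difference: Fraction represents 1/3 exactly,
91 | # while Decimal division has to round at some finite precision.
92 | from fractions import Fraction
93 | print(Fraction(1, 3) + Fraction(1, 3) + Fraction(1, 3))
94 | print(Decimal(1) / Decimal(3))
95 | # 1
96 | # 0.3333333333333333333333333333
97 |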
90 |
91 | # Things to remember
92 |
93 | # 1. Python has built-in types and classes in modules that can represent
94 | # practically every type of numerical value.
95 | # 2. The Decimal class is ideal for situations that require high precision and
96 | # exact rounding behavior, such as computations of monetary values.
97 |
--------------------------------------------------------------------------------
/item_48_community_built_modules.py:
--------------------------------------------------------------------------------
1 | # Item 48: Know where to find community built modules
2 |
3 |
4 | # Python has a central repository of modules (https://pypi.python.org) for you
5 | # to install and use in your programs. These modules are built and maintained
6 | # by people like you: the Python community. When you find yourself facing an
7 | # unfamiliar challenge, the Python Package Index (PyPI) is a great place to
8 | # look for code that will get you closer to your goal.
9 |
10 | # To use the Package Index, you'll need to use a command-line tool named pip.
11 | # pip is installed by default in Python 3.4 and above (it's also accessible
12 | # with python -m pip). For earlier versions, you can find instructions for
13 | # pip on the Python Packaging website (https://packaging.python.org).
14 |
15 | # Once installed, using pip to install a new module is simple. For example,
16 | # here I install the pytz module that I used in another item in this chapter
17 | # (see Item 45: Use datetime instead of time for local clocks).
18 |
19 | # $ pip3 install pytz
20 |
21 | # In the example above, I used the pip3 command-line tool to install the
22 | # Python 3 version of the package. The pip tool (without the 3) is also
23 | # available for installing packages for Python 2. The majority of popular
24 | # packages are now available for either version of Python (see Item 1: "Know
25 | # which version of Python you're using"). pip can also be used with pyvenv to
26 | # track sets of packages to install for your projects (see Item 53: "Use
27 | # virtual environments for isolated and reproducible dependencies").
28 |
29 | # Each module in the PyPI has its own software license. Most of the packages,
30 | # especially the popular ones, have free or open source licenses (see
31 | # http://opensource.org for details). In most cases, these licenses allow you
32 | # to include a copy of the module with your program (when in doubt, talk to a
33 | # lawyer).
34 |
35 |
36 | # Things to remember
37 |
38 | # 1. The Python Package Index (PyPI) contains a wealth of common packages
39 | # that are built and maintained by the Python community.
40 | # 2. pip is the command-line tool to use for installing packages from PyPI.
41 | # 3. pip is installed by default in Python 3.4 and above; you must install it
42 | # yourself for older versions.
43 | # 4. The majority of PyPI modules are free and open source software.
44 |
--------------------------------------------------------------------------------
/item_51_define_a_root_exception.py:
--------------------------------------------------------------------------------
1 | # Item 51: Define a root exception to insulate callers from APIs
2 |
3 |
4 | # When you're defining a module's API, the exceptions you throw are just as
5 | # much a part of your interface as the functions and classes you define (see
6 | # Item 14: "Prefer exceptions to returning None").
7 |
8 | # Python has a built-in hierarchy of exceptions for the language and standard
9 | # library. There's a draw to using the built-in exception types for reporting
10 | # errors instead of defining your own new types. For example, you could raise
11 | # a ValueError exception whenever an invalid parameter is passed to your
12 | # function.
13 |
14 |
15 | def determine_weight(volume, density):
16 | if density < 0:
17 | raise ValueError('Density must be positive')
18 | # ...
19 |
20 |
21 | # In some cases, using ValueError makes sense, but for APIs it's much more
22 | # powerful to define your own hierarchy of exceptions. You can do this by
23 | # providing a root Exception in your module. Then, have all other exceptions
24 | # raised by that module inherit from the root exception.
25 |
26 | # my_module.py
27 | class Error(Exception):
28 | """Base-class for all exceptions raised by this module."""
29 | pass
30 |
31 |
32 | class InvalidDensityError(Error):
33 | """There was a problem with a provided density value."""
34 | pass
35 |
36 | # Having a root exception in a module makes it easy for consumers of your API
37 | # to catch all of the exceptions that you raise on purpose. For example, here
38 | # a consumer of your API makes a function call with a try/except statement that
39 | # catches your root exception:
40 |
41 | # try:
42 | # weight = my_module.determine_weight(1, -1)
43 | # except my_module.Error as e:
44 | # logging.error('Unexpected error: %s', e)
45 |
46 | # The try/except prevents your API's exceptions from propagating too far
47 | # upward and breaking the calling program. It insulates the calling code from
48 | # your API. This insulation has three helpful effects.
49 |
50 | # First, root exceptions let callers understand when there's a problem with
51 | # their usage of your API. If callers are using your API properly, they should
52 | # catch the various exceptions that you deliberately raise. If they don't
53 | # handle such an exception, it will propagate all the way up to the insulating
54 | # except block that catches your module's root exception. That block can bring
55 | # the exception to the attention of the API consumer, giving them a chance to
56 | # add proper handling of the exception type.
57 |
58 | # try:
59 | # weight = my_module.determine_weight(1, -1)
60 | # except my_module.InvalidDensityError:
61 | # weight = 0
62 | # except my_module.Error as e:
63 | # logging.error('Bug in the calling code: %s', e)
64 |
65 | # The second advantage of using root exceptions is that they can help find
66 | # bugs in your API module's code. If your code only deliberately raises
67 | # exceptions that you define within your module's hierarchy, then all other
68 | # types of exceptions raised by your module must be the ones that you didn't
69 | # intend to raise. These are bugs in your API's code.
70 |
71 | # Using the try/except statement above will not insulate API consumers from
72 | # bugs in your API module's code. To do that, the caller needs to add another
73 | # except block that catches Python's base Exception class. This allows the
74 | # API consumer to detect when there's a bug in the API module's implementation
75 | # that needs to be fixed.
76 |
77 | # try:
78 | # weight = my_module.determine_weight(1, -1)
79 | # except my_module.InvalidDensityError:
80 | # weight = 0
81 | # except my_module.Error as e:
82 | # logging.error('Bug in the calling code: %s', e)
83 | # except Exception as e:
84 | # logging.error('Bug in the API code: %s', e)
85 |
86 | # The third impact of using root exceptions is future-proofing your API. Over
87 | # time, you may want to expand your API to provide more specific exceptions in
88 | # certain situations. For example, you could add an Exception subclass that
89 | # indicates the error condition of supplying negative densities.
90 |
91 |
92 | # my_module.py
93 | class NegativeDensityError(InvalidDensityError):
94 | """A provided density value was negative."""
95 | pass
96 |
97 |
98 | def determine_weight(volume, density):
99 | if density < 0:
100 | raise NegativeDensityError
101 |
102 |
103 | # The calling code will continue to work exactly as before because it already
104 | # catches InvalidDensityError exceptions (the parent class of
105 | # NegativeDensityError). In the future, the caller could decide to
106 | # special-case the new type of exception and change its behavior accordingly.
107 |
108 | # try:
109 | # weight = my_module.determine_weight(1, -1)
110 | # except my_module.NegativeDensityError as e:
111 | # raise ValueError('Must supply non-negative density') from e
112 | # except my_module.InvalidDensityError:
113 | # weight = 0
114 | # except my_module.Error as e:
115 | # logging.error('Bug in the calling code: %s', e)
116 | # except Exception as e:
117 | # logging.error('Bug in the API code: %s', e)
118 |
119 | # You can take API future-proofing further by providing a broader set of
120 | # exceptions directly below the root exception. For example, imagine you had
121 | # one set of errors related to calculating weights, another related to
122 | # calculating volume, and a third related to calculating density.
123 |
124 |
125 | # my_module.py
126 | class WeightError(Error):
127 | """Base-class for weight calculation errors."""
128 |
129 |
130 | class VolumeError(Error):
131 | """Base-class for volume calculation errors."""
132 |
133 |
134 | class DensityError(Error):
135 | """Base-class for density calculation errors."""
136 |
137 |
138 | # Specific exceptions would inherit from these general exceptions. Each
139 | # intermediate exception acts as its own kind of root exception. This makes
140 | # it easier to insulate layers of calling code from API code based on broad
141 | # functionality. This is much better than having all callers catch a long
142 | # list of very specific Exception subclasses.
143 |
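144 | # For instance, a more specific error could later slot in under one of these
145 | # intermediate exceptions (NegativeWeightError is a hypothetical example):
146 |
147 |
148 | class NegativeWeightError(WeightError):
149 |     """A computed weight was negative."""
150 |     pass
151 |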
144 |
145 | # Things to remember
146 |
147 | # 1. Defining root exceptions for your modules allows API consumers to
148 | # insulate themselves from your API.
149 | # 2. Catching root exceptions can help you find bugs in code that consumes an
150 | # API.
151 | # 3. Catching the Python Exception base class can help you find bugs in API
152 | # implementations.
153 | # 4. Intermediate root exceptions let you add more specific types of
154 | # exceptions in the future without breaking your API consumers.
155 |
--------------------------------------------------------------------------------
/item_52_break_circular_dependencies.py:
--------------------------------------------------------------------------------
1 | # Item 52: Know how to break circular dependencies
2 |
3 |
4 | # Inevitably, while you're collaborating with others, you'll find a mutual
5 | # interdependency between modules. It can even happen while you work by
6 | # yourself on the various parts of a single program.
7 |
8 | # The best way to break a circular dependency is to refactor the mutual
9 | # dependencies into a separate module at the bottom of the dependency tree.
10 | # When that kind of refactoring isn't practical, there are three other ways
11 | # to break circular dependencies.
9 |
10 | # Reordering Imports
11 | # The first approach is to change the order of imports.
12 |
13 | # Import, Configure, Run
14 | # A second solution to the circular imports problem is to have your modules
15 | # minimize side effects at import time.
16 |
17 | # Dynamic Import
18 | # The third--and often simplest--solution to the circular imports problem is
19 | # to use an import statement within a function or method. This is called a
20 | # dynamic import because the module import happens while the program is
21 | # running, not while the program is first starting up and initializing its
22 | # modules.
23 |
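24 | # A minimal sketch of a dynamic import (module names follow the book's
25 | # dialog/app example; left commented out because those modules don't exist
26 | # in this repo):
27 |
28 | # # dialog.py
29 | # def show():
30 | #     import app  # resolved when show() runs, not at program startup
31 | #     app.display()
32 |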
24 |
25 | # Things to remember
26 |
27 | # 1. Circular dependencies happen when two modules must call into each other
28 | # at import time. They can cause your program to crash at startup.
29 | # 2. The best way to break a circular dependency is refactoring mutual
30 | # dependencies into a separate module at the bottom of the dependency tree.
31 | # 3. Dynamic imports are the simplest solution for breaking a circular
32 | # dependency between modules while minimizing refactoring and complexity.
33 |
--------------------------------------------------------------------------------
/item_54_consider_module_scoped_code.py:
--------------------------------------------------------------------------------
1 | # Chapter 8: Production
2 |
3 |
4 | # Putting a Python program to use requires moving it from a development
5 | # environment to a production environment. Supporting disparate configurations
6 | # like this can be a challenge. Making programs that are dependable in
7 | # multiple situations is just as important as making programs with correct
8 | # functionality.
9 |
10 | # The goal is to productionize your Python programs and make them bulletproof
11 | # while they're in use. Python has built-in modules that aid in hardening your
12 | # programs. It provides facilities for debugging, optimizing, and testing to
13 | # maximize the quality and performance of your programs at runtime.
14 |
15 |
16 | # Item 54: Consider module-scoped code to configure deployment environments
17 |
18 | # A deployment environment is a configuration in which your program runs.
19 | # Every program has at least one deployment environment, the production
20 | # environment. The goal of writing a program in the first place is to put it
21 | # to work in the production environment and achieve some kind of outcome.
22 |
23 | # Writing or modifying a program requires being able to run it on the computer
24 | # you use for developing. The configuration of your development environment
25 | # may be much different from that of your production environment. The trouble
26 | # is that production environments often require many external assumptions
27 | # that are hard to reproduce in development environments.
28 |
29 | # For example, say you want to run your program in a web server container and
30 | # give it access to a database. This means that every time you want to modify
31 | # your program's code, you need to run a server container, the database must
32 | # be set up properly, and your program needs a password for access. That's a
33 | # very high cost if all you're trying to do is verify that a one-line change
34 | # to your program works correctly.
35 |
36 | # The best way to work around these issues is to override parts of your
37 | # program at startup time to provide different functionality depending on the
38 | # deployment environment. For example, you could have two different __main__
39 | # files, one for production and one for development.
40 |
41 | # # dev_main.py
42 | # TESTING = True
43 | # import db_connection
44 | # db = db_connection.Database()
45 |
46 | # # prod_main.py
47 | # TESTING = False
48 | # import db_connection
49 | # db = db_connection.Database()
50 |
51 | # The only difference between the two files is the value of the TESTING
52 | # constant. Other modules in your program can then import the __main__ module
53 | # and use the value of TESTING to decide how they define their own attributes.
54 |
55 | # # db_connection.py
56 | # import __main__
57 | #
58 | #
59 | # class TestingDatabase(object):
60 | # #...
61 | # pass
62 | #
63 | #
64 | # class RealDatabase(object):
65 | # #...
66 | # pass
67 | #
68 | #
69 | # if __main__.TESTING:
70 | # Database = TestingDatabase
71 | # else:
72 | # Database = RealDatabase
73 |
74 | # The key behavior to notice here is that code running in module scope--not
75 | # inside any function or method--is just normal Python code. You can use an
76 | # if statement at the module level to decide how the module will define names.
77 | # This makes it easy to tailor modules to your various deployment
78 | # environments. You avoid having to reproduce costly assumptions like
79 | # database configurations when they aren't needed. You can inject fake or mock
80 | # implementations that ease interactive development and testing (see Item 56:
81 | # "Test everything with unittest")
82 |
83 | # Note
84 | # Once your deployment environments get complicated, you should consider moving
85 | # them out of Python constants (like TESTING) and into dedicated configuration
86 | # files. Tools like the configparser built-in module let you maintain
87 | # production configurations separate from code, a distinction that's crucial for
88 | # collaborating with an operations team.
89 |
90 | # This approach can be used for more than working around external assumptions.
91 | # For example, if you know that your program must work differently based on its
92 | # host platform, you can inspect the sys module before defining top-level
93 | # constructs in a module.
94 |
95 | # db_connection.py
96 | import sys
97 |
98 |
99 | class Win32Database(object):
100 | #...
101 | pass
102 |
103 |
104 | class PosixDatabase(object):
105 | #...
106 | pass
107 |
108 |
109 | if sys.platform.startswith('win32'):
110 | Database = Win32Database
111 | else:
112 | Database = PosixDatabase
113 |
114 |
115 | # Similarly, you can use environment variables from os.environ to guide your
116 | # module definitions.
117 |
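118 | # A minimal sketch of that approach (DEBUG_MODE is a hypothetical variable
119 | # name; commented out because TestingDatabase and RealDatabase are only
120 | # defined in the commented example above):
121 |
122 | # import os
123 | # if os.environ.get('DEBUG_MODE') == '1':
124 | #     Database = TestingDatabase
125 | # else:
126 | #     Database = RealDatabase
127 |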
118 |
119 | # Things to remember
120 |
121 | # 1. Programs often need to run in multiple deployment environments that each
122 | # have unique assumptions and configurations.
123 | # 2. You can tailor a module's contents to different deployment environments
124 | # by using normal Python statements in module scope.
125 | # 3. Module contents can be the product of any external condition, including
126 | # host introspection through the sys and os modules.
127 |
--------------------------------------------------------------------------------
/item_55_use_repr_strings.py:
--------------------------------------------------------------------------------
1 | # Item 55: Use repr strings for debugging output
2 |
3 |
4 | # When debugging a Python program, the print function (or output via the
5 | # logging built-in module) will get you surprisingly far. Python internals are
6 | # often easy to access via plain attributes (see Item 27: "Prefer public
7 | # attributes over private ones"). All you need to do is print how the state of
8 | # your program changes while it runs and see where it goes wrong.
9 |
10 | # The print function outputs a human-readable string version of whatever you
11 | # supply it. For example, printing a basic string will print the contents of
12 | # the string without the surrounding quote characters.
13 |
14 | # The problem is that the human-readable string for a value doesn't make it
15 | # clear what the actual type of the value is. For example, notice how in the
16 | # default output of print you can't distinguish between the types of the
17 | # number 5 and the string '5'.
18 |
19 | print(5)
20 | print('5')
21 | # 5
22 | # 5
23 |
24 | # If you're debugging a program with print, these type differences matter.
25 | # What you almost always want while debugging is to see the repr version of an
26 | # object. The repr built-in function returns the printable representation of
27 | # an object, which should be its most clearly understandable string
28 | # representation. For built-in types, the string returned by repr is a valid
29 | # Python expression.
30 |
31 | a = '\x07'
32 | print(repr(a))
33 | # '\x07'
34 |
35 | # Passing the value from repr to the eval built-in function should result in
36 | # the same Python object you started with (of course, in practice, you should
37 | # only use eval with extreme caution).
38 |
39 | b = eval(repr(a))
40 | assert a == b
41 |
42 | # When you're debugging with print, you should repr the value before printing
43 | # to ensure that any difference in types is clear.
44 |
45 | print(repr(5))
46 | print(repr('5'))
47 | # 5
48 | # '5'
49 |
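50 | # The %r format specifier is equivalent to calling repr, just as %s is
51 | # equivalent to calling str:
52 | print('%r' % 5)
53 | print('%r' % '5')
54 | # 5
55 | # '5'
56 |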
50 | # For dynamic Python objects, the default human-readable string value is the
51 | # same as the repr value. This means that passing a dynamic object to print
52 | # will do the right thing, and you don't need to explicitly call repr on it.
53 | # Unfortunately, the default value of repr for object instances isn't
54 | # especially helpful. For example, here I define a simple class and then print
55 | # its value:
56 |
57 |
58 | class OpaqueClass(object):
59 | def __init__(self, x, y):
60 | self.x = x
61 | self.y = y
62 |
63 |
64 | obj = OpaqueClass(1, 2)
65 | print(obj)
66 | print(repr(obj))
67 | # <__main__.OpaqueClass object at 0x7f454b200828>
68 | # <__main__.OpaqueClass object at 0x7f454b200828>
69 |
70 | # This output can't be passed to the eval function, and it says nothing about
71 | # the instance fields of the object.
72 |
73 | # There are two solutions to this problem. If you have control of the class,
74 | # you can define your own __repr__ special method that returns a string
75 | # containing the Python expression that recreates the object. Here, I define
76 | # that function for the class above:
77 |
78 |
79 | class BetterClass(object):
80 | def __init__(self, x, y):
81 | self.x = x
82 | self.y = y
83 |
84 | def __repr__(self):
85 | return 'BetterClass(%d, %d)' % (self.x, self.y)
86 |
87 |
88 | # Now, the repr value is much more useful.
89 |
90 | obj = BetterClass(1, 2)
91 | print(obj)
92 | print(repr(obj))
93 | # BetterClass(1, 2)
94 | # BetterClass(1, 2)
95 |
96 | # When you don't have control over the class definition, you can reach into
97 | # the object's instance dictionary, which is stored in the __dict__
98 | # attribute. Here, I print out the contents of an OpaqueClass instance:
99 |
100 | obj = OpaqueClass(4, 5)
101 | print(obj.__dict__)
102 | # {'y': 5, 'x': 4}
103 |
104 |
105 | # Things to remember
106 |
107 | # 1. Calling print on built-in Python types will produce the human-readable
108 | # string version of a value, which hides type information.
109 | # 2. Calling repr on built-in Python types will produce the printable string
110 | # version of a value. These repr strings could be passed to the eval
111 | # built-in function to get back the original value.
112 | # 3. %s in format strings will produce human-readable strings, like str. %r
113 | #    will produce printable strings, like repr.
114 | # 4. You can define the __repr__ method to customize the printable
115 | # representation of a class and provide more detailed debugging
116 | # information.
117 | # 5. You can reach into any object's __dict__ attribute to view its internals.
118 |
--------------------------------------------------------------------------------
/item_56_test_utils.py:
--------------------------------------------------------------------------------
1 | # utils_test.py
2 | from unittest import TestCase, main
3 | from item_56_utils import to_str
4 | from tempfile import TemporaryDirectory
5 |
6 |
7 | class UtilsTestCase(TestCase):
8 | def test_to_str_bytes(self):
9 | self.assertEqual('hello', to_str(b'hello'))
10 |
11 | def test_to_str_str(self):
12 | self.assertEqual('hello', to_str('hello'))
13 |
14 | def test_to_str_bad(self):
15 | self.assertRaises(TypeError, to_str, object())
16 |
17 |
18 | class MyTest(TestCase):
19 | def setUp(self):
20 | self.test_dir = TemporaryDirectory()
21 |
22 | def tearDown(self):
23 | self.test_dir.cleanup()
24 |
25 | def test_to_str_bytes(self):
26 | self.assertEqual('hello', to_str(b'hello'))
27 |
28 | def test_to_str_str(self):
29 | self.assertEqual('hello', to_str('hello'))
30 |
31 | def test_to_str_bad(self):
32 | self.assertRaises(TypeError, to_str, object())
33 |
34 |
35 | if __name__ == '__main__':
36 | main()
37 |
--------------------------------------------------------------------------------
/item_56_unittest.py:
--------------------------------------------------------------------------------
1 | # Item 56: Test everything with unittest
2 | from unittest import TestCase, main
3 | from item_56_utils import to_str
4 | from tempfile import TemporaryDirectory
5 |
6 | # Python doesn't have static type checking. There's nothing in the compiler
7 | # that ensures your program will work when you run it. With Python you don't
8 | # know whether the functions your program calls will be defined at runtime,
9 | # even when their existence is evident in the source code. This dynamic
10 | # behavior is a blessing and a curse.
11 |
12 | # The large number of Python programmers out there say it's worth it because
13 | # of the productivity gained from the resulting brevity and simplicity. But
14 | # most people have heard at least one horror story about Python in which a
15 | # program encountered a boneheaded error at run time.
16 |
17 | # One of the worst examples I've heard is when a SyntaxError was raised in
18 | # production as a side effect of a dynamic import (see Item 52: "Know how to
19 | # break circular dependencies"). The programmer I know who was hit by this
20 | # surprising occurrence has since ruled out using Python ever again.
21 |
22 | # But I have to wonder, why wasn't the code tested before the program was
23 | # deployed to production? Type safety isn't everything. You should always test
24 | # your code, regardless of what language it's written in. However, I'll admit
25 | # that the big difference between Python and many other languages is that the
26 | # only way to have any confidence in a Python program is by writing tests.
27 | # There is no veil of static type checking to make you feel safe.
28 |
29 | # Luckily, the same dynamic features that prevent static type checking in
30 | # Python also make it extremely easy to write tests for your code. You can use
31 | # Python's dynamic nature and easily overridable behaviors to implement tests
32 | # and ensure that your programs work as expected.
33 |
34 | # You should think of tests as an insurance policy on your code. Good tests
35 | # give you confidence that your code is correct. If you refactor or expand
36 | # your code, tests make it easy to identify how behaviors have changed. It
37 | # sounds counter-intuitive, but having good tests actually makes it easier to
38 | # modify Python code, not harder.
39 |
40 | # The simplest way to write tests is to use the unittest built-in module. For
41 | # example, say you have the following utility function defined in utils.py
42 |
43 |
44 | def to_str(data):
45 | if isinstance(data, str):
46 | return data
47 | elif isinstance(data, bytes):
48 | return data.decode('utf-8')
49 | else:
50 | raise TypeError('Must supply str or bytes, '
51 | 'found: %r' % data)
52 |
53 |
54 | # To define tests, I create a second file named test_utils.py or utils_test.py
55 |
56 |
57 | class UtilsTestCase(TestCase):
58 | def test_to_str_bytes(self):
59 | self.assertEqual('hello', to_str(b'hello'))
60 |
61 | def test_to_str_str(self):
62 | self.assertEqual('hello', to_str('hello'))
63 |
64 | def test_to_str_bad(self):
65 | self.assertRaises(TypeError, to_str, object())
66 |
67 |
68 | if __name__ == '__main__':
69 | main()
70 |
71 |
72 | # Tests are organized into TestCase classes. Each test is a method beginning
73 | # with the word test. If a test method runs without raising any kind of
74 | # Exception (including AssertionError from assert statements), then the test
75 | # is considered to have passed successfully.
76 |
77 | # The TestCase class provides helper methods for making assertions in your
78 | # tests, such as assertEqual for verifying equality, assertTrue for verifying
79 | # Boolean expressions, and assertRaises for verifying that exceptions are
80 | # raised when appropriate (see help(TestCase) for more). You can define your
81 | # own helper methods in TestCase subclasses to make your tests more readable;
82 | # just ensure that your method names don't begin with the word test.
83 |
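84 | # For example, assertRaises can also be used as a context manager; a minimal
85 | # sketch using the to_str function from above:
86 |
87 |
88 | class MoreTests(TestCase):
89 |     def test_to_str_bad_with(self):
90 |         with self.assertRaises(TypeError):
91 |             to_str(object())
92 |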
84 | # Note
85 | # Another common practice when writing tests is to use mock functions and
86 | # classes to stub out certain behaviors. For this purpose, Python 3 provides
87 | # the unittest.mock built-in module, which is available for Python 2 as an
88 | # open source package.
89 |
90 | # Sometimes, your TestCase classes need to set up the test environment before
91 | # running test methods. To do this, you can override the setUp and tearDown
92 | # methods. These methods are called before and after each test method,
93 | # respectively, and they let you ensure that each test runs in isolation (an
94 | # important best practice of proper testing). For example, here I define a
95 | # TestCase that creates a temporary directory before each test and deletes its
96 | # contents after each test finishes:
97 |
98 |
99 | class MyTest(TestCase):
100 | def setUp(self):
101 | self.test_dir = TemporaryDirectory()
102 |
103 | def tearDown(self):
104 | self.test_dir.cleanup()
105 | #
106 | # def test_to_str_bytes(self):
107 | # self.assertEqual('hello', to_str(b'hello'))
108 | #
109 | # def test_to_str_str(self):
110 | # self.assertEqual('hello', to_str('hello'))
111 | #
112 | # def test_to_str_bad(self):
113 | # self.assertRaises(TypeError, to_str, object())
114 |
115 | # I usually define one TestCase for each set of related tests. Sometimes I
116 | # have one TestCase for each function that has many edge cases. Other times,
117 | # a TestCase spans all functions in a single module. I'll also create one
118 | # TestCase for testing a single class and all of its methods.
119 |
120 | # When programs get complicated, you'll want additional tests for verifying
121 | # the interactions between your modules, instead of only testing code in
122 | # isolation. This is the difference between unit tests and integration tests.
123 | # In Python, it's important to write both types of tests for exactly the same
124 | # reason: You have no guarantee that your modules will actually work together
125 | # unless you prove it.
126 |
127 | # Note
128 | # Depending on your project, it can also be useful to define data-driven tests
129 | # or organize tests into different suites of related functionality. For these
130 | # purposes, and for other advanced use cases like code coverage reports, the
131 | # nose (http://nose.readthedocs.org/) and pytest (http://pytest.org/) open
132 | # source packages can be especially helpful.
133 |
134 |
135 | # Things to remember
136 |
137 | # 1. The only way to have confidence in a Python program is to write tests.
138 | # 2. The unittest built-in module provides most of the facilities you'll need
139 | # to write good tests.
140 | # 3. You can define tests by subclassing TestCase and defining one method per
141 | # behavior you'd like to test. Test methods on TestCase classes must start
142 | # with the word test.
143 | # 4. It's important to write both unit tests (for isolated functionality) and
144 | # integration tests (for modules that interact).
145 |
--------------------------------------------------------------------------------
/item_56_utils.py:
--------------------------------------------------------------------------------
1 | # utils.py
2 |
3 |
4 | def to_str(data):
5 | if isinstance(data, str):
6 | return data
7 | elif isinstance(data, bytes):
8 | return data.decode('utf-8')
9 | else:
10 | raise TypeError('Must supply str or bytes, '
11 | 'found: %r' % data)
12 |
--------------------------------------------------------------------------------
/item_57_pdb.py:
--------------------------------------------------------------------------------
1 | # Item 57: Consider interactive debugging with pdb
2 |
3 |
4 | # Everyone encounters bugs in their code while developing programs. Using the
5 | # print function can help you track down the source of many issues (see Item
6 | # 55: "Use repr strings for debugging output"). Writing tests for specific
7 | # cases that cause trouble is another great way to isolate problems (see Item
8 | # 56: "Test everything with unittest").
9 |
10 | # But these tools aren't enough to find every root cause. When you need
11 | # something more powerful, it's time to try Python's built-in interactive
12 | # debugger. The debugger lets you inspect program state, print local
13 | # variables, and step through a Python program one statement at a time.
14 |
15 | # In most other programming languages, you use a debugger by specifying what
16 | # line of a source file you'd like to stop on, then execute the program. In
17 | # contrast, with Python the easiest way to use the debugger is by modifying
18 | # your program to directly initiate the debugger just before you think you'll
19 | # have an issue worth investigating. There is no difference between running a
20 | # Python program under a debugger and running it normally.
21 |
22 | # To initiate the debugger, all you have to do is import the pdb built-in
23 | # module and run its set_trace function. You'll often see this done in a
24 | # single line so programmers can comment it out with a single # character.
25 |
26 |
27 | def complex_func(a, b, c):
28 | # ...
29 | import pdb
30 | pdb.set_trace()
31 |
32 |
33 | # As soon as this statement runs, the program will pause its execution. The
34 | # terminal that started your program will turn into an interactive Python
35 | # shell.
36 |
37 | # -> import pdb; pdb.set_trace()
38 | # (Pdb)
39 |
40 | # At the (Pdb) prompt, you can type in the name of a local variable to see
41 | # its value printed out. You can see a list of all local variables by
42 | # calling the locals built-in function. You can import modules, inspect global
43 | # state, construct new objects, run the help built-in function, and even
44 | # modify parts of the program--whatever you need to do to aid in your
45 | # debugging. In addition, the debugger has three commands that make inspecting
46 | # the running program easier.
47 | # 1. bt: Print the traceback of the current execution call stack. This lets
48 | #    you figure out where you are in your program and how you arrived at the
49 | #    pdb.set_trace trigger point.
50 | # 2. up: Move your scope up the function call stack to the caller of the
51 | # current function. This allows you to inspect the local variables in
52 | # higher levels of the call stack.
53 | # 3. down: Move your scope back down the function call stack one level.
54 |
55 | # Once you're done inspecting the current state, you can use debugger commands
56 | # to resume the program's execution under precise control.
57 | # 1. step: Run the program until the next line of execution in the program,
58 | # then return control back to the debugger. If the next line of execution
59 | # includes calling a function, the debugger will stop in the function that
60 | # was called.
61 | # 2. next: Run the program until the next line of execution in the current
62 | # function, then return control back to the debugger. If the next line of
63 | # execution includes calling a function, the debugger will not stop until
64 | # the called function has returned.
65 | # 3. return: Run the program until the current function returns, then return
66 | # control back to the debugger.
67 | # 4. continue: Continue running the program until the next breakpoint (or
68 | # set_trace is called again).
69 |
70 |
71 | # Things to remember
72 |
73 | # 1. You can initiate the Python interactive debugger at a point of interest
74 | # directly in your program with the import pdb; pdb.set_trace() statements.
75 | # 2. The Python debugger prompt is a full Python shell that lets you inspect
76 | # and modify the state of a running program.
77 | # 3. pdb shell commands let you precisely control program execution, allowing
78 | # you to alternate between inspecting program state and progressing program
79 | # execution.
80 |
--------------------------------------------------------------------------------
/item_59_use_tracemalloc.py:
--------------------------------------------------------------------------------
1 | # Item 59: Use tracemalloc to understand memory usage and leaks
2 |
3 |
4 | # Memory management in the default implementation of Python, CPython, uses
5 | # reference counting. This ensures that as soon as all references to an
6 | # object have expired, the referenced object is also cleared. CPython also
7 | # has a built-in cycle detector to ensure that self-referencing objects are
8 | # eventually garbage collected.
9 |
10 | # In theory, this means that most Python programmers don't have to worry about
11 | # allocating or deallocating memory in their programs. It's taken care of
12 | # automatically by the language and the CPython runtime. However, in practice,
13 | # programs eventually do run out of memory due to held references. Figuring out
14 | # where your Python programs are using or leaking memory proves to be a
15 | # challenge.
16 |
17 | # The first way to debug memory usage is to ask the gc built-in module to list
18 | # every object currently known by the garbage collector. Although it's quite
19 | # a blunt tool, this approach does let you quickly get a sense of where your
20 | # program's memory is being used.
21 |
22 | # Here, I run a program that wastes memory by keeping references. It prints
23 | # out how many objects were created during execution and a small sample of
24 | # allocated objects.
25 |
26 | # item_59_use_tracemalloc_using_gc.py
27 | import item_59_use_tracemalloc_using_gc
28 | # 4944 objects before
29 | # 4955 objects after
30 | # {'_loaders': [('.cpython-35m-x86_64-linux-gnu.so', ...
31 |
32 | # The problem with gc.get_objects is that it doesn't tell you anything about
33 | # how the objects were allocated. In complicated programs, a specific class
34 | # of object could be allocated many different ways. The overall number of
35 | # objects isn't nearly as important as identifying the code responsible for
36 | # allocating the objects that are leaking memory.
37 |
38 | # Python 3.4 introduces the tracemalloc built-in module to solve this
39 | # problem. tracemalloc makes it possible to connect an object back to where
40 | # it was allocated. Here, I print out the top three memory usage offenders
41 | # in a program using tracemalloc:
42 |
43 | # item_59_use_tracemalloc_top_n.py
44 | import item_59_use_tracemalloc_top_n
45 | # /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py:7: size=3539 KiB (+3539 KiB), count=100000 (+100000), average=36 B
46 | # /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_top_n.py:6: size=1264 B (+1264 B), count=2 (+2), average=632 B
47 | # <frozen importlib._bootstrap>:476: size=485 B (+485 B), count=6 (+6), average=81 B
48 |
51 | # It's immediately clear which objects are dominating my program's memory
52 | # usage and where in the source code they were allocated.
53 |
54 | # The tracemalloc module can also print out the full stack trace of each
55 | # allocation (up to the number of frames passed to the start method). Here, I
56 | # print out the stack trace of the biggest source of memory usage in the
57 | # program:
58 |
59 | # item_59_use_tracemalloc_with_trace.py
60 | import item_59_use_tracemalloc_with_trace
61 | # File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py", line 7
62 | # a.append(10 * 230 * i)
63 | # File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_with_trace.py", line 6
64 | # x = waste_memory.run()
65 |
66 | # A stack trace like this is most valuable for figuring out which particular
67 | # usage of a common function is responsible for memory consumption in a
68 | # program.
69 |
70 | # Unfortunately, Python 2 doesn't provide the tracemalloc built-in module.
71 | # There are open source packages for tracking memory usage in Python 2 (such
72 | # as heapy), though they do not fully replicate the functionality of
73 | # tracemalloc.
74 |
75 |
76 | # Things to remember
77 |
78 | # 1. It can be difficult to understand how Python programs use and leak
79 | # memory.
80 | # 2. The gc module can help you understand which objects exist, but it has no
81 | # information about how they were allocated.
82 | # 3. The tracemalloc built-in module provides powerful tools for understanding
83 | # the source of memory usage.
84 | # 4. tracemalloc is only available in Python 3.4 and above.
85 |
--------------------------------------------------------------------------------
/item_59_use_tracemalloc_top_n.py:
--------------------------------------------------------------------------------
1 | import tracemalloc
2 | tracemalloc.start(10) # Save up to 10 stack frames
3 |
4 | time1 = tracemalloc.take_snapshot()
5 | import item_59_use_tracemalloc_waste_memory as waste_memory
6 | x = waste_memory.run()
7 | time2 = tracemalloc.take_snapshot()
8 |
9 | stats = time2.compare_to(time1, 'lineno')
10 | for stat in stats[:3]:
11 | print(stat)
12 |
13 | # /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py:7: size=3539 KiB (+3539 KiB), count=100000 (+100000), average=36 B
14 | # /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_top_n.py:6: size=1264 B (+1264 B), count=2 (+2), average=632 B
15 | # <frozen importlib._bootstrap>:476: size=485 B (+485 B), count=6 (+6), average=81 B
16 |
--------------------------------------------------------------------------------
/item_59_use_tracemalloc_using_gc.py:
--------------------------------------------------------------------------------
1 | import gc
2 | found_objects = gc.get_objects()
3 | print('%d objects before' % len(found_objects))
4 |
5 |
6 | import item_59_use_tracemalloc_waste_memory as waste_memory
7 | x = waste_memory.run()
8 | found_objects = gc.get_objects()
9 | print('%d objects after' % len(found_objects))
10 | for obj in found_objects[:3]:
11 | print(repr(obj)[:100])
12 |
13 | # 4916 objects before
14 | # 5446 objects after
15 | # <...>
16 | # <...>
17 | # {'_fields': ('context_expr', 'optional_vars'), '__doc__': None, '__module__': '_ast', '__weakref__':
--------------------------------------------------------------------------------
/item_59_use_tracemalloc_waste_memory.py:
--------------------------------------------------------------------------------
1 |
2 |
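3 | # Deliberately wasteful helper used by the gc and tracemalloc examples:
4 | # builds and returns a 100,000-element list so the profilers have
5 | # allocations to attribute (the intermediate c is thrown away each pass).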
3 | def run():
4 | a = []
5 | for i in range(100000):
6 | c = i**2 + 1
7 | a.append(10 * 230 * i)
8 | return a
9 |
--------------------------------------------------------------------------------
/item_59_use_tracemalloc_with_trace.py:
--------------------------------------------------------------------------------
1 | import tracemalloc
2 | tracemalloc.start(10) # Save up to 10 stack frames
3 |
4 | time1 = tracemalloc.take_snapshot()
5 | import item_59_use_tracemalloc_waste_memory as waste_memory
6 | x = waste_memory.run()
7 | time2 = tracemalloc.take_snapshot()
8 |
9 | stats = time2.compare_to(time1, 'traceback')
10 | top = stats[0]
11 | print('\n'.join(top.traceback.format()))
12 |
13 | # File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py", line 7
14 | # a.append(10 * 230 * i)
15 | # File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_with_trace.py", line 6
16 | # x = waste_memory.run()
--------------------------------------------------------------------------------