├── 01_understanding ├── .notempty ├── check_prime.py └── reducing_operations.py ├── 02_profiling ├── .notempty ├── julia1.py ├── julia1_decorator.py ├── julia1_lineprofiler.py ├── julia1_lineprofiler2.py ├── julia1_lineprofiler3.py ├── julia1_memoryprofiler.py ├── julia1_nopil.py └── utility.py ├── 03_lists_tuples ├── .notempty ├── binary_search.py ├── binary_vs_linear_search.py ├── bisect_example.py └── linear_search.py ├── 04_dict_set ├── .notempty ├── custom_hash_function.py ├── custom_vs_default_hash.py ├── dict_probing.py ├── namespace.py ├── timing_hash_function.py └── unique_lookup.py ├── 05_iterators ├── .notempty ├── fibonacci.py └── lazy_data_analysis.py ├── 06_matrix ├── .notempty ├── Makefile ├── diffusion_2d │ ├── Makefile │ ├── README.md │ ├── _benchmark.py │ ├── diffusion_numpy.py │ ├── diffusion_numpy_memory.py │ ├── diffusion_numpy_memory2.py │ ├── diffusion_numpy_memory2_numexpr.py │ ├── diffusion_numpy_memory2_numexpr_single.py │ ├── diffusion_numpy_naive.py │ ├── diffusion_python.py │ ├── diffusion_python_memory.py │ ├── diffusion_scipy.py │ ├── kernprof │ │ ├── diffusion_numpy.kernprof │ │ ├── diffusion_numpy_memory.kernprof │ │ ├── diffusion_numpy_memory2.kernprof │ │ ├── diffusion_numpy_memory2_numexpr.kernprof │ │ ├── diffusion_numpy_memory2_numexpr_single.kernprof │ │ ├── diffusion_numpy_naive.kernprof │ │ ├── diffusion_python.kernprof │ │ ├── diffusion_python_memory.kernprof │ │ └── diffusion_scipy.kernprof │ ├── memit │ │ ├── diffusion_numpy.memit │ │ ├── diffusion_numpy_memory.memit │ │ ├── diffusion_numpy_memory2.memit │ │ ├── diffusion_numpy_memory2_numexpr.memit │ │ ├── diffusion_numpy_memory2_numexpr_single.memit │ │ ├── diffusion_numpy_naive.memit │ │ ├── diffusion_python.memit │ │ ├── diffusion_python_memory.memit │ │ └── diffusion_scipy.memit │ ├── perf │ │ ├── diffusion_numpy.novec.perf │ │ ├── diffusion_numpy.perf │ │ ├── diffusion_numpy_memory.perf │ │ ├── diffusion_numpy_memory2.perf │ │ ├── diffusion_numpy_memory2_numexpr.perf │ │ ├── diffusion_numpy_memory2_numexpr_single.perf │ │ ├── diffusion_numpy_naive.perf │ │ ├── diffusion_python.perf │ │ ├── diffusion_python_memory.perf │ │ └── diffusion_scipy.perf │ └── time │ │ ├── diffusion_numpy.time │ │ ├── diffusion_numpy_memory.time │ │ ├── diffusion_numpy_memory2.time │ │ ├── diffusion_numpy_memory2_numexpr.time │ │ ├── diffusion_numpy_memory2_numexpr_single.time │ │ ├── diffusion_numpy_naive.time │ │ ├── diffusion_python.time │ │ ├── diffusion_python_memory.time │ │ └── diffusion_scipy.time ├── norm │ ├── Makefile │ ├── norm_array.memit │ ├── norm_array.py │ ├── norm_numpy.py │ ├── norm_numpy_dot.py │ ├── norm_python.py │ ├── norm_python_comprehension.py │ ├── perf │ │ ├── norm_array.perf │ │ ├── norm_numpy.perf │ │ ├── norm_numpy_dot.perf │ │ ├── norm_python.perf │ │ └── norm_python_comprehension.perf │ └── time │ │ ├── norm_array.time │ │ ├── norm_numpy.time │ │ └── norm_python.time └── pandas │ ├── compare_sklearn_lstsq_timing.py │ ├── generate_data.py │ ├── plot_min_max_slopes.py │ ├── sklearn_line_profiler.py │ ├── str_operation.py │ ├── time_iteration_methods.py │ └── utility.py ├── 07_compiling ├── .gitignore ├── .notempty ├── Makefile ├── cffi │ ├── diffusion_2d_cffi.py │ └── diffusion_2d_cffi_inline.py ├── cpython_module │ ├── .gitignore │ ├── cdiffusion │ │ ├── diffusion.h │ │ └── python_interface.c │ ├── diffusion.py │ └── setup.py ├── ctypes │ └── diffusion_ctypes.py ├── diffusion.c ├── f2py │ ├── .gitignore │ ├── Makefile │ ├── diffusion.f90 │ └── diffusion_fortran.py ├── julia │ ├── 
cython │ │ ├── cpython │ │ │ ├── cythonfn.pyx │ │ │ ├── cythonfn1.pyx │ │ │ ├── cythonfn2.pyx │ │ │ ├── cythonfn3.pyx │ │ │ ├── cythonfn4.pyx │ │ │ ├── cythonfn5.pyx │ │ │ ├── julia1.py │ │ │ └── setup.py │ │ ├── cpython_pyximport │ │ │ ├── cythonfn.pyx │ │ │ └── julia1.py │ │ └── nparray_memoryview │ │ │ ├── cythonfn.pyx │ │ │ ├── cythonfn1.pyx │ │ │ ├── cythonfn2.pyx │ │ │ ├── julia1.py │ │ │ ├── julia1_np_nocython.py │ │ │ ├── parallel │ │ │ ├── cythonfn.pyx │ │ │ ├── cythonfn1.pyx │ │ │ ├── cythonfn2.pyx │ │ │ ├── cythonfn3.pyx │ │ │ ├── julia1.py │ │ │ └── setup.py │ │ │ └── setup.py │ ├── julia1_nopil.py │ ├── julia1_nopil_expanded_math_pypy.py │ ├── julia1_numba.py │ └── julia1_numba_expandedmath_inspection.py └── pytorch │ ├── compare.py │ ├── diffusion_numpy.py │ ├── diffusion_pytorch.py │ ├── random_access.py │ └── requirements.txt ├── 08_concurrency ├── .notempty ├── cralwer │ ├── asyncio │ │ └── crawler.py │ ├── benchmark.sh │ ├── gevent │ │ └── crawler.py │ ├── images │ │ ├── asyncio.png │ │ ├── gevent.png │ │ ├── grequests.png │ │ ├── parallel_requests.png │ │ ├── serial.png │ │ └── tornado.png │ ├── metric_data.json │ ├── parallel_requests.json │ ├── parallel_requests.py │ ├── serial │ │ └── crawler.py │ ├── server.py │ ├── tornado │ │ └── crawler.py │ ├── tornado_callback │ │ └── crawler.py │ └── visualize.py ├── requirements.txt └── workload │ ├── images │ ├── async_callgraph.png │ ├── workload_async_batches_no-IO.png │ ├── workload_async_batches_no-IO_serial.png │ ├── workload_async_no-IO.png │ ├── workload_batches_no-IO.png │ ├── workload_file-IO_no-IO.png │ └── workload_no-IO_serial.png │ ├── server.py │ ├── workload.py │ └── workloads.json ├── 09_multiprocessing ├── .notempty ├── locking │ ├── ex1_lock.py │ ├── ex1_nolock1.py │ ├── ex1_nolock4.py │ ├── ex2_lock.py │ ├── ex2_lock_rawvalue.py │ ├── ex2_nolock.py │ └── ex3_redis.py ├── np_shared_example │ ├── np_shared.py │ └── rnd_demo │ │ ├── np_shared_rnd_parallel.py │ │ └── np_shared_rnd_serial.py ├── pi_estimation │ ├── pi_lists_parallel │ │ ├── pi_graph_speed_tests.py │ │ ├── pi_lists_parallel.py │ │ ├── pi_lists_parallel_joblib.py │ │ ├── pi_lists_parallel_joblib_cache.py │ │ └── pi_lists_series.py │ ├── pi_monte_carlo_diagram │ │ └── pi_plot_monte_carlo_example.py │ └── pi_processes_parallel │ │ ├── pi_graph_speed_tests.py │ │ ├── pi_numpy_parallel_worker.py │ │ ├── pi_numpy_serial.py │ │ └── pi_numpy_serial_blocks.py ├── prime_generation │ ├── plot_serial_vs_queue_times.py │ ├── primes.py │ ├── primes_pool.py │ ├── primes_queue.py │ ├── primes_queue_jobs_feeder_thread.py │ └── primes_queue_less_work.py └── prime_validation │ ├── create_range.py │ ├── plot_prime_validation_times.py │ ├── primes.py │ ├── primes_factor_test.py │ ├── primes_pool_per_number1.py │ ├── primes_pool_per_number2.py │ ├── primes_pool_per_number_manager.py │ ├── primes_pool_per_number_mmap.py │ ├── primes_pool_per_number_mmap2.py │ ├── primes_pool_per_number_mmap3.py │ ├── primes_pool_per_number_mmap4.py │ ├── primes_pool_per_number_redis.py │ ├── primes_pool_per_number_value.py │ ├── primes_pool_per_number_value_withinit.py │ └── primes_understand_comms_frequency.py ├── 10_clusters ├── .notempty ├── docker │ ├── Dockerfile │ ├── Makefile │ ├── diffusion_numpy_memory2.py │ └── requirements.txt ├── ipython_parallel │ └── pi_ipython_cluster.py └── nsq │ └── nsq_worker.py ├── 11_less_ram ├── .notempty ├── compressing_text │ ├── plot_example.py │ ├── text_example.py │ ├── text_example_clean_list.py │ ├── 
text_example_clean_list_wikipedia_gensim.py │ ├── text_example_dawg.py │ ├── text_example_dawg_load_only.py │ ├── text_example_list.py │ ├── text_example_list_bisect.py │ ├── text_example_set.py │ ├── text_example_trie.py │ └── text_example_trie_load_only.py ├── getsizeof │ └── asizeof.py ├── morris_counter_example │ ├── morris_counter.py │ └── show_morris_counter.py ├── numexpr_pandas │ └── make_cross_entropy_picture.py ├── probabilistic_datastructures │ ├── __init__.py │ ├── _benchmark.clean.pkl │ ├── _benchmark.pkl │ ├── _benchmark.py │ ├── bloomfilter.py │ ├── hyperloglog.py │ ├── kminvalues.py │ ├── ll.py │ ├── llregister.py │ ├── morriscounter.py │ ├── prob_ds_figure.py │ ├── requirements.txt │ ├── results │ │ └── unique.pkl │ ├── scalingbloomfilter.py │ ├── superll.py │ └── utils.py ├── sklearn_hashing_trick │ ├── feature_hashing_explanation2.py │ ├── feature_hashing_explanation_nb.ipynb │ └── feature_hashing_test1.py └── sparse │ └── benchmark_sparse.py ├── LICENSE.md ├── README.md ├── figures ├── array_allocation.graffle ├── bandwidth.csv ├── bandwidth.py ├── dict_probing.py ├── diffusion.py ├── diffusion_1d.py ├── hash-set-theory-example1.graffle ├── hash_function.graffle ├── hll_single_reg.py ├── kmv.py ├── list_overallocation.py ├── list_resize.graffle ├── matrix_method_speed.csv ├── matrix_method_speed.py ├── memory_types.py ├── memory_types_data.csv ├── norm_squared.py ├── processor.csv ├── processor_clock.py └── serial_vs_concurrent.graffle └── fix_cpu_modes.sh /01_understanding/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/01_understanding/.notempty -------------------------------------------------------------------------------- /01_understanding/check_prime.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def check_prime(number): 5 | sqrt_number = math.sqrt(number) 6 | for i in range(2, int(sqrt_number) + 1): 7 | if (number / i).is_integer(): 8 | return False 9 | return True 10 | 11 | 12 | print(f"check_prime(10,000,000) = {check_prime(10_000_000)}") 13 | # check_prime(10,000,000) = False 14 | print(f"check_prime(10,000,019) = {check_prime(10_000_019)}") 15 | # check_prime(10,000,019) = True 16 | -------------------------------------------------------------------------------- /01_understanding/reducing_operations.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def search_fast(haystack, needle): 5 | for item in haystack: 6 | if item == needle: 7 | return True 8 | return False 9 | 10 | 11 | def search_slow(haystack, needle): 12 | return_value = False 13 | for item in haystack: 14 | if item == needle: 15 | return_value = True 16 | return return_value 17 | 18 | 19 | def search_unknown1(haystack, needle): 20 | return any((item == needle for item in haystack)) 21 | 22 | 23 | def search_unknown2(haystack, needle): 24 | return any([item == needle for item in haystack]) 25 | 26 | 27 | if __name__ == "__main__": 28 | iterations = 10000 29 | haystack = list(range(1000)) 30 | setup = "from __main__ import (haystack, needle, search_fast, search_slow)" 31 | 32 | needle = 5 33 | print( 34 | f"Testing search speed with {len(haystack)} items and needle close to the head of the list" 35 | ) 36 | 37 | t = timeit.timeit( 38 | stmt="search_fast(haystack, needle)", setup=setup, number=iterations 39 | ) 40 | 
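    # With needle=5 sitting near the head of the 1000-item haystack, search_fast can
    # return as soon as it hits the match, while search_slow still walks the whole list.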
print(f"search_fast time: {t/iterations:.5e}") 41 | 42 | t = timeit.timeit( 43 | stmt="search_slow(haystack, needle)", setup=setup, number=iterations 44 | ) 45 | print(f"search_slow time: {t/iterations:.5e}") 46 | 47 | needle = len(haystack) - 10 48 | print( 49 | f"Testing search speed with {len(haystack)} items and needle close to the tail of the list" 50 | ) 51 | 52 | t = timeit.timeit( 53 | stmt="search_fast(haystack, needle)", setup=setup, number=iterations 54 | ) 55 | print(f"search_fast time: {t/iterations:.5e}") 56 | 57 | t = timeit.timeit( 58 | stmt="search_slow(haystack, needle)", setup=setup, number=iterations 59 | ) 60 | print(f"search_slow time: {t/iterations:.5e}") 61 | -------------------------------------------------------------------------------- /02_profiling/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/02_profiling/.notempty -------------------------------------------------------------------------------- /02_profiling/utility.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def test_some_fn(): 4 | """Check basic behaviours for our function""" 5 | assert some_fn(2) == 4 6 | assert some_fn(1) == 1 7 | assert some_fn(-1) == 1 8 | 9 | 10 | # check for line_profiler or memory_profiler in the local scope, both 11 | # are injected by their respective tools or they're absent 12 | # if these tools aren't being used (in which case we need to substite 13 | # a dummy @profile decorator) 14 | if 'line_profiler' not in dir() and 'profile' not in dir(): 15 | def profile(func): 16 | def inner(*args, **kwargs): 17 | return func(*args, **kwargs) 18 | return inner 19 | 20 | @profile 21 | def some_fn(useful_input): 22 | """An expensive function that we wish to both test and profile""" 23 | # artificial 'we're doing something clever and expensive' delay 24 | time.sleep(1) 25 | return useful_input ** 2 26 | 27 | 28 | if __name__ == "__main__": 29 | print(f"Example call `some_fn(2)` == {some_fn(2)}") 30 | 31 | 32 | -------------------------------------------------------------------------------- /03_lists_tuples/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/03_lists_tuples/.notempty -------------------------------------------------------------------------------- /03_lists_tuples/binary_search.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def binary_search(needle, haystack): 5 | # imin and imax store the bounds of the haystack that we are currently 6 | # considering. This starts as the bounds of the haystack and slowly 7 | # converges to surround the needle. 
8 |     imin, imax = 0, len(haystack)
9 |     while True:
10 |         if imin >= imax:
11 |             return -1
12 |         midpoint = (imin + imax) // 2
13 |         if haystack[midpoint] > needle:
14 |             imax = midpoint
15 |         elif haystack[midpoint] < needle:
16 |             imin = midpoint + 1
17 |         else:
18 |             return midpoint
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     setup = "from __main__ import (binary_search, haystack, needle)"
23 |     iterations = 10000
24 | 
25 |     for haystack_size in (10000, 100000, 1000000):
26 |         haystack = range(haystack_size)
27 |         for needle in (1, 6000, 9000, 1000000):
28 |             index = binary_search(needle, haystack)
29 |             t = timeit.timeit(
30 |                 stmt="binary_search(needle, haystack)", setup=setup, number=iterations
31 |             )
32 |             print(
33 |                 f"Value {needle: <8} found in haystack of "
34 |                 f"size {len(haystack): <8} at index "
35 |                 f"{index: <8} in {t/iterations:.5e} seconds"
36 |             )
37 | 
--------------------------------------------------------------------------------
/03_lists_tuples/binary_vs_linear_search.py:
--------------------------------------------------------------------------------
1 | import timeit
2 | 
3 | from binary_search import binary_search
4 | from linear_search import linear_search
5 | 
6 | 
7 | def time_and_log(function, needle, haystack):
8 |     index = function(needle, haystack)
9 |     t = timeit.timeit(
10 |         stmt=f"{function.__name__}(needle, haystack)", setup=setup, number=iterations
11 |     )
12 |     print(
13 |         f"[{function.__name__}] Value {needle: <8} found in haystack of "
14 |         f"size {len(haystack): <8} at index "
15 |         f"{index: <8} in {t/iterations:.5e} seconds"
16 |     )
17 | 
18 | 
19 | if __name__ == "__main__":
20 |     setup = "from __main__ import " "(binary_search, linear_search, haystack, needle)"
21 |     iterations = 1000
22 | 
23 |     for haystack_size in (10000, 100000, 1000000):
24 |         haystack = range(haystack_size)
25 |         for needle in (1, 6000, 9000, 1000000):
26 |             time_and_log(linear_search, needle, haystack)
27 |             time_and_log(binary_search, needle, haystack)
28 | 
--------------------------------------------------------------------------------
/03_lists_tuples/bisect_example.py:
--------------------------------------------------------------------------------
1 | import bisect
2 | import random
3 | 
4 | 
5 | def find_closest(haystack, needle):
6 |     # bisect.bisect_left returns the index of the first value in the haystack
7 |     # that is greater than or equal to the needle (i.e., the needle's insertion point)
8 |     i = bisect.bisect_left(haystack, needle)
9 |     if i == len(haystack):
10 |         return i - 1
11 |     elif haystack[i] == needle:
12 |         return i
13 |     elif i > 0:
14 |         j = i - 1
15 |         # since we know the value is larger than needle (and vice versa for the
16 |         # value at j), we don't need to use absolute values here
17 |         if haystack[i] - needle > needle - haystack[j]:
18 |             return j
19 |     return i
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     important_numbers = []
24 |     for i in range(10):
25 |         new_number = random.randint(0, 1000)
26 |         bisect.insort(important_numbers, new_number)
27 | 
28 |     # important_numbers will already be in order because we inserted new elements
29 |     # with bisect.insort
30 |     print(important_numbers)
31 |     # > [14, 265, 496, 661, 683, 734, 881, 892, 973, 992]
32 | 
33 |     closest_index = find_closest(important_numbers, -250)
34 |     print(f"Closest value to -250: {important_numbers[closest_index]}")
35 |     # > Closest value to -250: 14
36 | 
37 |     closest_index = find_closest(important_numbers, 500)
38 |     print(f"Closest value to 500: {important_numbers[closest_index]}")
39 |     # > Closest value to 500: 496
40 | 
41 |     closest_index = find_closest(important_numbers, 1100)
| print(f"Closest value to 1100: {important_numbers[closest_index]}") 43 | # > Closest value to 1100: 992 44 | -------------------------------------------------------------------------------- /03_lists_tuples/linear_search.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def linear_search(needle, array): 5 | for i, item in enumerate(array): 6 | if item == needle: 7 | return i 8 | return -1 9 | 10 | 11 | if __name__ == "__main__": 12 | setup = "from __main__ import (linear_search, haystack, needle)" 13 | iterations = 1000 14 | 15 | for haystack_size in (10000, 100000, 1000000): 16 | haystack = range(haystack_size) 17 | for needle in (1, 6000, 9000, 1000000): 18 | index = linear_search(needle, haystack) 19 | t = timeit.timeit( 20 | stmt="linear_search(needle, haystack)", setup=setup, number=iterations 21 | ) 22 | print( 23 | f"Value {needle: <8} found in haystack of " 24 | f"size {len(haystack): <8} at index " 25 | f"{index: <8} in {t/iterations:.5e} seconds" 26 | ) 27 | -------------------------------------------------------------------------------- /04_dict_set/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/04_dict_set/.notempty -------------------------------------------------------------------------------- /04_dict_set/custom_hash_function.py: -------------------------------------------------------------------------------- 1 | class City(str): 2 | def __hash__(self): 3 | return ord(self[0]) 4 | 5 | 6 | if __name__ == "__main__": 7 | print( 8 | "Adding Rome, San Francisco, New York and Barcelona to a set. New York and Barcenlona will collide!" 
9 |     )
10 |     # We create a dictionary where we assign arbitrary values to cities
11 |     data = {
12 |         City("Rome"): "Italy",
13 |         City("San Francisco"): "USA",
14 |         City("New York"): "USA",
15 |         City("Barcelona"): "Spain",
16 |     }
17 | 
--------------------------------------------------------------------------------
/04_dict_set/custom_vs_default_hash.py:
--------------------------------------------------------------------------------
1 | class Point(object):
2 |     def __init__(self, x, y):
3 |         self.x, self.y = x, y
4 | 
5 | 
6 | class PointHash(object):
7 |     def __init__(self, x, y):
8 |         self.x, self.y = x, y
9 | 
10 |     def __hash__(self):
11 |         return hash((self.x, self.y))
12 | 
13 |     def __eq__(self, other):
14 |         return self.x == other.x and self.y == other.y
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     print("Test with default hash function")
19 |     p1 = Point(1, 1)
20 |     p2 = Point(1, 1)
21 |     points = set([p1, p2])
22 |     print("Contents of set([p1, p2]): ", points)
23 |     print("Point(1, 1) in set([p1, p2]) = ", (Point(1, 1) in points))
24 | 
25 |     print("Test with custom hash function")
26 |     p1 = PointHash(1, 1)
27 |     p2 = PointHash(1, 1)
28 |     points = set([p1, p2])
29 |     print("Contents of set([p1, p2]): ", points)
30 |     print("Point(1, 1) in set([p1, p2]) = ", (PointHash(1, 1) in points))
31 | 
--------------------------------------------------------------------------------
/04_dict_set/dict_probing.py:
--------------------------------------------------------------------------------
1 | from itertools import islice
2 | 
3 | 
4 | def index_sequence(key, mask=0b111, PERTURB_SHIFT=5):
5 |     perturb = hash(key)
6 |     i = perturb & mask
7 |     yield i
8 |     while True:
9 |         perturb >>= PERTURB_SHIFT
10 |         i = (i * 5 + perturb + 1) & mask
11 |         yield i
12 | 
13 | 
14 | class ForceHash(object):
15 |     def __init__(self, force_hash):
16 |         self.force_hash = force_hash
17 | 
18 |     def __hash__(self):
19 |         return self.force_hash
20 | 
21 |     def __repr__(self):
22 |         return f"<ForceHash: {self.force_hash}>"
23 | 
24 | 
25 | def sample_probe(force_hash, num_samples=10):
26 |     probe_values = index_sequence(force_hash)
27 |     indexes = islice(probe_values, num_samples)
28 |     print(f"First {num_samples} samples for hash {force_hash}: {list(indexes)}")
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     sample_probe(ForceHash(0b00000111))
33 |     sample_probe(ForceHash(0b11100111))
34 |     sample_probe(ForceHash(0b01110111))
35 |     sample_probe(ForceHash(0b01110001))
36 |     sample_probe(ForceHash(0b01110000))
37 | 
--------------------------------------------------------------------------------
/04_dict_set/namespace.py:
--------------------------------------------------------------------------------
1 | import math
2 | from math import sin
3 | 
4 | 
5 | def test1(x):
6 |     """
7 |     >>> %timeit test1(123_456)
8 |     162 µs ± 3.82 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
9 |     """
10 |     res = 1
11 |     for _ in range(1000):
12 |         res += math.sin(x)
13 |     return res
14 | 
15 | 
16 | def test2(x):
17 |     """
18 |     >>> %timeit test2(123_456)
19 |     124 µs ± 6.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
20 |     """
21 |     res = 1
22 |     for _ in range(1000):
23 |         res += sin(x)
24 |     return res
25 | 
26 | 
27 | def test3(x, sin=math.sin):
28 |     """
29 |     >>> %timeit test3(123_456)
30 |     105 µs ± 3.35 µs per loop (mean ± std. dev.
of 7 runs, 10000 loops each) 31 | """ 32 | res = 1 33 | for _ in range(1000): 34 | res += sin(x) 35 | return res 36 | -------------------------------------------------------------------------------- /04_dict_set/timing_hash_function.py: -------------------------------------------------------------------------------- 1 | import string 2 | import timeit 3 | 4 | 5 | class BadHash(str): 6 | def __hash__(self): 7 | return 42 8 | 9 | 10 | class GoodHash(str): 11 | def __hash__(self): 12 | """ 13 | This is a slightly optimized version of twoletter_hash 14 | """ 15 | return ord(self[1]) + 26 * ord(self[0]) - 2619 16 | 17 | 18 | if __name__ == "__main__": 19 | baddict = set() 20 | gooddict = set() 21 | for i in string.ascii_lowercase: 22 | for j in string.ascii_lowercase: 23 | key = i + j 24 | baddict.add(BadHash(key)) 25 | gooddict.add(GoodHash(key)) 26 | 27 | badtime = timeit.repeat( 28 | "key in baddict", 29 | setup="from __main__ import baddict, BadHash; key = BadHash('zz')", 30 | repeat=3, 31 | number=100_000, 32 | ) 33 | goodtime = timeit.repeat( 34 | "key in gooddict", 35 | setup="from __main__ import gooddict, GoodHash; key = GoodHash('zz')", 36 | repeat=3, 37 | number=100_000, 38 | ) 39 | 40 | print(f"Min lookup time for baddict: {min(badtime)}") 41 | print(f"Min lookup time for gooddict: {min(goodtime)}") 42 | 43 | # Results: 44 | # Min lookup time for baddict: 17.719061855008476 45 | # Min lookup time for gooddict: 0.42408075400453527 46 | -------------------------------------------------------------------------------- /04_dict_set/unique_lookup.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import timeit 4 | 5 | 6 | def list_unique_names(phonebook): 7 | unique_names = [] 8 | for name, phonenumber in phonebook: 9 | first_name, last_name = name.split(" ", 1) 10 | for unique in unique_names: 11 | if unique == first_name: 12 | break 13 | else: 14 | unique_names.append(first_name) 15 | return len(unique_names) 16 | 17 | 18 | def set_unique_names(phonebook): 19 | unique_names = set() 20 | for name, phonenumber in phonebook: 21 | first_name, last_name = name.split(" ", 1) 22 | unique_names.add(first_name) 23 | return len(unique_names) 24 | 25 | 26 | def random_name(): 27 | first_name = "".join(random.sample(string.ascii_letters, 8)) 28 | last_name = "".join(random.sample(string.ascii_letters, 8)) 29 | return "{} {}".format(first_name, last_name) 30 | 31 | 32 | if __name__ == "__main__": 33 | phonebook = [("John Doe", "555-555-5555"), ("Albert Einstein", "212-555-5555")] 34 | 35 | print("Number of unique names from set method:", set_unique_names(phonebook)) 36 | print("Number of unique names from list method:", list_unique_names(phonebook)) 37 | 38 | setup = ( 39 | "from __main__ import (large_phonebook, set_unique_names, list_unique_names)" 40 | ) 41 | iterations = 50 42 | large_phonebook = [(random_name(), "555-555-5555") for i in range(1000)] 43 | 44 | t = timeit.timeit( 45 | stmt="list_unique_names(large_phonebook)", setup=setup, number=iterations 46 | ) 47 | print( 48 | f"Finding unique names in a phonebook of length {len(large_phonebook)} " 49 | f"using lists took: {t / iterations:2e} seconds" 50 | ) 51 | 52 | t = timeit.timeit( 53 | stmt="set_unique_names(large_phonebook)", setup=setup, number=iterations 54 | ) 55 | print( 56 | f"Finding unique names in a phonebook of length {len(large_phonebook)} " 57 | f"using sets took: {t / iterations:2e} seconds" 58 | ) 59 | 
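The %timeit results recorded in the docstrings of 04_dict_set/namespace.py come down to how each function looks up the name `sin`. A minimal sketch (not part of the repository; the function names below are illustrative) that makes the difference visible with the standard-library `dis` module:

```
import dis
import math
from math import sin


def use_attribute(x):
    return math.sin(x)  # global lookup of `math`, then an attribute lookup for `sin`


def use_global(x):
    return sin(x)  # a single global lookup of `sin`


def use_local(x, sin=math.sin):
    return sin(x)  # `sin` is a local default argument, the cheapest lookup


for fn in (use_attribute, use_global, use_local):
    print(f"--- {fn.__name__} ---")
    dis.dis(fn)  # compare LOAD_GLOBAL/LOAD_ATTR (or LOAD_METHOD) against LOAD_FAST
```

The progressively cheaper lookups match the 162 µs, 124 µs and 105 µs ordering recorded in the docstrings.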
-------------------------------------------------------------------------------- /05_iterators/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/05_iterators/.notempty -------------------------------------------------------------------------------- /05_iterators/fibonacci.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def fibonacci_list(num_items): 5 | numbers = [] 6 | a, b = 0, 1 7 | while len(numbers) < num_items: 8 | numbers.append(a) 9 | a, b = b, a + b 10 | return numbers 11 | 12 | 13 | def fibonacci_gen(num_items): 14 | a, b = 0, 1 15 | while num_items: 16 | yield a 17 | a, b = b, a + b 18 | num_items -= 1 19 | 20 | 21 | def test_fibonacci(func, N): 22 | for i in func(N): 23 | pass 24 | 25 | 26 | if __name__ == "__main__": 27 | setup = "from __main__ import " "(test_fibonacci, fibonacci_gen, fibonacci_list, N)" 28 | iterations = 1000 29 | 30 | for N in (2, 100, 1_000, 100_00): 31 | t = timeit.timeit( 32 | stmt=f"test_fibonacci(fibonacci_list, N)", setup=setup, number=iterations 33 | ) 34 | print( 35 | f"fibonacci_list took {t / iterations:.5e}s to calculate {N} fibonacci numbers" 36 | ) 37 | 38 | t = timeit.timeit( 39 | stmt=f"test_fibonacci(fibonacci_gen, N)", setup=setup, number=iterations 40 | ) 41 | print( 42 | f"fibonacci_gen took {t / iterations:.5e}s to calculate {N} fibonacci numbers" 43 | ) 44 | -------------------------------------------------------------------------------- /05_iterators/lazy_data_analysis.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from itertools import count, filterfalse, groupby, islice 3 | from random import normalvariate, randint 4 | 5 | from scipy.stats import normaltest 6 | 7 | 8 | def read_data(filename): 9 | with open(filename) as fd: 10 | for line in fd: 11 | data = line.strip().split(",") 12 | timestamp, value = map(int, data) 13 | yield datetime.fromtimestamp(timestamp), value 14 | 15 | 16 | def read_fake_data(filename): 17 | for timestamp in count(): 18 | # We insert an anomalous data point approximately once a week 19 | if randint(0, 7 * 60 * 60 * 24 - 1) == 1: 20 | value = normalvariate(0, 1) 21 | else: 22 | value = 100 23 | yield datetime.fromtimestamp(timestamp), value 24 | 25 | 26 | def groupby_day(iterable): 27 | key = lambda row: row[0].day 28 | for day, data_group in groupby(iterable, key): 29 | yield list(data_group) 30 | 31 | 32 | def is_normal(data, threshold=1e-3): 33 | _, values = zip(*data) 34 | k2, p_value = normaltest(values) 35 | if p_value < threshold: 36 | return False 37 | return True 38 | 39 | 40 | def filter_anomalous_groups(data): 41 | yield from filterfalse(is_normal, data) 42 | 43 | 44 | def filter_anomalous_data(data): 45 | data_group = groupby_day(data) 46 | yield from filter_anomalous_groups(data_group) 47 | 48 | 49 | if __name__ == "__main__": 50 | data = read_fake_data("fake_filename") 51 | anomaly_generator = filter_anomalous_data(data) 52 | first_five_anomalies = islice(anomaly_generator, 5) 53 | 54 | for data_anomaly in first_five_anomalies: 55 | start_date = data_anomaly[0][0] 56 | end_date = data_anomaly[-1][0] 57 | print(f"Anomaly from {start_date} - {end_date}") 58 | -------------------------------------------------------------------------------- /06_matrix/.notempty: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/.notempty -------------------------------------------------------------------------------- /06_matrix/Makefile: -------------------------------------------------------------------------------- 1 | SCRIPTS := $(sort $(filter-out _%.py, $(wildcard *.py))) 2 | PERF := $(patsubst %.py, perf/%.perf, $(SCRIPTS)) 3 | TIME := $(patsubst %.py, time/%.time, $(SCRIPTS)) 4 | MEMIT := $(patsubst %.py, memit/%.memit, $(SCRIPTS)) 5 | KERNPROF := $(patsubst %.py, kernprof/%.kernprof, $(SCRIPTS)) 6 | 7 | all: $(PERF) $(TIME) $(MEMIT) $(KERNPROF) 8 | 9 | perf: $(PERF) 10 | 11 | time: $(TIME) 12 | 13 | memit: $(MEMIT) 14 | 15 | kernprof: $(KERNPROF) 16 | 17 | %.kernprof: %.py 18 | @echo "lineprof-izing $<" 19 | mkdir kernprof 20 | kernprof -l -v $< > $@ 2>&1 21 | 22 | %.memit: %.py 23 | @echo "%memit-izing $<" 24 | mkdir memit 25 | python -m memory_profiler $< > $@ 2>&1 26 | 27 | %.time: %.py 28 | @echo "Timing $<" 29 | mkdir time 30 | time -v python $< > $@ 2>&1 31 | 32 | %.perf: %.py 33 | @echo "Perfiling $<" 34 | mkdir perf 35 | @perf stat -e cycles,instructions,cache-references,cache-misses,branches,branch-misses,task-clock,faults,page-fault,minor-faults,cs,migrations python $< 2>&1 | sed 's/(\([0-9.]*%\))//g' > $@ 36 | @grep 'seconds time' $@ | column -t | cut -f1 -d' ' 37 | @grep 'cache-misses' $@ | column -t 38 | @echo '' 39 | 40 | clean: 41 | rm -rf $(PERF) $(TIME) $(MEMIT) $(KERNPROF) 42 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/Makefile: -------------------------------------------------------------------------------- 1 | include ../../Makefile 2 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | $ python benchmark.py 3 | Grid size: (256, 256) 4 | Pure Python: 1.22s (1.218498e-01s per iteration) 5 | python+memory: 1.19s (1.186913e-01s per iteration)[1.03x speedup] 6 | numpy: 0.02s (1.638103e-03s per iteration)[74.38x speedup] 7 | numpy+memory: 0.01s (1.490402e-03s per iteration)[81.76x speedup] 8 | numpy+memory2: 0.01s (7.136822e-04s per iteration)[170.73x speedup] 9 | numpy+memory+scipy: 0.02s (1.522303e-03s per iteration)[80.04x speedup] 10 | 11 | Grid size: (512, 512) 12 | Pure Python: 4.89s (4.889611e-01s per iteration) 13 | python+memory: 4.64s (4.643779e-01s per iteration)[1.05x speedup] 14 | numpy: 0.15s (1.469820e-02s per iteration)[33.27x speedup] 15 | numpy+memory: 0.11s (1.104362e-02s per iteration)[44.28x speedup] 16 | numpy+memory2: 0.04s (3.523612e-03s per iteration)[138.77x speedup] 17 | numpy+memory+scipy: 0.08s (8.366203e-03s per iteration)[58.44x speedup] 18 | 19 | Grid size: (1024, 1024) 20 | Pure Python: 20.76s (2.075953e+00s per iteration) 21 | python+memory: 20.60s (2.059773e+00s per iteration)[1.01x speedup] 22 | numpy: 0.55s (5.520298e-02s per iteration)[37.61x speedup] 23 | numpy+memory: 0.40s (4.010251e-02s per iteration)[51.77x speedup] 24 | numpy+memory2: 0.17s (1.718290e-02s per iteration)[120.82x speedup] 25 | numpy+memory+scipy: 0.52s (5.219860e-02s per iteration)[39.77x speedup] 26 | ``` 27 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import timeit 4 | 5 | from numpy import roll, zeros 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (2048, 2048) 13 | 14 | 15 | def laplacian(grid): 16 | return ( 17 | roll(grid, +1, 0) 18 | + roll(grid, -1, 0) 19 | + roll(grid, +1, 1) 20 | + roll(grid, -1, 1) 21 | - 4 * grid 22 | ) 23 | 24 | 25 | @profile 26 | def evolve(grid, dt, D=1): 27 | return grid + dt * D * laplacian(grid) 28 | 29 | 30 | def run_experiment(num_iterations): 31 | grid = zeros(grid_shape) 32 | 33 | block_low = int(grid_shape[0] * 0.4) 34 | block_high = int(grid_shape[0] * 0.5) 35 | grid[block_low:block_high, block_low:block_high] = 0.005 36 | 37 | for i in range(num_iterations): 38 | grid = evolve(grid, 0.1) 39 | return grid 40 | 41 | 42 | if __name__ == "__main__": 43 | n_iter = 100 44 | N, runtime = timeit.Timer( 45 | f"run_experiment({n_iter})", globals=globals() 46 | ).autorange() 47 | print(f"Runtime with grid {grid_shape}: {runtime / N:0.4f}s") 48 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | import numpy as np 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (640, 640) 13 | 14 | 15 | def laplacian(grid, out): 16 | np.copyto(out, grid) 17 | out *= -4 18 | out += np.roll(grid, +1, 0) 19 | out += np.roll(grid, -1, 0) 20 | out += np.roll(grid, +1, 1) 21 | out += np.roll(grid, -1, 1) 22 | 23 | 24 | @profile 25 | def evolve(grid, dt, out, D=1): 26 | laplacian(grid, out) 27 | out *= D * dt 28 | out += grid 29 | 30 | 31 | def run_experiment(num_iterations): 32 | scratch = np.zeros(grid_shape) 33 | grid = np.zeros(grid_shape) 34 | 35 | block_low = int(grid_shape[0] * 0.4) 36 | block_high = int(grid_shape[0] * 0.5) 37 | grid[block_low:block_high, block_low:block_high] = 0.005 38 | 39 | start = time.time() 40 | for i in range(num_iterations): 41 | evolve(grid, 0.1, scratch) 42 | grid, scratch = scratch, grid 43 | return time.time() - start 44 | 45 | 46 | if __name__ == "__main__": 47 | run_experiment(500) 48 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numpy import add, copyto, multiply, zeros 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (640, 640) 13 | 14 | 15 | def roll_add(rollee, shift, axis, out): 16 | if shift == 1 and axis == 0: 17 | out[1:, :] += rollee[:-1, :] 18 | out[0, :] += rollee[-1, :] 19 | elif shift == -1 and axis == 0: 20 | out[:-1, :] += rollee[1:, :] 21 | out[-1, :] += rollee[0, :] 22 | elif shift == 1 and axis == 1: 23 | out[:, 1:] += rollee[:, :-1] 24 | out[:, 0] += rollee[:, -1] 25 | elif shift == -1 and axis == 1: 26 | out[:, :-1] += rollee[:, 1:] 27 | out[:, -1] += rollee[:, 0] 28 | 29 | 30 | def laplacian(grid, out): 31 | copyto(out, grid) 32 | multiply(out, -4.0, out) 33 | roll_add(grid, +1, 0, out) 34 | roll_add(grid, -1, 0, out) 35 | roll_add(grid, +1, 1, out) 36 | roll_add(grid, -1, 1, out) 37 | 38 | 39 | @profile 40 | def evolve(grid, dt, out, D=1): 41 | laplacian(grid, out) 42 | 
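    # both calls below write their results into `out` in place, so evolve() allocates
    # no temporary arrays -- that is the point of the *_memory2 variant of this solver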
multiply(out, D * dt, out) 43 | add(out, grid, out) 44 | 45 | 46 | def run_experiment(num_iterations): 47 | scratch = zeros(grid_shape) 48 | grid = zeros(grid_shape) 49 | 50 | block_low = int(grid_shape[0] * 0.4) 51 | block_high = int(grid_shape[0] * 0.5) 52 | grid[block_low:block_high, block_low:block_high] = 0.005 53 | 54 | start = time.time() 55 | for i in range(num_iterations): 56 | evolve(grid, 0.1, scratch) 57 | grid, scratch = scratch, grid 58 | return time.time() - start 59 | 60 | 61 | if __name__ == "__main__": 62 | run_experiment(500) 63 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory2_numexpr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | import numexpr as ne 6 | from numpy import copyto, multiply, zeros 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (640, 640) 14 | 15 | 16 | def roll_add(rollee, shift, axis, out): 17 | if shift == 1 and axis == 0: 18 | out[1:, :] += rollee[:-1, :] 19 | out[0, :] += rollee[-1, :] 20 | elif shift == -1 and axis == 0: 21 | out[:-1, :] += rollee[1:, :] 22 | out[-1, :] += rollee[0, :] 23 | elif shift == 1 and axis == 1: 24 | out[:, 1:] += rollee[:, :-1] 25 | out[:, 0] += rollee[:, -1] 26 | elif shift == -1 and axis == 1: 27 | out[:, :-1] += rollee[:, 1:] 28 | out[:, -1] += rollee[:, 0] 29 | 30 | 31 | def laplacian(grid, out): 32 | copyto(out, grid) 33 | multiply(out, -4.0, out) 34 | roll_add(grid, +1, 0, out) 35 | roll_add(grid, -1, 0, out) 36 | roll_add(grid, +1, 1, out) 37 | roll_add(grid, -1, 1, out) 38 | 39 | 40 | @profile 41 | def evolve(grid, dt, out, D=1): 42 | laplacian(grid, out) 43 | ne.evaluate("out*D*dt+grid", out=out) 44 | 45 | 46 | def run_experiment(num_iterations): 47 | scratch = zeros(grid_shape) 48 | grid = zeros(grid_shape) 49 | 50 | block_low = int(grid_shape[0] * 0.4) 51 | block_high = int(grid_shape[0] * 0.5) 52 | grid[block_low:block_high, block_low:block_high] = 0.005 53 | 54 | start = time.time() 55 | for i in range(num_iterations): 56 | evolve(grid, 0.1, scratch) 57 | grid, scratch = scratch, grid 58 | return time.time() - start 59 | 60 | 61 | if __name__ == "__main__": 62 | run_experiment(500) 63 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory2_numexpr_single.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numexpr import evaluate, set_num_threads 6 | from numpy import copyto, multiply, zeros 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (640, 640) 14 | 15 | 16 | def roll_add(rollee, shift, axis, out): 17 | if shift == 1 and axis == 0: 18 | out[1:, :] += rollee[:-1, :] 19 | out[0, :] += rollee[-1, :] 20 | elif shift == -1 and axis == 0: 21 | out[:-1, :] += rollee[1:, :] 22 | out[-1, :] += rollee[0, :] 23 | elif shift == 1 and axis == 1: 24 | out[:, 1:] += rollee[:, :-1] 25 | out[:, 0] += rollee[:, -1] 26 | elif shift == -1 and axis == 1: 27 | out[:, :-1] += rollee[:, 1:] 28 | out[:, -1] += rollee[:, 0] 29 | 30 | 31 | def laplacian(grid, out): 32 | copyto(out, grid) 33 | multiply(out, -4.0, out) 34 | roll_add(grid, +1, 0, out) 35 | roll_add(grid, -1, 0, out) 36 | roll_add(grid, +1, 1, out) 37 | roll_add(grid, -1, 1, out) 38 | 39 | 40 | @profile 41 | def evolve(grid, dt, out, 
D=1): 42 | laplacian(grid, out) 43 | evaluate("out*D*dt+grid", out=out) 44 | 45 | 46 | def run_experiment(num_iterations): 47 | previous_threads = set_num_threads(1) 48 | 49 | scratch = zeros(grid_shape) 50 | grid = zeros(grid_shape) 51 | 52 | block_low = int(grid_shape[0] * 0.4) 53 | block_high = int(grid_shape[0] * 0.5) 54 | grid[block_low:block_high, block_low:block_high] = 0.005 55 | 56 | start = time.time() 57 | for i in range(num_iterations): 58 | evolve(grid, 0.1, scratch) 59 | grid, scratch = scratch, grid 60 | 61 | set_num_threads(previous_threads) 62 | return time.time() - start 63 | 64 | 65 | if __name__ == "__main__": 66 | run_experiment(500) 67 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_naive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numpy import roll, zeros 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (640, 640) 13 | 14 | 15 | def laplacian(grid): 16 | return ( 17 | roll(grid, +1, 0) 18 | + roll(grid, -1, 0) 19 | + roll(grid, +1, 1) 20 | + roll(grid, -1, 1) 21 | - 4 * grid 22 | ) 23 | 24 | 25 | @profile 26 | def evolve(grid, dt, D=1): 27 | return grid + dt * D * laplacian(grid) 28 | 29 | 30 | def run_experiment(num_iterations): 31 | grid = zeros(grid_shape) 32 | 33 | block_low = int(grid_shape[0] * 0.4) 34 | block_high = int(grid_shape[0] * 0.5) 35 | grid[block_low:block_high, block_low:block_high] = 0.005 36 | 37 | start = time.time() 38 | for i in range(num_iterations): 39 | grid = evolve(grid, 0.1) 40 | return time.time() - start 41 | 42 | 43 | if __name__ == "__main__": 44 | run_experiment(500) 45 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_python.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | try: 6 | profile 7 | except NameError: 8 | profile = lambda x: x 9 | 10 | grid_shape = (640, 640) 11 | 12 | 13 | @profile 14 | def evolve(grid, dt, D=1.0): 15 | xmax, ymax = grid_shape 16 | new_grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 17 | for i in range(xmax): 18 | for j in range(ymax): 19 | grid_xx = ( 20 | grid[(i + 1) % xmax][j] + grid[(i - 1) % xmax][j] - 2.0 * grid[i][j] 21 | ) 22 | grid_yy = ( 23 | grid[i][(j + 1) % ymax] + grid[i][(j - 1) % ymax] - 2.0 * grid[i][j] 24 | ) 25 | new_grid[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 26 | return new_grid 27 | 28 | 29 | def run_experiment(num_iterations): 30 | # setting up initial conditions 31 | grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 32 | 33 | block_low = int(grid_shape[0] * 0.4) 34 | block_high = int(grid_shape[0] * 0.5) 35 | for i in range(block_low, block_high): 36 | for j in range(block_low, block_high): 37 | grid[i][j] = 0.005 38 | 39 | start = time.time() 40 | for i in range(num_iterations): 41 | grid = evolve(grid, 0.1) 42 | return time.time() - start 43 | 44 | 45 | if __name__ == "__main__": 46 | run_experiment(500) 47 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_python_memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | try: 6 | profile 7 | except NameError: 8 | profile = lambda x: x 9 | 
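# `profile` only exists when kernprof or memory_profiler injects it at run time;
# the no-op fallback defined above keeps this script importable and runnable on its own.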
10 | grid_shape = (640, 640) 11 | 12 | 13 | @profile 14 | def evolve(grid, dt, out, D=1.0): 15 | xmax, ymax = grid_shape 16 | for i in range(xmax): 17 | for j in range(ymax): 18 | grid_xx = ( 19 | grid[(i + 1) % xmax][j] + grid[(i - 1) % xmax][j] - 2.0 * grid[i][j] 20 | ) 21 | grid_yy = ( 22 | grid[i][(j + 1) % ymax] + grid[i][(j - 1) % ymax] - 2.0 * grid[i][j] 23 | ) 24 | out[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 25 | 26 | 27 | def run_experiment(num_iterations): 28 | # setting up initial conditions 29 | scratch = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 30 | grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 31 | 32 | block_low = int(grid_shape[0] * 0.4) 33 | block_high = int(grid_shape[0] * 0.5) 34 | for i in range(block_low, block_high): 35 | for j in range(block_low, block_high): 36 | grid[i][j] = 0.005 37 | 38 | start = time.time() 39 | for i in range(num_iterations): 40 | evolve(grid, 0.1, scratch) 41 | grid, scratch = scratch, grid 42 | return time.time() - start 43 | 44 | 45 | if __name__ == "__main__": 46 | run_experiment(500) 47 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_scipy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numpy import add, multiply, zeros 6 | from scipy.ndimage.filters import laplace 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (640, 640) 14 | 15 | 16 | def laplacian(grid, out): 17 | laplace(grid, out, mode="wrap") 18 | 19 | 20 | @profile 21 | def evolve(grid, dt, out, D=1): 22 | laplacian(grid, out) 23 | multiply(out, D * dt, out) 24 | add(out, grid, grid) 25 | 26 | 27 | def run_experiment(num_iterations): 28 | scratch = zeros(grid_shape) 29 | grid = zeros(grid_shape) 30 | 31 | block_low = int(grid_shape[0] * 0.4) 32 | block_high = int(grid_shape[0] * 0.5) 33 | grid[block_low:block_high, block_low:block_high] = 0.005 34 | 35 | start = time.time() 36 | for i in range(num_iterations): 37 | evolve(grid, 0.1, scratch) 38 | return time.time() - start 39 | 40 | 41 | if __name__ == "__main__": 42 | run_experiment(500) 43 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 2.11653 s 5 | File: diffusion_numpy.py 6 | Function: evolve at line 16 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 16 @profile 11 | 17 def evolve(grid, dt, D=1): 12 | 18 500 2116531.0 4233.1 100.0 return grid + dt * D * laplacian(grid) 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.58502 s 5 | File: diffusion_numpy_memory.py 6 | Function: evolve at line 21 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 21 @profile 11 | 22 def evolve(grid, dt, out, D=1): 12 | 23 500 1327910.0 2655.8 83.8 laplacian(grid, 
out) 13 | 24 500 100733.0 201.5 6.4 out *= D * dt 14 | 25 500 156377.0 312.8 9.9 out += grid 15 | 16 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory2.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory2.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.26046 s 5 | File: diffusion_numpy_memory2.py 6 | Function: evolve at line 35 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 35 @profile 11 | 36 def evolve(grid, dt, out, D=1): 12 | 37 500 1068913.0 2137.8 84.8 laplacian(grid, out) 13 | 38 500 83140.0 166.3 6.6 multiply(out, D*dt, out) 14 | 39 500 108404.0 216.8 8.6 add(out, grid, out) 15 | 16 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory2_numexpr.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory2_numexpr.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.33304 s 5 | File: diffusion_numpy_memory2_numexpr.py 6 | Function: evolve at line 36 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 36 @profile 11 | 37 def evolve(grid, dt, out, D=1): 12 | 38 500 1111160.0 2222.3 83.4 laplacian(grid, out) 13 | 39 500 221878.0 443.8 16.6 ne.evaluate("out*D*dt+grid", out=out) 14 | 15 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory2_numexpr_single.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory2_numexpr_single.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.68504 s 5 | File: diffusion_numpy_memory2_numexpr_single.py 6 | Function: evolve at line 36 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 36 @profile 11 | 37 def evolve(grid, dt, out, D=1): 12 | 38 500 1102515.0 2205.0 65.4 laplacian(grid, out) 13 | 39 500 582525.0 1165.0 34.6 evaluate("out*D*dt+grid", out=out) 14 | 15 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_naive.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_naive.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 2.07788 s 5 | File: diffusion_numpy_naive.py 6 | Function: evolve at line 16 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 16 @profile 11 | 17 def evolve(grid, dt, D=1): 12 | 18 500 2077882.0 4155.8 100.0 return grid + dt * D * laplacian(grid) 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_python.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_python.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 787.161 s 5 | File: diffusion_python.py 6 | Function: evolve at line 12 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | 
============================================================== 10 | 12 @profile 11 | 13 def evolve(grid, dt, D=1.0): 12 | 14 500 843.0 1.7 0.0 xmax, ymax = grid_shape 13 | 15 500 24764794.0 49529.6 3.1 new_grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 14 | 16 320500 208683.0 0.7 0.0 for i in range(xmax): 15 | 17 205120000 128928913.0 0.6 16.4 for j in range(ymax): 16 | 18 204800000 222422192.0 1.1 28.3 grid_xx = grid[(i+1)%xmax][j] + grid[(i-1)%xmax][j] - 2.0 * grid[i][j] 17 | 19 204800000 228660607.0 1.1 29.0 grid_yy = grid[i][(j+1)%ymax] + grid[i][(j-1)%ymax] - 2.0 * grid[i][j] 18 | 20 204800000 182174957.0 0.9 23.1 new_grid[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 19 | 21 500 331.0 0.7 0.0 return new_grid 20 | 21 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_python_memory.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_python_memory.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 541.138 s 5 | File: diffusion_python_memory.py 6 | Function: evolve at line 12 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 12 @profile 11 | 13 def evolve(grid, dt, out, D=1.0): 12 | 14 500 503.0 1.0 0.0 xmax, ymax = grid_shape 13 | 15 320500 131498.0 0.4 0.0 for i in range(xmax): 14 | 16 205120000 81105090.0 0.4 15.0 for j in range(ymax): 15 | 17 204800000 166271837.0 0.8 30.7 grid_xx = grid[(i+1)%xmax][j] + grid[(i-1)%xmax][j] - 2.0 * grid[i][j] 16 | 18 204800000 169216352.0 0.8 31.3 grid_yy = grid[i][(j+1)%ymax] + grid[i][(j-1)%ymax] - 2.0 * grid[i][j] 17 | 19 204800000 124412452.0 0.6 23.0 out[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 18 | 19 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_scipy.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_scipy.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 2.62417 s 5 | File: diffusion_scipy.py 6 | Function: evolve at line 17 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 17 @profile 11 | 18 def evolve(grid, dt, out, D=1): 12 | 19 500 2384139.0 4768.3 90.9 laplacian(grid, out) 13 | 20 500 93736.0 187.5 3.6 multiply(out, D*dt, out) 14 | 21 500 146298.0 292.6 5.6 add(out, grid, grid) 15 | 16 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_memory.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory2.memit: 
-------------------------------------------------------------------------------- 1 | /home/micha/.pyenv/versions/3.7.2/bin/python: No module named memory_profiler 2 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr_single.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr_single.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_naive.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_naive.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_python.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_python.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_python_memory.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_python_memory.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_scipy.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_scipy.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy.novec.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy.py': 3 | 4 | 50,086,999,350 cycles # 2.888 GHz 5 | 53,611,608,977 instructions # 1.07 insn per cycle 6 | 1,131,742,674 cache-references # 65.266 M/sec 7 | 322,483,897 cache-misses # 28.494 % of all cache refs 8 | 4,001,923,035 branches # 230.785 M/sec 9 | 6,211,101 branch-misses # 0.16% of all branches 10 | 17340.464580 task-clock (msec) # 1.000 CPUs utilized 11 | 403,193 faults # 0.023 M/sec 12 | 403,193 minor-faults # 0.023 M/sec 13 | 74 cs # 0.004 K/sec 14 | 6 migrations # 0.000 K/sec 15 | 16 | 17.339656586 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy.perf: 
-------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy.py': 3 | 4 | 8,432,416,866 cycles # 2.886 GHz 5 | 7,114,758,602 instructions # 0.84 insn per cycle 6 | 1,040,831,469 cache-references # 356.176 M/sec 7 | 216,490,683 cache-misses # 20.800 % of all cache refs 8 | 1,252,928,847 branches # 428.756 M/sec 9 | 8,174,531 branch-misses # 0.65% of all branches 10 | 2922.239426 task-clock (msec) # 1.285 CPUs utilized 11 | 403,282 faults # 0.138 M/sec 12 | 403,282 minor-faults # 0.138 M/sec 13 | 96 cs # 0.033 K/sec 14 | 5 migrations # 0.002 K/sec 15 | 16 | 2.274377105 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory.py': 3 | 4 | 6,880,906,446 cycles # 2.886 GHz 5 | 5,848,134,537 instructions # 0.85 insn per cycle 6 | 1,077,550,720 cache-references # 452.000 M/sec 7 | 217,974,413 cache-misses # 20.229 % of all cache refs 8 | 1,028,769,315 branches # 431.538 M/sec 9 | 7,492,245 branch-misses # 0.73% of all branches 10 | 2383.962679 task-clock (msec) # 1.373 CPUs utilized 11 | 13,521 faults # 0.006 M/sec 12 | 13,521 minor-faults # 0.006 M/sec 13 | 100 cs # 0.042 K/sec 14 | 8 migrations # 0.003 K/sec 15 | 16 | 1.736322099 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory2.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory2.py': 3 | 4 | 5,971,464,515 cycles # 2.888 GHz 5 | 5,893,131,049 instructions # 0.99 insn per cycle 6 | 1,001,582,133 cache-references # 484.398 M/sec 7 | 30,840,612 cache-misses # 3.079 % of all cache refs 8 | 1,038,649,694 branches # 502.325 M/sec 9 | 7,562,009 branch-misses # 0.73% of all branches 10 | 2067.685884 task-clock (msec) # 1.456 CPUs utilized 11 | 11,981 faults # 0.006 M/sec 12 | 11,981 minor-faults # 0.006 M/sec 13 | 95 cs # 0.046 K/sec 14 | 3 migrations # 0.001 K/sec 15 | 16 | 1.419869071 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory2_numexpr.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory2_numexpr.py': 3 | 4 | 8,856,947,179 cycles # 2.872 GHz 5 | 9,354,357,453 instructions # 1.06 insn per cycle 6 | 1,077,518,384 cache-references # 349.423 M/sec 7 | 59,407,830 cache-misses # 5.513 % of all cache refs 8 | 1,018,525,317 branches # 330.292 M/sec 9 | 11,941,430 branch-misses # 1.17% of all branches 10 | 3083.709890 task-clock (msec) # 1.991 CPUs utilized 11 | 15,820 faults # 0.005 M/sec 12 | 15,820 minor-faults # 0.005 M/sec 13 | 8,671 cs # 0.003 M/sec 14 | 2,096 migrations # 0.680 K/sec 15 | 16 | 1.548924090 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory2_numexpr_single.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory2_numexpr_single.py': 3 | 4 | 7,123,637,362 
cycles # 2.888 GHz 5 | 9,190,434,370 instructions # 1.29 insn per cycle 6 | 963,627,201 cache-references # 390.616 M/sec 7 | 34,391,313 cache-misses # 3.569 % of all cache refs 8 | 996,019,993 branches # 403.746 M/sec 9 | 9,228,406 branch-misses # 0.93% of all branches 10 | 2466.945415 task-clock (msec) # 1.357 CPUs utilized 11 | 15,799 faults # 0.006 M/sec 12 | 15,799 minor-faults # 0.006 M/sec 13 | 111 cs # 0.045 K/sec 14 | 10 migrations # 0.004 K/sec 15 | 16 | 1.818284437 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_naive.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_naive.py': 3 | 4 | 8,575,098,217 cycles # 2.886 GHz 5 | 7,103,809,843 instructions # 0.83 insn per cycle 6 | 1,078,489,079 cache-references # 362.982 M/sec 7 | 230,990,178 cache-misses # 21.418 % of all cache refs 8 | 1,252,191,334 branches # 421.444 M/sec 9 | 8,694,936 branch-misses # 0.69% of all branches 10 | 2971.194679 task-clock (msec) # 1.279 CPUs utilized 11 | 403,285 faults # 0.136 M/sec 12 | 403,285 minor-faults # 0.136 M/sec 13 | 94 cs # 0.032 K/sec 14 | 9 migrations # 0.003 K/sec 15 | 16 | 2.323691596 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_python.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_python.py': 3 | 4 | 444,281,397,352 cycles # 2.889 GHz 5 | 1,292,776,966,318 instructions # 2.91 insn per cycle 6 | 902,297,518 cache-references # 5.868 M/sec 7 | 337,237,800 cache-misses # 37.375 % of all cache refs 8 | 268,033,459,921 branches # 1743.051 M/sec 9 | 1,845,293,511 branch-misses # 0.69% of all branches 10 | 153772.569474 task-clock (msec) # 1.000 CPUs utilized 11 | 1,569,793 faults # 0.010 M/sec 12 | 1,569,793 minor-faults # 0.010 M/sec 13 | 294 cs # 0.002 K/sec 14 | 11 migrations # 0.000 K/sec 15 | 16 | 153.776786975 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_python_memory.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_python_memory.py': 3 | 4 | 415,864,974,126 cycles # 2.889 GHz 5 | 1,210,522,769,388 instructions # 2.91 insn per cycle 6 | 656,345,027 cache-references # 4.560 M/sec 7 | 349,562,390 cache-misses # 53.259 % of all cache refs 8 | 251,537,944,600 branches # 1747.583 M/sec 9 | 1,970,031,461 branch-misses # 0.78% of all branches 10 | 143934.730837 task-clock (msec) # 1.000 CPUs utilized 11 | 12,791 faults # 0.089 K/sec 12 | 12,791 minor-faults # 0.089 K/sec 13 | 117 cs # 0.001 K/sec 14 | 6 migrations # 0.000 K/sec 15 | 16 | 143.935522122 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_scipy.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_scipy.py': 3 | 4 | 10,051,801,725 cycles # 2.886 GHz 5 | 16,536,981,020 instructions # 1.65 insn per cycle 6 | 1,554,557,564 cache-references # 446.405 M/sec 7 | 126,627,735 cache-misses # 8.146 % of all cache refs 8 | 2,673,416,633 branches # 767.696 
M/sec 9 | 9,626,762 branch-misses # 0.36% of all branches 10 | 3482.391211 task-clock (msec) # 1.228 CPUs utilized 11 | 14,013 faults # 0.004 M/sec 12 | 14,013 minor-faults # 0.004 M/sec 13 | 95 cs # 0.027 K/sec 14 | 5 migrations # 0.001 K/sec 15 | 16 | 2.835263796 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy.py" 2 | User time (seconds): 2.02 3 | System time (seconds): 0.87 4 | Percent of CPU this job got: 128% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:02.25 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 39684 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 403309 14 | Voluntary context switches: 96 15 | Involuntary context switches: 26 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_memory.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory.py" 2 | User time (seconds): 1.95 3 | System time (seconds): 0.37 4 | Percent of CPU this job got: 138% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.67 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 39112 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 13595 14 | Voluntary context switches: 101 15 | Involuntary context switches: 35 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_memory2.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory2.py" 2 | User time (seconds): 1.70 3 | System time (seconds): 0.40 4 | Percent of CPU this job got: 144% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.45 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 35772 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 12024 14 | Voluntary context switches: 100 15 | Involuntary context switches: 37 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- 
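The .time files in this directory were captured with GNU time. As a rough cross-check, a minimal Python sketch along these lines (the script list and helper name are illustrative, not part of the repository; it assumes the diffusion_*.py variants are run from this directory) can collect comparable wall-clock numbers:

import subprocess
import sys
import time

# Illustrative list of the variants timed above; adjust as needed.
SCRIPTS = [
    "diffusion_python.py",
    "diffusion_python_memory.py",
    "diffusion_numpy.py",
    "diffusion_numpy_memory.py",
    "diffusion_numpy_memory2.py",
    "diffusion_scipy.py",
]

def time_script(script):
    """Run one variant in a fresh interpreter and return its wall-clock time."""
    start = time.perf_counter()
    subprocess.run([sys.executable, script], check=True)
    return time.perf_counter() - start

if __name__ == "__main__":
    for script in SCRIPTS:
        print(f"{script}: {time_script(script):0.2f} s")

This only measures elapsed time per process; the resident-set-size and page-fault figures above still come from GNU time's own accounting.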
/06_matrix/diffusion_2d/time/diffusion_numpy_memory2_numexpr.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory2_numexpr.py" 2 | User time (seconds): 2.66 3 | System time (seconds): 0.46 4 | Percent of CPU this job got: 210% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.48 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 37784 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 15862 14 | Voluntary context switches: 9341 15 | Involuntary context switches: 48 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_memory2_numexpr_single.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory2_numexpr_single.py" 2 | User time (seconds): 2.02 3 | System time (seconds): 0.43 4 | Percent of CPU this job got: 135% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.81 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 37660 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 15824 14 | Voluntary context switches: 129 15 | Involuntary context switches: 24 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_naive.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_naive.py" 2 | User time (seconds): 1.99 3 | System time (seconds): 0.91 4 | Percent of CPU this job got: 128% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:02.25 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 39636 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 403287 14 | Voluntary context switches: 103 15 | Involuntary context switches: 28 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_python.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_python.py" 2 | User time (seconds): 148.44 3 | System time (seconds): 2.14 4 | Percent of CPU this job got: 100% 5 | Elapsed (wall clock) time 
(h:mm:ss or m:ss): 2:30.58 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 25400 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 1569834 14 | Voluntary context switches: 78 15 | Involuntary context switches: 215 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_python_memory.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_python_memory.py" 2 | User time (seconds): 138.58 3 | System time (seconds): 0.03 4 | Percent of CPU this job got: 100% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 2:18.61 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 35088 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 12822 14 | Voluntary context switches: 77 15 | Involuntary context switches: 66 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_scipy.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_scipy.py" 2 | User time (seconds): 3.06 3 | System time (seconds): 0.40 4 | Percent of CPU this job got: 123% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:02.82 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 40184 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 14029 14 | Voluntary context switches: 98 15 | Involuntary context switches: 24 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/norm/Makefile: -------------------------------------------------------------------------------- 1 | include ../../Makefile 2 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_array.memit: -------------------------------------------------------------------------------- 1 | /usr/bin/python: No module named memory_profiler 2 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_array.py: -------------------------------------------------------------------------------- 1 | import time 2 | from array import array 3 | 4 | 5 | def norm_square_array(vector): 6 | norm = 0 7 | for v in vector: 8 | norm 
+= v * v 9 | return norm 10 | 11 | 12 | def run_experiment(size, num_iter=3): 13 | vector = array("l", list(range(size))) 14 | times = [] 15 | for i in range(num_iter): 16 | start = time.time() 17 | norm_square_array(vector) 18 | times.append(time.time() - start) 19 | return min(times) 20 | 21 | 22 | if __name__ == "__main__": 23 | print(run_experiment(1000000, 10)) 24 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_numpy.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy 4 | 5 | 6 | def norm_square_numpy(vector): 7 | return numpy.sum(vector * vector) 8 | 9 | 10 | def run_experiment(size, num_iter=3): 11 | vector = numpy.arange(size) 12 | times = [] 13 | for i in range(num_iter): 14 | start = time.time() 15 | norm_square_numpy(vector) 16 | times.append(time.time() - start) 17 | return min(times) 18 | 19 | 20 | if __name__ == "__main__": 21 | print(run_experiment(1000000, 10)) 22 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_numpy_dot.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy 4 | 5 | 6 | def norm_square_numpy_dot(vector): 7 | return numpy.dot(vector, vector) 8 | 9 | 10 | def run_experiment(size, num_iter=3): 11 | vector = numpy.arange(size) 12 | times = [] 13 | for i in range(num_iter): 14 | start = time.time() 15 | norm_square_numpy_dot(vector) 16 | times.append(time.time() - start) 17 | return min(times) 18 | 19 | 20 | if __name__ == "__main__": 21 | print(run_experiment(1000000, 10)) 22 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_python.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def norm_square_list(vector): 5 | norm = 0 6 | for v in vector: 7 | norm += v * v 8 | return norm 9 | 10 | 11 | def run_experiment(size, num_iter=3): 12 | vector = list(range(size)) 13 | times = [] 14 | for i in range(num_iter): 15 | start = time.time() 16 | norm_square_list(vector) 17 | times.append(time.time() - start) 18 | return min(times) 19 | 20 | 21 | if __name__ == "__main__": 22 | print(run_experiment(1000000, 10)) 23 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_python_comprehension.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def norm_square_list_comprehension(vector): 5 | return sum([v * v for v in vector]) 6 | 7 | 8 | def run_experiment(size, num_iter=3): 9 | vector = list(range(size)) 10 | times = [] 11 | for i in range(num_iter): 12 | start = time.time() 13 | norm_square_list_comprehension(vector) 14 | times.append(time.time() - start) 15 | return min(times) 16 | 17 | 18 | if __name__ == "__main__": 19 | print(run_experiment(1000000, 10)) 20 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_array.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python norm_array.py': 3 | 4 | 15,222,087 cache-references 5 | 12,266,506 cache-misses # 80.584 % of all cache refs 6 | 4,932,179,143 cycles # 0.000 GHz 7 | 10,503,076,117 instructions # 2.13 insns per cycle 8 | 2,398,688,365 branches 9 | 100,316 page-faults 10 | 3 CPU-migrations 11 | 12 | 1.832812019 seconds time 
elapsed 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_numpy.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python norm_numpy.py': 3 | 4 | 6,475,458 cache-references 5 | 3,566,911 cache-misses # 55.084 % of all cache refs 6 | 516,789,816 cycles # 0.000 GHz 7 | 537,953,302 instructions # 1.04 insns per cycle 8 | 100,140,435 branches 9 | 43,102 page-faults 10 | 3 CPU-migrations 11 | 12 | 0.148656614 seconds time elapsed 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_numpy_dot.perf: -------------------------------------------------------------------------------- 1 | 0.0008676052093505859 2 | 0.0008687973022460938 3 | 0.0008668899536132812 4 | 5 | Performance counter stats for 'python norm_numpy_dot.py' (3 runs): 6 | 7 | 3,063,302,089 cycles # 2.173 GHz 8 | 2,113,402,590 instructions # 0.69 insns per cycle 9 | 10,075,677 cache-references # 7.148 M/sec 10 | 315,267 cache-misses # 3.129 % of all cache refs 11 | 434,245,199 branches # 308.079 M/sec 12 | 9,329,547 branch-misses # 2.15% of all branches 13 | 1409.526329 task-clock (msec) # 5.211 CPUs utilized 14 | 11,368 faults # 0.008 M/sec 15 | 11,365 minor-faults # 0.008 M/sec 16 | 62,542 cs # 0.044 M/sec 17 | 73 migrations # 0.052 K/sec 18 | 19 | 0.270503238 seconds time elapsed 20 | 21 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_python.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python norm_python.py': 3 | 4 | 21,797,839 cache-references 5 | 16,882,474 cache-misses # 77.450 % of all cache refs 6 | 4,357,716,622 cycles # 0.000 GHz 7 | 8,309,894,889 instructions # 1.91 insns per cycle 8 | 1,885,987,604 branches 9 | 80,780 page-faults 10 | 3 CPU-migrations 11 | 12 | 1.575655973 seconds time elapsed 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_python_comprehension.perf: -------------------------------------------------------------------------------- 1 | 0.0670633316040039 2 | 0.06715917587280273 3 | 0.06727433204650879 4 | 5 | Performance counter stats for 'python norm_python_comprehension.py' (3 runs): 6 | 7 | 2,362,457,333 cycles # 2.764 GHz 8 | 5,957,736,587 instructions # 2.52 insns per cycle 9 | 7,957,733 cache-references # 9.310 M/sec 10 | 3,083,079 cache-misses # 38.743 % of all cache refs 11 | 1,367,533,152 branches # 1599.839 M/sec 12 | 1,305,535 branch-misses # 0.10% of all branches 13 | 854.794366 task-clock (msec) # 1.002 CPUs utilized 14 | 98,670 faults # 0.115 M/sec 15 | 98,670 minor-faults # 0.115 M/sec 16 | 77 cs # 0.090 K/sec 17 | 60 migrations # 0.070 K/sec 18 | 19 | 0.852782517 seconds time elapsed 20 | 21 | -------------------------------------------------------------------------------- /06_matrix/norm/time/norm_array.time: -------------------------------------------------------------------------------- 1 | 1.59user 0.16system 0:01.75elapsed 99%CPU (0avgtext+0avgdata 400748maxresident)k 2 | 0inputs+0outputs (0major+100349minor)pagefaults 0swaps 3 | -------------------------------------------------------------------------------- /06_matrix/norm/time/norm_numpy.time: -------------------------------------------------------------------------------- 1 | 0.07user 0.06system 0:00.14elapsed 100%CPU 
(0avgtext+0avgdata 170600maxresident)k 2 | 0inputs+0outputs (0major+43008minor)pagefaults 0swaps 3 | -------------------------------------------------------------------------------- /06_matrix/norm/time/norm_python.time: -------------------------------------------------------------------------------- 1 | 1.45user 0.11system 0:01.57elapsed 99%CPU (0avgtext+0avgdata 322600maxresident)k 2 | 0inputs+0outputs (0major+80812minor)pagefaults 0swaps 3 | -------------------------------------------------------------------------------- /06_matrix/pandas/compare_sklearn_lstsq_timing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot 3 | import timeit 4 | from utility import ols_lstsq, ols_sklearn 5 | #%matplotlib 6 | 7 | from sklearn.linear_model import LinearRegression 8 | import matplotlib.pyplot as plt 9 | 10 | df = pd.read_pickle('generated_ols_data.pickle') 11 | print("Loaded") 12 | 13 | number = 10_000 14 | 15 | results = timeit.repeat("ols_lstsq(df.iloc[0])", globals=globals(), number=number) 16 | time_of_fastest = min(results) 17 | print(f"Time to run ols_lstsq for fastest of repeats is {time_of_fastest / number:0.6f} seconds on {number} repeats and taking fastest") 18 | 19 | results = timeit.repeat("ols_sklearn(df.iloc[0])", globals=globals(), number=number) 20 | time_of_fastest = min(results) 21 | print(f"Time to run ols_sklearn for fastest of repeats is {time_of_fastest / number:0.6f} seconds") 22 | 23 | 24 | -------------------------------------------------------------------------------- /06_matrix/pandas/generate_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.linear_model import LinearRegression 4 | #import statsmodels.api as sm 5 | #from sklearn.linear_model import base 6 | 7 | 8 | from numpy.testing import assert_almost_equal 9 | import matplotlib.pyplot as plt 10 | 11 | outfile = "generated_ols_data.pickle" 12 | NBR_DAYS = 14 13 | NBR_PEOPLE = 100_000 14 | #NBR_PEOPLE = 5_000 15 | 16 | lam = 60 # 100 17 | np.random.seed(0) # fix the seed 18 | hours_per_day_per_person = np.random.poisson(lam=lam, size=(NBR_DAYS, NBR_PEOPLE)).T 19 | hours_per_day_per_person = hours_per_day_per_person / 60 20 | 21 | df = pd.DataFrame(hours_per_day_per_person).astype(np.float_) 22 | print(f"Writing {df.shape} to {outfile}") 23 | print(df.head()) 24 | 25 | df.to_pickle(outfile) 26 | 27 | ax = plt.subplot() 28 | df[:3].T.plot(ax=ax, marker='o') 29 | ax.set_title('Random hours of mobile phone usage for 3 people') 30 | ax.set_xlabel('Days') 31 | ax.set_ylabel('Hours of usage') 32 | ax.set_ylim(0, 1.5) 33 | ax.legend() 34 | plt.savefig('random_hours_mobile_phone_usage_3_people.png') 35 | -------------------------------------------------------------------------------- /06_matrix/pandas/sklearn_line_profiler.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | import pandas as pd 3 | import matplotlib.pyplot 4 | from sklearn.linear_model import base 5 | from sklearn.linear_model import LinearRegression 6 | import matplotlib.pyplot as plt 7 | from line_profiler import LineProfiler 8 | import numpy as np 9 | from utility import ols_lstsq, ols_sklearn 10 | 11 | # We learn that 12 | #https://github.com/scikit-learn/scikit-learn/blob/1495f6924/sklearn/linear_model/base.py#L438 13 | # LinearRegression.fit is expensive because 14 | # of calls to check_X_y, _preprocess_data and 
linalg.lstsq 15 | # https://github.com/scikit-learn/scikit-learn/blob/1495f6924/sklearn/linear_model/base.py#L101 16 | # _preprocess_data 17 | # has 3 expensive lines - check_array, np.asarray, np.average 18 | #https://github.com/scikit-learn/scikit-learn/blob/1495f69242646d239d89a5713982946b8ffcf9d9/sklearn/utils/validation.py#L600 19 | # check_X_y 20 | # checks for array for certain characteristics and lengths 21 | # 22 | 23 | 24 | df = pd.read_pickle('generated_ols_data.pickle') 25 | print(f"Loaded {df.shape} rows") 26 | 27 | est = LinearRegression() 28 | row = df.iloc[0] 29 | X = np.arange(row.shape[0]).reshape(-1, 1).astype(np.float_) 30 | 31 | lp = LineProfiler(est.fit) 32 | print("Run on a single row") 33 | lp.run("est.fit(X, row.values)") 34 | lp.print_stats() 35 | 36 | print("Run on 5000 rows") 37 | lp.run("df[:5000].apply(ols_sklearn, axis=1)") 38 | lp.print_stats() 39 | 40 | lp = LineProfiler(base._preprocess_data) 41 | lp.run("base._preprocess_data(X, row, fit_intercept=True)") 42 | lp.print_stats() 43 | 44 | lp = LineProfiler(base.check_X_y) 45 | lp.run("base.check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True)") 46 | lp.print_stats() 47 | 48 | #%lprun -f est_diagnosis.fit est_diagnosis.fit(np.arange(rowx.shape[0]).reshape(-1, 1), rowx.values) 49 | #lp.run("est_diagnosis.fit(np.arange(rowx.shape[0]).reshape(-1, 1).astype(np.float_), y.values)") 50 | #lp.run("base._preprocess_data(np.arange(rowx.shape[0]).reshape(-1, 1).astype(np.float_), rowx, fit_intercept=True)") 51 | 52 | -------------------------------------------------------------------------------- /06_matrix/pandas/str_operation.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pandas as pd 3 | from numpy.testing import assert_almost_equal, assert_array_almost_equal 4 | import numba 5 | import numpy as np 6 | import matplotlib.pyplot 7 | from utility import ols_lstsq, ols_lstsq_raw 8 | 9 | df = pd.read_pickle('generated_ols_data.pickle') 10 | print(f"Loaded {df.shape} rows") 11 | 12 | results_ols_lstsq = df.apply(ols_lstsq, axis=1) 13 | 14 | #df['m'] = results_ols_lstsq 15 | 16 | #df['growth'] = pd.cut(df['m'], [-1.0, -0.01, 0.01, 1.0], labels=['declining', 'stable', 'growing']) 17 | #display(df['growth'].value_counts()) 18 | 19 | #In [173]: %timeit df.query('growth=="growing"')['m'].mean() 20 | #4.85 ms ± 40.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 21 | #In [174]: %timeit df.groupby('growth')['m'].mean()['growing'] 22 | #1.45 ms ± 8.52 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 23 | #In [175]: mask = df['growth'] == 'growing' 24 | #In [179]: %timeit df[mask]['m'].mean() 25 | #1.9 ms ± 72.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 26 | 27 | 28 | df['0_as_str'] = df[0].apply(lambda v: str(v)) 29 | def find_9(s): 30 | """Return -1 if '9' not found else its location at position >= 0""" 31 | return s.split('.')[1].find('9') 32 | 33 | #%timeit df['0_as_str'].str.split('.', expand=True)[1].str.find('9') 34 | #183 ms ± 2.58 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 35 | #%timeit df['0_as_str'].apply(find_9) 36 | #51 ms ± 987 µs per loop (mean ± std. dev. 
of 7 runs, 10 loops each) 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /06_matrix/pandas/utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LinearRegression 3 | 4 | def ols_sklearn(row): 5 | """Solve OLS using scikit-learn's LinearRegression""" 6 | est = LinearRegression() 7 | X = np.arange(row.shape[0]).reshape(-1, 1) # shape (14, 1) 8 | # note that the intercept is built inside LinearRegression 9 | est.fit(X, row.values) 10 | m = est.coef_[0] # note c is in est.intercept_ 11 | return m 12 | 13 | def ols_lstsq(row): 14 | """Solve OLS using numpy.linalg.lstsq""" 15 | # build X values for [0, 13] 16 | X = np.arange(row.shape[0]) # shape (14,) 17 | ones = np.ones(row.shape[0]) # constant used to build intercept 18 | A = np.vstack((X, ones)).T # shape(14, 2) 19 | # lstsq returns the coefficient and intercept as the first result 20 | # followed by the residuals and other items 21 | m, c = np.linalg.lstsq(A, row.values, rcond=-1)[0] 22 | return m 23 | 24 | def ols_lstsq_raw(row): 25 | """Variant of `ols_lstsq` where row is a numpy array (not a Series)""" 26 | X = np.arange(row.shape[0]) 27 | ones = np.ones(row.shape[0]) 28 | A = np.vstack((X, ones)).T 29 | m, c = np.linalg.lstsq(A, row, rcond=-1)[0] 30 | return m 31 | 32 | 33 | 34 | def ols_sm(row): 35 | # by default statsmodels fit uses 36 | # https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse (pinv) 37 | # QR method is the alternative (this add 3s to execution in 50k rows test) 38 | sm_X = sm.add_constant(row.index) 39 | model = sm.OLS(row.values, sm_X) 40 | results = model.fit() 41 | #results.params # 2 params, C followed by m 42 | return results.params[1] 43 | -------------------------------------------------------------------------------- /07_compiling/.gitignore: -------------------------------------------------------------------------------- 1 | diffusion.so 2 | -------------------------------------------------------------------------------- /07_compiling/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/07_compiling/.notempty -------------------------------------------------------------------------------- /07_compiling/Makefile: -------------------------------------------------------------------------------- 1 | diffusion.so: diffusion.c 2 | gcc -O3 -std=gnu11 -c diffusion.c 3 | gcc -shared -o diffusion.so diffusion.o 4 | rm -rf diffusion.o 5 | -------------------------------------------------------------------------------- /07_compiling/cffi/diffusion_2d_cffi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import time 4 | 5 | from cffi import FFI, verifier 6 | 7 | import numpy as np 8 | 9 | grid_shape = (512, 512) 10 | 11 | ffi = FFI() 12 | ffi.cdef("void evolve(double **in, double **out, double D, double dt);") # <1> 13 | lib = ffi.dlopen("../diffusion.so") 14 | 15 | 16 | def evolve(grid, dt, out, D=1.0): 17 | pointer_grid = ffi.cast("double**", grid.ctypes.data) # <2> 18 | pointer_out = ffi.cast("double**", out.ctypes.data) 19 | lib.evolve(pointer_grid, pointer_out, D, dt) 20 | 21 | 22 | def run_experiment(num_iterations): 23 | scratch = np.zeros(grid_shape, dtype=np.double) 24 | grid = np.zeros(grid_shape, dtype=np.double) 25 | 26 | 
block_low = int(grid_shape[0] * 0.4) 27 | block_high = int(grid_shape[0] * 0.5) 28 | grid[block_low:block_high, block_low:block_high] = 0.005 29 | 30 | start = time.time() 31 | for i in range(num_iterations): 32 | evolve(grid, 0.1, scratch) 33 | grid, scratch = scratch, grid 34 | return time.time() - start 35 | 36 | 37 | if __name__ == "__main__": 38 | t = run_experiment(500) 39 | print(t) 40 | 41 | verifier.cleanup_tmpdir() 42 | -------------------------------------------------------------------------------- /07_compiling/cffi/diffusion_2d_cffi_inline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import time 4 | 5 | from cffi import FFI, verifier 6 | 7 | import numpy as np 8 | 9 | grid_shape = (512, 512) 10 | 11 | ffi = FFI() 12 | ffi.cdef("void evolve(double **in, double **out, double D, double dt);") 13 | lib = ffi.verify( 14 | r""" 15 | void evolve(double in[][512], double out[][512], double D, double dt) { 16 | int i, j; 17 | double laplacian; 18 | for (i=1; i<511; i++) { 19 | for (j=1; j<511; j++) { 20 | laplacian = in[i+1][j] + in[i-1][j] + in[i][j+1] + in[i][j-1] - 4 * in[i][j]; 21 | out[i][j] = in[i][j] + D * dt * laplacian; 22 | } 23 | } 24 | } 25 | """, 26 | extra_compile_args=["-O3"], # <1> 27 | ) 28 | 29 | 30 | def evolve(grid, dt, out, D=1.0): 31 | pointer_grid = ffi.cast("double**", grid.ctypes.data) 32 | pointer_out = ffi.cast("double**", out.ctypes.data) 33 | lib.evolve(pointer_grid, pointer_out, D, dt) 34 | 35 | 36 | def run_experiment(num_iterations): 37 | scratch = np.zeros(grid_shape, dtype=np.double) 38 | grid = np.zeros(grid_shape, dtype=np.double) 39 | 40 | block_low = int(grid_shape[0] * 0.4) 41 | block_high = int(grid_shape[0] * 0.5) 42 | grid[block_low:block_high, block_low:block_high] = 0.005 43 | 44 | start = time.time() 45 | for i in range(num_iterations): 46 | evolve(grid, 0.1, scratch) 47 | grid, scratch = scratch, grid 48 | return time.time() - start 49 | 50 | 51 | if __name__ == "__main__": 52 | t = run_experiment(500) 53 | print(t) 54 | 55 | verifier.cleanup_tmpdir() 56 | -------------------------------------------------------------------------------- /07_compiling/cpython_module/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | build 3 | -------------------------------------------------------------------------------- /07_compiling/cpython_module/cdiffusion/diffusion.h: -------------------------------------------------------------------------------- 1 | void evolve(double in[][512], double out[][512], double D, double dt); 2 | -------------------------------------------------------------------------------- /07_compiling/cpython_module/diffusion.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from cdiffusion import evolve 5 | 6 | grid_shape = (512, 512) 7 | 8 | 9 | def run_experiment(num_iterations): 10 | scratch = np.zeros(grid_shape, dtype=np.double) 11 | grid = np.zeros(grid_shape, dtype=np.double) 12 | 13 | block_low = int(grid_shape[0] * 0.4) 14 | block_high = int(grid_shape[0] * 0.5) 15 | grid[block_low:block_high, block_low:block_high] = 0.005 16 | 17 | start = time.time() 18 | for i in range(num_iterations): 19 | evolve(grid, scratch, 1.0, 0.1) 20 | grid, scratch = scratch, grid 21 | return time.time() - start 22 | 23 | 24 | if __name__ == "__main__": 25 | t = run_experiment(500) 26 | print(t) 27 | 
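For reference while reading the compiled versions, the update rule they all implement can be written as one short self-contained NumPy function (a sketch for comparison only, not a file from the repository). Note that it uses periodic boundaries via np.roll, matching the modular indexing in the pure-Python variants, whereas diffusion.c leaves the outermost cells untouched:

import numpy as np

def evolve_reference(grid, dt, out, D=1.0):
    """Reference diffusion step: out = grid + D * dt * laplacian(grid)."""
    laplacian = (
        np.roll(grid, +1, axis=0)
        + np.roll(grid, -1, axis=0)
        + np.roll(grid, +1, axis=1)
        + np.roll(grid, -1, axis=1)
        - 4 * grid
    )
    out[:] = grid + D * dt * laplacian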
-------------------------------------------------------------------------------- /07_compiling/cpython_module/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import Extension, setup 2 | 3 | import numpy.distutils.misc_util 4 | 5 | __version__ = "0.1" 6 | 7 | cdiffusion = Extension( 8 | "cdiffusion", 9 | sources=["../diffusion.c", "cdiffusion/python_interface.c"], 10 | extra_compile_args=["-O3", "-std=c17", "-Wall", "-p", "-pg"], 11 | extra_link_args=["-lc"], 12 | ) 13 | 14 | setup( 15 | name="diffusion", 16 | version=__version__, 17 | ext_modules=[cdiffusion], 18 | packages=["diffusion"], 19 | include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs(), 20 | ) 21 | -------------------------------------------------------------------------------- /07_compiling/ctypes/diffusion_ctypes.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import time 3 | 4 | import numpy as np 5 | 6 | grid_shape = (512, 512) 7 | _diffusion = ctypes.CDLL("../diffusion.so") 8 | 9 | # Create references to the C types that we will need to simplify future code 10 | TYPE_INT = ctypes.c_int 11 | TYPE_DOUBLE = ctypes.c_double 12 | TYPE_DOUBLE_SS = ctypes.POINTER(ctypes.POINTER(ctypes.c_double)) 13 | 14 | # Initialize the signature of the evolve function to: 15 | # void evolve(int, int, double**, double**, double, double) 16 | _diffusion.evolve.argtypes = [TYPE_DOUBLE_SS, TYPE_DOUBLE_SS, TYPE_DOUBLE, TYPE_DOUBLE] 17 | _diffusion.evolve.restype = None 18 | 19 | 20 | def evolve(grid, out, dt, D=1.0): 21 | # First we convert the python types into the relevant C types 22 | assert grid.shape == (512, 512) 23 | cdt = TYPE_DOUBLE(dt) 24 | cD = TYPE_DOUBLE(D) 25 | pointer_grid = grid.ctypes.data_as(TYPE_DOUBLE_SS) 26 | pointer_out = out.ctypes.data_as(TYPE_DOUBLE_SS) 27 | 28 | # Now we can call the function 29 | _diffusion.evolve(pointer_grid, pointer_out, cD, cdt) 30 | 31 | 32 | def run_experiment(num_iterations): 33 | scratch = np.zeros(grid_shape, dtype=ctypes.c_double) 34 | grid = np.zeros(grid_shape, dtype=ctypes.c_double) 35 | 36 | block_low = int(grid_shape[0] * 0.4) 37 | block_high = int(grid_shape[0] * 0.5) 38 | grid[block_low:block_high, block_low:block_high] = 0.005 39 | 40 | start = time.time() 41 | for i in range(num_iterations): 42 | evolve(grid, scratch, 0.1) 43 | grid, scratch = scratch, grid 44 | return time.time() - start 45 | 46 | 47 | if __name__ == "__main__": 48 | t = run_experiment(500) 49 | print(t) 50 | -------------------------------------------------------------------------------- /07_compiling/diffusion.c: -------------------------------------------------------------------------------- 1 | 2 | void evolve(double in[][512], double out[][512], double D, double dt) { 3 | int i, j; 4 | double laplacian; 5 | for (i=1; i<511; i++) { 6 | for (j=1; j<511; j++) { 7 | laplacian = in[i+1][j] + in[i-1][j] + in[i][j+1] + in[i][j-1]\ 8 | - 4 * in[i][j]; 9 | out[i][j] = in[i][j] + D * dt * laplacian; 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /07_compiling/f2py/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | -------------------------------------------------------------------------------- /07_compiling/f2py/Makefile: -------------------------------------------------------------------------------- 1 | diffusion.so: diffusion.f90 2 | f2py -c -m diffusion --fcompiler=gfortran 
--opt='-O3' diffusion.f90 3 | -------------------------------------------------------------------------------- /07_compiling/f2py/diffusion.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE evolve(grid, scratch, D, dt, N, M) 2 | !f2py threadsafe 3 | !f2py intent(in) grid 4 | !f2py intent(inplace) scratch 5 | !f2py intent(in) D 6 | !f2py intent(in) dt 7 | !f2py intent(hide) N 8 | !f2py intent(hide) M 9 | INTEGER :: N, M 10 | DOUBLE PRECISION, DIMENSION(N,M) :: grid, scratch 11 | DOUBLE PRECISION, DIMENSION(N-2, M-2) :: laplacian 12 | DOUBLE PRECISION :: D, dt 13 | 14 | laplacian = grid(3:N, 2:M-1) + grid(1:N-2, 2:M-1) + & 15 | grid(2:N-1, 3:M) + grid(2:N-1, 1:M-2) - 4 * grid(2:N-1, 2:M-1) 16 | scratch(2:N-1, 2:M-1) = grid(2:N-1, 2:M-1) + D * dt * laplacian 17 | END SUBROUTINE evolve 18 | -------------------------------------------------------------------------------- /07_compiling/f2py/diffusion_fortran.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from diffusion import evolve 5 | 6 | grid_shape = (512, 512) 7 | 8 | 9 | def run_experiment(num_iterations): 10 | scratch = np.zeros(grid_shape, dtype=np.double, order="F") # <1> 11 | grid = np.zeros(grid_shape, dtype=np.double, order="F") 12 | 13 | block_low = int(grid_shape[0] * 0.4) 14 | block_high = int(grid_shape[0] * 0.5) 15 | grid[block_low:block_high, block_low:block_high] = 0.005 16 | 17 | start = time.time() 18 | for i in range(num_iterations): 19 | evolve(grid, scratch, 1.0, 0.1) 20 | grid, scratch = scratch, grid 21 | return time.time() - start 22 | 23 | 24 | if __name__ == "__main__": 25 | t = run_experiment(500) 26 | print(t) 27 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn1.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | output = [0] * len(zs) 4 | for i in range(len(zs)): 5 | n = 0 6 | z = zs[i] 7 | c = cs[i] 8 | while n < maxiter and abs(z) < 2: 9 | z = z * z + c 10 | n += 1 11 | output[i] = n 12 | return output 13 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn2.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and abs(z) < 2: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- 
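cythonfn.pyx above swaps the abs(z) < 2 escape test used in cythonfn1.pyx and cythonfn2.pyx for (z.real * z.real + z.imag * z.imag) < 4, which avoids the square root hidden in abs(). A quick pure-Python check (illustrative only, not part of the repository) confirms the two tests agree:

import random

random.seed(0)
mismatches = 0
for _ in range(100_000):
    z = complex(random.uniform(-3, 3), random.uniform(-3, 3))
    # abs(z) < 2  is equivalent to  |z|^2 < 4, but the squared form needs no sqrt
    if (abs(z) < 2) != (z.real * z.real + z.imag * z.imag < 4):
        mismatches += 1
print("mismatches:", mismatches)  # expected: 0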
/07_compiling/julia/cython/cpython/cythonfn3.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, list zs, list cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and abs(z) < 2: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn4.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn5.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | def calculate_z(int maxiter, zs, cs): 3 | """Calculate output list using Julia update rule""" 4 | cdef unsigned int i, n 5 | cdef double complex z, c 6 | output = [0] * len(zs) 7 | for i in range(len(zs)): 8 | n = 0 9 | z = zs[i] 10 | c = cs[i] 11 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 12 | z = z * z + c 13 | n += 1 14 | output[i] = n 15 | return output 16 | 17 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | #from cythonfn import calculate_z 4 | import cythonfn 5 | 6 | # area of complex space to investigate 7 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 8 | c_real, c_imag = -0.62772, -.42193 9 | 10 | def calc_pure_python(desired_width, max_iterations): 11 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 12 | x_step = (x2 - x1) / desired_width 13 | y_step = (y1 - y2) / desired_width 14 | x = [] 15 | y = [] 16 | ycoord = y2 17 | while ycoord > y1: 18 | y.append(ycoord) 19 | ycoord += y_step 20 | xcoord = x1 21 | while xcoord < x2: 22 | x.append(xcoord) 23 | xcoord += x_step 24 | # build a list of co-ordinates and the initial condition for each cell. 
25 | # Note that our initial condition is a constant and could easily be removed, 26 | # we use it to simulate a real-world scenario with several inputs to our function 27 | zs = [] 28 | cs = [] 29 | for ycoord in y: 30 | for xcoord in x: 31 | zs.append(complex(xcoord, ycoord)) 32 | cs.append(complex(c_real, c_imag)) 33 | 34 | print("Length of x:", len(x)) 35 | print("Total elements:", len(zs)) 36 | start_time = time.time() 37 | output = cythonfn.calculate_z(max_iterations, zs, cs) 38 | end_time = time.time() 39 | secs = end_time - start_time 40 | print(f"Took {secs:0.2f} seconds") 41 | 42 | assert sum(output) == 33219980 # this sum is expected for 1000^2 grid with 300 iterations 43 | 44 | 45 | 46 | # Calculate the Julia set using a pure Python solution with 47 | # reasonable defaults for a laptop 48 | calc_pure_python(desired_width=1000, max_iterations=300) 49 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | from Cython.Build import cythonize 4 | setup(ext_modules=cythonize("cythonfn.pyx", compiler_directives={"language_level": "3"})) 5 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython_pyximport/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | output = [0] * len(zs) 4 | for i in range(len(zs)): 5 | n = 0 6 | z = zs[i] 7 | c = cs[i] 8 | while n < maxiter and abs(z) < 2: 9 | z = z * z + c 10 | n += 1 11 | output[i] = n 12 | return output 13 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython_pyximport/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | import pyximport 4 | pyximport.install(language_level=3) 5 | import cythonfn 6 | 7 | 8 | # area of complex space to investigate 9 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 10 | c_real, c_imag = -0.62772, -.42193 11 | 12 | def calc_pure_python(desired_width, max_iterations): 13 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 14 | x_step = (x2 - x1) / desired_width 15 | y_step = (y1 - y2) / desired_width 16 | x = [] 17 | y = [] 18 | ycoord = y2 19 | while ycoord > y1: 20 | y.append(ycoord) 21 | ycoord += y_step 22 | xcoord = x1 23 | while xcoord < x2: 24 | x.append(xcoord) 25 | xcoord += x_step 26 | # build a list of co-ordinates and the initial condition for each cell. 
27 | # Note that our initial condition is a constant and could easily be removed, 28 | # we use it to simulate a real-world scenario with several inputs to our function 29 | zs = [] 30 | cs = [] 31 | for ycoord in y: 32 | for xcoord in x: 33 | zs.append(complex(xcoord, ycoord)) 34 | cs.append(complex(c_real, c_imag)) 35 | 36 | print("Length of x:", len(x)) 37 | print("Total elements:", len(zs)) 38 | start_time = time.time() 39 | output = cythonfn.calculate_z(max_iterations, zs, cs) 40 | end_time = time.time() 41 | secs = end_time - start_time 42 | print(f"Took {secs:0.2f} seconds") 43 | 44 | assert sum(output) == 33219980 # this sum is expected for 1000^2 grid with 300 iterations 45 | 46 | 47 | 48 | # Calculate the Julia set using a pure Python solution with 49 | # reasonable defaults for a laptop 50 | calc_pure_python(desired_width=1000, max_iterations=300) 51 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 5 | """Calculate output list using Julia update rule""" 6 | cdef unsigned int i, n 7 | cdef double complex z, c 8 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 9 | for i in range(len(zs)): 10 | n = 0 11 | z = zs[i] 12 | c = cs[i] 13 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 14 | z = z * z + c 15 | n += 1 16 | output[i] = n 17 | return output 18 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/cythonfn1.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 5 | """Calculate output list using Julia update rule""" 6 | cdef unsigned int i, n 7 | cdef double complex z, c 8 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 9 | for i in range(len(zs)): 10 | n = 0 11 | z = zs[i] 12 | c = cs[i] 13 | while n < maxiter and abs(z) < 2: 14 | z = z * z + c 15 | n += 1 16 | output[i] = n 17 | return output 18 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/cythonfn2.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 5 | """Calculate output list using Julia update rule""" 6 | cdef unsigned int i, n 7 | cdef double complex z, c 8 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 9 | for i in range(len(zs)): 10 | n = 0 11 | z = zs[i] 12 | c = cs[i] 13 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 14 | z = z * z + c 15 | n += 1 16 | output[i] = n 17 | return output 18 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | import numpy as np 4 | import cythonfn 5 | 6 | # area of complex space to investigate 7 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 8 | c_real, c_imag = -0.62772, -.42193 9 | 10 | 11 | def 
calc_pure_python(desired_width, max_iterations): 12 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 13 | x_step = (x2 - x1) / desired_width 14 | y_step = (y1 - y2) / desired_width 15 | x = [] 16 | y = [] 17 | ycoord = y2 18 | while ycoord > y1: 19 | y.append(ycoord) 20 | ycoord += y_step 21 | xcoord = x1 22 | while xcoord < x2: 23 | x.append(xcoord) 24 | xcoord += x_step 25 | # build a list of co-ordinates and the initial condition for each cell. 26 | # Note that our initial condition is a constant and could easily be removed, 27 | # we use it to simulate a real-world scenario with several inputs to our function 28 | zs = [] 29 | cs = [] 30 | for ycoord in y: 31 | for xcoord in x: 32 | zs.append(complex(xcoord, ycoord)) 33 | cs.append(complex(c_real, c_imag)) 34 | 35 | zs_np = np.array(zs, np.complex128) 36 | cs_np = np.array(cs, np.complex128) 37 | 38 | print("Length of x:", len(x)) 39 | print("Total elements:", len(zs)) 40 | start_time = time.time() 41 | output = cythonfn.calculate_z(max_iterations, zs_np, cs_np) 42 | end_time = time.time() 43 | secs = end_time - start_time 44 | print(f"Took {secs:0.2f} seconds") 45 | 46 | validation_sum = sum(output) 47 | print("Total sum of elements (for validation):", validation_sum) 48 | 49 | 50 | # Calculate the Julia set using a pure Python solution with 51 | # reasonable defaults for a laptop 52 | # set draw_output to True to use PIL to draw an image 53 | calc_pure_python(desired_width=1000, max_iterations=300) 54 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | from cython.parallel import parallel, prange 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 7 | """Calculate output list using Julia update rule""" 8 | cdef unsigned int i, length 9 | cdef double complex z, c 10 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 11 | length = len(zs) 12 | with nogil, parallel(): 13 | for i in prange(length, schedule="guided"): 14 | z = zs[i] 15 | c = cs[i] 16 | output[i] = 0 17 | while output[i] < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 18 | z = z * z + c 19 | output[i] += 1 20 | return output 21 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn1.pyx: -------------------------------------------------------------------------------- 1 | from cython.parallel import prange 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 6 | """Calculate output list using Julia update rule""" 7 | cdef unsigned int i, length 8 | cdef double complex z, c 9 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 10 | length = len(zs) 11 | with nogil: 12 | for i in prange(length, schedule="guided"): 13 | z = zs[i] 14 | c = cs[i] 15 | output[i] = 0 16 | while output[i] < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 17 | z = z * z + c 18 | output[i] += 1 19 | return output 20 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn2.pyx: -------------------------------------------------------------------------------- 1 | #cython: 
boundscheck=False 2 | from cython.parallel import prange 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 7 | """Calculate output list using Julia update rule""" 8 | cdef unsigned int i, length, n 9 | cdef double complex z, c 10 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 11 | length = len(zs) 12 | with nogil: 13 | for i in prange(length, schedule="guided"): 14 | z = zs[i] 15 | c = cs[i] 16 | n = 0 17 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 18 | z = z * z + c 19 | n = n + 1 20 | output[i] = n 21 | return output 22 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn3.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | from cython.parallel import parallel, prange 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 7 | """Calculate output list using Julia update rule""" 8 | cdef unsigned int i, length 9 | cdef double complex z, c 10 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 11 | length = len(zs) 12 | with nogil, parallel(): 13 | for i in prange(length, schedule="guided"): 14 | z = zs[i] 15 | c = cs[i] 16 | output[i] = 0 17 | while output[i] < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 18 | z = z * z + c 19 | output[i] += 1 20 | return output 21 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | import numpy as np 4 | import cythonfn 5 | 6 | # area of complex space to investigate 7 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 8 | c_real, c_imag = -0.62772, -.42193 9 | 10 | 11 | def calc_pure_python(desired_width, max_iterations): 12 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 13 | x_step = (x2 - x1) / desired_width 14 | y_step = (y1 - y2) / desired_width 15 | x = [] 16 | y = [] 17 | ycoord = y2 18 | while ycoord > y1: 19 | y.append(ycoord) 20 | ycoord += y_step 21 | xcoord = x1 22 | while xcoord < x2: 23 | x.append(xcoord) 24 | xcoord += x_step 25 | # build a list of co-ordinates and the initial condition for each cell. 
26 | # Note that our initial condition is a constant and could easily be removed, 27 | # we use it to simulate a real-world scenario with several inputs to our function 28 | zs = [] 29 | cs = [] 30 | for ycoord in y: 31 | for xcoord in x: 32 | zs.append(complex(xcoord, ycoord)) 33 | cs.append(complex(c_real, c_imag)) 34 | 35 | zs_np = np.array(zs, np.complex128) 36 | cs_np = np.array(cs, np.complex128) 37 | 38 | print("Length of x:", len(x)) 39 | print("Total elements:", len(zs)) 40 | start_time = time.time() 41 | output = cythonfn.calculate_z(max_iterations, zs_np, cs_np) 42 | end_time = time.time() 43 | secs = end_time - start_time 44 | print(f"Took {secs:0.2f} seconds") 45 | 46 | validation_sum = sum(output) 47 | print("Total sum of elements (for validation):", validation_sum) 48 | 49 | 50 | # Calculate the Julia set using a pure Python solution with 51 | # reasonable defaults for a laptop 52 | # set draw_output to True to use PIL to draw an image 53 | calc_pure_python(desired_width=1000, max_iterations=300) 54 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | import numpy as np 4 | 5 | ext_modules = [Extension( 6 | "cythonfn", 7 | ["cythonfn.pyx"], 8 | 9 | extra_compile_args=['-fopenmp'], 10 | extra_link_args=['-fopenmp'], 11 | )] 12 | 13 | from Cython.Build import cythonize 14 | setup(ext_modules=cythonize(ext_modules, compiler_directives={"language_level": "3"},),include_dirs=[np.get_include()]) 15 | 16 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | import numpy as np 3 | 4 | from Cython.Build import cythonize 5 | setup(ext_modules=cythonize("cythonfn.pyx", compiler_directives={"language_level": "3"}), 6 | include_dirs=[np.get_include()]) 7 | 8 | -------------------------------------------------------------------------------- /07_compiling/pytorch/compare.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import partial 3 | 4 | from tqdm import tqdm 5 | 6 | import diffusion_numpy 7 | import diffusion_pytorch 8 | import numpy as np 9 | import pylab as py 10 | 11 | 12 | def get_timings(fxn): 13 | start = time.perf_counter() 14 | fxn() 15 | return time.perf_counter() - start 16 | 17 | 18 | if __name__ == "__main__": 19 | grid_sizes = (256, 512, 1024, 2048, 4096) 20 | n_iter = 1000 21 | 22 | diffusion_pytorch.run_experiment(1, (24, 24), "cuda:0") 23 | results_pytorch_gpu = [] 24 | for g in tqdm(grid_sizes, desc="pytorch gpu"): 25 | r = get_timings( 26 | partial(diffusion_pytorch.run_experiment, n_iter, (g, g), "cuda:0") 27 | ) 28 | results_pytorch_gpu.append(r) 29 | results_pytorch_gpu = np.asarray(results_pytorch_gpu) 30 | 31 | results_numpy = [] 32 | for g in tqdm(grid_sizes, desc="numpy"): 33 | r = get_timings(partial(diffusion_numpy.run_experiment, n_iter, (g, g))) 34 | results_numpy.append(r) 35 | results_numpy = np.asarray(results_numpy) 36 | 37 | # diffusion_pytorch.run_experiment(1, (24, 24), 'cpu') 38 | # results_pytorch_cpu = [] 39 | # for g in tqdm(grid_sizes, desc="pytorch cpu"): 40 | # r = 
get_timings(partial(diffusion_pytorch.run_experiment, n_iter, (g, g), 'cpu')) 41 | # results_pytorch_cpu.append(r) 42 | # results_pytorch_cpu = np.asarray(results_pytorch_cpu) 43 | 44 | print(grid_sizes) 45 | print(results_numpy / results_pytorch_gpu) 46 | print() 47 | 48 | fig = py.figure() 49 | py.plot(grid_sizes, results_numpy, "-v", label="Numpy") 50 | py.plot(grid_sizes, results_pytorch_gpu, "-o", label="PyTorch GPU") 51 | # py.plot(grid_sizes, results_pytorch_cpu, '-x', label="PyTorch CPU") 52 | py.legend() 53 | py.title("Runtime for various grid sizes") 54 | py.xlabel("Grid size") 55 | py.ylabel("Runtime (seconds)") 56 | py.yscale("log") 57 | py.savefig("../../../images/comparison_pytorch_vs_numpy.png") 58 | py.close(fig) 59 | -------------------------------------------------------------------------------- /07_compiling/pytorch/diffusion_numpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import timeit 4 | 5 | from numpy import roll, zeros 6 | 7 | GRID_SHAPE = (2048, 2048) 8 | 9 | 10 | def laplacian(grid): 11 | return ( 12 | roll(grid, +1, 0) 13 | + roll(grid, -1, 0) 14 | + roll(grid, +1, 1) 15 | + roll(grid, -1, 1) 16 | - 4 * grid 17 | ) 18 | 19 | 20 | def evolve(grid, dt, D=1): 21 | return grid + dt * D * laplacian(grid) 22 | 23 | 24 | def run_experiment(num_iterations, grid_shape=GRID_SHAPE): 25 | grid = zeros(grid_shape) 26 | 27 | block_low = int(grid_shape[0] * 0.4) 28 | block_high = int(grid_shape[0] * 0.5) 29 | grid[block_low:block_high, block_low:block_high] = 0.005 30 | 31 | for i in range(num_iterations): 32 | grid = evolve(grid, 0.1) 33 | return grid 34 | 35 | 36 | if __name__ == "__main__": 37 | n_iter = 100 38 | N, runtime = timeit.Timer( 39 | f"run_experiment({n_iter})", globals=globals() 40 | ).autorange() 41 | print(f"Runtime with grid {grid_shape}: {runtime / N:0.4f}s") 42 | -------------------------------------------------------------------------------- /07_compiling/pytorch/random_access.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import partial 3 | 4 | import torch 5 | 6 | 7 | def timer(fxn, max_time=5): 8 | N = 0 9 | total_time = 0 10 | fxn() 11 | while total_time < max_time: 12 | start = time.perf_counter() 13 | fxn() 14 | total_time += time.perf_counter() - start 15 | N += 1 16 | return total_time / N 17 | 18 | 19 | def task(A, target): 20 | result = 0 21 | i = 0 22 | N = 0 23 | while result < target: 24 | r = A[i] 25 | result += r 26 | i = A[i] 27 | N += 1 28 | return N 29 | 30 | 31 | if __name__ == "__main__": 32 | N = 1000 33 | print(f"Testing with array of length {N}") 34 | 35 | A_py = (torch.rand(N) * N).type(torch.int).to("cuda:0") 36 | A_np = A_py.cpu().numpy() 37 | 38 | t_py = timer(partial(task, A_py, 500)) 39 | t_np = timer(partial(task, A_np, 500)) 40 | print(f"PyTorch took: {t_py:0.3e}s") 41 | print(f"Numpy took: {t_np:0.3e}s") 42 | print(f"Numpy is {100 - t_np/t_py*100:0.2f}% faster") 43 | -------------------------------------------------------------------------------- /07_compiling/pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch 2 | -------------------------------------------------------------------------------- /08_concurrency/.notempty: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/.notempty -------------------------------------------------------------------------------- /08_concurrency/cralwer/asyncio/crawler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import random 3 | import string 4 | 5 | import aiohttp 6 | 7 | 8 | def generate_urls(base_url, num_urls): 9 | for i in range(num_urls): 10 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 11 | 12 | 13 | def chunked_http_client(num_chunks): 14 | """ 15 | Returns a function that can fetch from a URL, ensuring that only 16 | "num_chunks" of simultaneous connects are made. 17 | """ 18 | semaphore = asyncio.Semaphore(num_chunks) # <1> 19 | 20 | async def http_get(url, client_session): # <2> 21 | nonlocal semaphore 22 | async with semaphore: 23 | async with client_session.request("GET", url) as response: 24 | return await response.content.read() 25 | 26 | return http_get 27 | 28 | 29 | async def run_experiment(base_url, num_iter=1000): 30 | urls = generate_urls(base_url, num_iter) 31 | http_client = chunked_http_client(100) 32 | responses_sum = 0 33 | async with aiohttp.ClientSession() as client_session: 34 | tasks = [http_client(url, client_session) for url in urls] # <3> 35 | for future in asyncio.as_completed(tasks): # <4> 36 | data = await future 37 | responses_sum += len(data) 38 | return responses_sum 39 | 40 | 41 | if __name__ == "__main__": 42 | import time 43 | 44 | loop = asyncio.get_event_loop() 45 | delay = 100 46 | num_iter = 1000 47 | 48 | start = time.time() 49 | result = loop.run_until_complete( 50 | run_experiment( 51 | f"http://127.0.0.1:8080/add?name=asyncio&delay={delay}&", num_iter 52 | ) 53 | ) 54 | end = time.time() 55 | print(f"Result: {result}, Time: {end - start}") 56 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ulimit -n 2048 4 | 5 | (python server.py --port=8080 &> /dev/null) & 6 | server_pid=$! 7 | sleep 1 # wait for server to be ready 8 | 9 | for i in asyncio gevent tornado serial 10 | do 11 | pushd $i 12 | python crawler.py 13 | popd 14 | done 15 | 16 | curl "localhost:8080/add?flush=True" 17 | kill $server_pid 18 | 19 | mkdir images 20 | python visualize.py 21 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/gevent/crawler.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import urllib.error 4 | import urllib.parse 5 | import urllib.request 6 | from contextlib import closing 7 | 8 | import gevent 9 | from gevent import monkey 10 | from gevent.lock import Semaphore 11 | 12 | monkey.patch_socket() 13 | 14 | 15 | def generate_urls(base_url, num_urls): 16 | for i in range(num_urls): 17 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 18 | 19 | 20 | def download(url, semaphore): 21 | with semaphore: # <2> 22 | with closing(urllib.request.urlopen(url)) as data: 23 | return data.read() 24 | 25 | 26 | def chunked_requests(urls, chunk_size=100): 27 | """ 28 | Given an iterable of urls, this function will yield back the contents of the 29 | URLs. 
The requests will be batched up in "chunk_size" batches using a 30 | semaphore 31 | """ 32 | semaphore = Semaphore(chunk_size) # <1> 33 | requests = [gevent.spawn(download, u, semaphore) for u in urls] # <3> 34 | for response in gevent.iwait(requests): 35 | yield response 36 | 37 | 38 | def run_experiment(base_url, num_iter=1000): 39 | urls = generate_urls(base_url, num_iter) 40 | response_futures = chunked_requests(urls, 100) # <4> 41 | response_size = sum(len(r.value) for r in response_futures) 42 | return response_size 43 | 44 | 45 | if __name__ == "__main__": 46 | import time 47 | 48 | delay = 100 49 | num_iter = 1000 50 | base_url = f"http://127.0.0.1:8080/add?name=gevent&delay={delay}&" 51 | 52 | start = time.time() 53 | result = run_experiment(base_url, num_iter) 54 | end = time.time() 55 | print(f"Result: {result}, Time: {end - start}") 56 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/asyncio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/asyncio.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/gevent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/gevent.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/grequests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/grequests.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/parallel_requests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/parallel_requests.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/serial.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/tornado.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/tornado.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/parallel_requests.json: -------------------------------------------------------------------------------- 1 | {"50": [[1, 25.65538716316223], [26, 1.0850036144256592], [51, 0.6126880645751953], [76, 0.4636659622192383], [101, 0.40830278396606445], [126, 0.3793010711669922], [151, 0.37442588806152344], [176, 0.38240480422973633], [201, 0.3782355785369873], [226, 0.3731365203857422], 
[251, 0.35320305824279785], [276, 0.363983154296875], [301, 0.3743610382080078], [326, 0.39357542991638184], [351, 0.38543701171875], [376, 0.39388275146484375], [401, 1.178931713104248], [426, 1.2373404502868652], [451, 1.2409429550170898], [476, 1.2445108890533447]], "300": [[1, 150.9001648426056], [26, 6.09351110458374], [51, 3.1123225688934326], [76, 2.1965770721435547], [101, 1.651137351989746], [126, 1.3618719577789307], [151, 1.2949111461639404], [176, 1.0845177173614502], [201, 1.0513508319854736], [226, 1.0078027248382568], [251, 0.8154821395874023], [276, 1.4151887893676758], [301, 0.8222417831420898], [326, 0.8313257694244385], [351, 0.765545129776001], [376, 0.7526142597198486], [401, 0.7465243339538574], [426, 0.735400915145874], [451, 1.464097499847412], [476, 1.4736013412475586]], "550": [[1, 276.0473265647888], [26, 11.085755825042725], [51, 5.614370822906494], [76, 3.968106269836426], [101, 2.901707887649536], [126, 2.3537800312042236], [151, 2.3039634227752686], [176, 1.8280274868011475], [201, 1.7996525764465332], [226, 1.7501981258392334], [251, 1.342069387435913], [276, 1.3174073696136475], [301, 1.3279151916503906], [326, 1.323756456375122], [351, 1.6952769756317139], [376, 1.2549707889556885], [401, 1.257312536239624], [426, 1.68515944480896], [451, 1.7300856113433838], [476, 1.7483623027801514]], "800": [[1, 401.2360224723816], [26, 16.11333131790161], [51, 8.121342658996582], [76, 5.696434736251831], [101, 4.1515562534332275], [126, 3.358698844909668], [151, 3.2921557426452637], [176, 2.590240478515625], [201, 2.5426173210144043], [226, 2.4937210083007812], [251, 1.8391218185424805], [276, 1.813462734222412], [301, 1.8160362243652344], [326, 1.817732810974121], [351, 1.7900586128234863], [376, 1.7599964141845703], [401, 1.7514441013336182], [426, 1.7195136547088623], [451, 1.9931211471557617], [476, 1.9955008029937744]]} -------------------------------------------------------------------------------- /08_concurrency/cralwer/serial/crawler.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | import requests 5 | 6 | 7 | def generate_urls(base_url, num_urls): 8 | """ 9 | We add random characters to the end of the URL to break any caching 10 | mechanisms in the requests library or the server 11 | """ 12 | for i in range(num_urls): 13 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 14 | 15 | 16 | def run_experiment(base_url, num_iter=1000): 17 | response_size = 0 18 | for url in generate_urls(base_url, num_iter): 19 | response = requests.get(url) 20 | response_size += len(response.text) 21 | return response_size 22 | 23 | 24 | if __name__ == "__main__": 25 | import time 26 | 27 | delay = 100 28 | num_iter = 1000 29 | base_url = f"http://127.0.0.1:8080/add?name=serial&delay={delay}&" 30 | 31 | start = time.time() 32 | result = run_experiment(base_url, num_iter) 33 | end = time.time() 34 | print(f"Result: {result}, Time: {end - start}") 35 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/server.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from collections import defaultdict 4 | 5 | from tornado import gen, httpserver, ioloop, options, web 6 | 7 | options.define("port", default=8080, help="Port to serve on") 8 | 9 | 10 | class AddMetric(web.RequestHandler): 11 | metric_data = defaultdict(list) 12 | 13 | async def get(self): 14 | if 
self.get_argument("flush", False): 15 | json.dump(self.metric_data, open("metric_data.json", "w+")) 16 | else: 17 | name = self.get_argument("name") 18 | try: 19 | delay = int(self.get_argument("delay", 1024)) 20 | except ValueError: 21 | raise web.HTTPError(400, reason="Invalid value for delay") 22 | 23 | start = time.time() 24 | await gen.sleep(delay / 1000.0) 25 | self.write(".") 26 | self.finish() 27 | end = time.time() 28 | self.metric_data[name].append( 29 | {"start": start, "end": end, "dt": end - start} 30 | ) 31 | 32 | 33 | if __name__ == "__main__": 34 | options.parse_command_line() 35 | port = options.options.port 36 | 37 | application = web.Application([(r"/add", AddMetric)]) 38 | 39 | http_server = httpserver.HTTPServer(application) 40 | http_server.listen(port) 41 | print(("Listening on port: {}".format(port))) 42 | ioloop.IOLoop.instance().start() 43 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/tornado/crawler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import random 3 | import string 4 | from functools import partial 5 | 6 | from tornado.httpclient import AsyncHTTPClient 7 | 8 | AsyncHTTPClient.configure( 9 | "tornado.curl_httpclient.CurlAsyncHTTPClient", max_clients=100 # <1> 10 | ) 11 | 12 | 13 | def generate_urls(base_url, num_urls): 14 | for i in range(num_urls): 15 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 16 | 17 | 18 | async def run_experiment(base_url, num_iter=1000): 19 | http_client = AsyncHTTPClient() 20 | urls = generate_urls(base_url, num_iter) 21 | response_sum = 0 22 | tasks = [http_client.fetch(url) for url in urls] # <2> 23 | for task in asyncio.as_completed(tasks): # <3> 24 | response = await task # <4> 25 | response_sum += len(response.body) 26 | return response_sum 27 | 28 | 29 | if __name__ == "__main__": 30 | import time 31 | 32 | delay = 100 33 | num_iter = 1000 34 | run_func = partial( 35 | run_experiment, 36 | f"http://127.0.0.1:8080/add?name=tornado&delay={delay}&", 37 | num_iter, 38 | ) 39 | 40 | start = time.time() 41 | result = asyncio.run(run_func) # <5> 42 | end = time.time() 43 | print(f"Result: {result}, Time: {end - start}") 44 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/tornado_callback/crawler.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from functools import partial 4 | 5 | from tornado import ioloop 6 | from tornado.httpclient import AsyncHTTPClient 7 | 8 | AsyncHTTPClient.configure( 9 | "tornado.curl_httpclient.CurlAsyncHTTPClient", max_clients=100 10 | ) 11 | 12 | 13 | def generate_urls(base_url, num_urls): 14 | for i in range(num_urls): 15 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 16 | 17 | 18 | def fetch_urls(urls, callback): 19 | http_client = AsyncHTTPClient() 20 | urls = list(urls) 21 | responses = [] 22 | 23 | def _finish_fetch_urls(result): 24 | responses.append(result) 25 | if len(responses) == len(urls): 26 | callback(responses) 27 | 28 | for url in urls: 29 | http_client.fetch(url, callback=_finish_fetch_urls) 30 | 31 | 32 | def run_experiment(base_url, num_iter=500, callback=None): 33 | urls = generate_urls(base_url, num_iter) 34 | callback_passthrou = partial(_finish_run_experiment, callback=callback) 35 | fetch_urls(urls, callback_passthrou) 36 | 37 | 38 | def _finish_run_experiment(responses, 
callback): 39 | response_sum = sum(len(r.body) for r in responses) 40 | print(response_sum) 41 | callback() 42 | 43 | 44 | if __name__ == "__main__": 45 | import time 46 | 47 | delay = 100 48 | num_iter = 500 49 | base_url = "http://127.0.0.1:8080/add?name=tornado_callback&delay={}&".format(delay) 50 | 51 | _ioloop = ioloop.IOLoop.instance() 52 | _ioloop.add_callback(run_experiment, base_url, num_iter, _ioloop.stop) 53 | 54 | start = time.time() 55 | _ioloop.start() 56 | end = time.time() 57 | print((end - start)) 58 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/visualize.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pylab as py 4 | 5 | if __name__ == "__main__": 6 | raw_data = json.load(open("metric_data.json")) 7 | 8 | for name, values in raw_data.items(): 9 | print(name) 10 | py.figure() 11 | py.title("Call timeline for {}".format(name)) 12 | min_t = values[0]["start"] 13 | for i, data in enumerate(values): 14 | py.plot([data["start"] - min_t, data["end"] - min_t], [i, i]) 15 | py.xlabel("Time") 16 | py.ylabel("Request Number") 17 | py.savefig("images/{}.png".format(name)) 18 | -------------------------------------------------------------------------------- /08_concurrency/requirements.txt: -------------------------------------------------------------------------------- 1 | bcrypt 2 | 3 | uvloop 4 | 5 | tornado 6 | pycurl 7 | 8 | aiohttp 9 | aiodns 10 | cchardet 11 | 12 | gevent 13 | 14 | requests 15 | -------------------------------------------------------------------------------- /08_concurrency/workload/images/async_callgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/async_callgraph.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_async_batches_no-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_async_batches_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_async_batches_no-IO_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_async_batches_no-IO_serial.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_async_no-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_async_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_batches_no-IO.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_batches_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_file-IO_no-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_file-IO_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_no-IO_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_no-IO_serial.png -------------------------------------------------------------------------------- /08_concurrency/workload/server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from tornado import httpserver, ioloop, options, web 4 | 5 | options.define("port", default=8080, help="Port to serve on") 6 | response = '{{"result": "{result}", "t": 0.5}}' 7 | 8 | 9 | class Addresult(web.RequestHandler): 10 | async def post(self): 11 | result = self.request.body 12 | await asyncio.sleep(0.1) 13 | self.write(response.format(result=result)) 14 | self.set_header("Content-Type", "application/json") 15 | self.finish() 16 | 17 | 18 | if __name__ == "__main__": 19 | options.parse_command_line() 20 | port = options.options.port 21 | 22 | application = web.Application([(r"/add", Addresult)]) 23 | 24 | http_server = httpserver.HTTPServer(application) 25 | http_server.listen(port) 26 | print(("Listening on port: {}".format(port))) 27 | ioloop.IOLoop.instance().start() 28 | -------------------------------------------------------------------------------- /08_concurrency/workload/workloads.json: -------------------------------------------------------------------------------- 1 | {"async": [[600, 8, 9.769921675004298], [400, 10, 25.225514382997062], [400, 11, 50.13304948201403], [400, 12, 99.9148739350203]], "serial": [[600, 8, 71.0296336459869], [400, 10, 66.04494852598873], [400, 11, 90.86424219899345], [400, 12, 140.64069702799316]], "no IO": [[600, 8, 9.369766311021522], [400, 10, 24.93677038900205], [400, 11, 49.76722630299628], [400, 12, 99.49409197299974]], "file IO": [[600, 8, 9.36683275300311], [400, 10, 24.921224841004005], [400, 11, 49.74984498100821], [400, 12, 99.56621096501476]], "batches": [[600, 8, 10.217370290978579], [400, 10, 25.483805308002047], [400, 11, 50.374549086991465], [400, 12, 100.2910156819853]], "async+uvloop": [[600, 8, 9.759301794023486], [400, 10, 25.212604185013333], [400, 11, 50.07677851500921], [400, 12, 99.95755646598991]]} -------------------------------------------------------------------------------- /09_multiprocessing/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/09_multiprocessing/.notempty -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex1_lock.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 
2 | import os 3 | import fasteners 4 | # python -m timeit -s "import ex1_lock" "ex1_lock.run_workers()" 5 | # 400ms 6 | 7 | 8 | MAX_COUNT_PER_PROCESS = 1000 9 | FILENAME = "count.txt" 10 | 11 | 12 | def work_smaller_chunks(filename, max_count): 13 | @fasteners.interprocess_locked('/tmp/tmp_lock') 14 | def work_write(filename): 15 | f = open(filename, "r") 16 | try: 17 | nbr = int(f.read()) 18 | except ValueError as err: 19 | print("File is empty, starting to count from 0, error: " + str(err)) 20 | nbr = 0 21 | #print(os.getpid()) 22 | f = open(filename, "w") 23 | f.write(str(nbr + 1) + '\n') 24 | f.close() 25 | 26 | for n in range(max_count): 27 | work_write(filename) 28 | 29 | @fasteners.interprocess_locked('/tmp/tmp_lock') 30 | def work(filename, max_count): 31 | for n in range(max_count): 32 | f = open(filename, "r") 33 | try: 34 | nbr = int(f.read()) 35 | except ValueError as err: 36 | print("File is empty, starting to count from 0, error: " + str(err)) 37 | nbr = 0 38 | f = open(filename, "w") 39 | f.write(str(nbr + 1) + '\n') 40 | f.close() 41 | 42 | 43 | def run_workers(): 44 | NBR_PROCESSES = 4 45 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 46 | print("Starting {} process(es) to count to {}".format(NBR_PROCESSES, total_expected_count)) 47 | # reset counter 48 | f = open(FILENAME, "w") 49 | f.close() 50 | 51 | processes = [] 52 | for process_nbr in range(NBR_PROCESSES): 53 | p = multiprocessing.Process(target=work, args=(FILENAME, MAX_COUNT_PER_PROCESS)) 54 | p.start() 55 | processes.append(p) 56 | 57 | for p in processes: 58 | p.join() 59 | 60 | print("Expecting to see a count of {}".format(total_expected_count)) 61 | print("{} contains:".format(FILENAME)) 62 | os.system('more ' + FILENAME) 63 | 64 | 65 | if __name__ == "__main__": 66 | run_workers() 67 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex1_nolock1.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | # python -m timeit -s "import ex1_nolock" "ex1_nolock.run_workers()" 4 | # 71ms 5 | 6 | MAX_COUNT_PER_PROCESS = 1000 7 | FILENAME = "count.txt" 8 | 9 | 10 | def work(filename, max_count): 11 | for n in range(max_count): 12 | f = open(filename, "r") 13 | try: 14 | nbr = int(f.read()) 15 | except ValueError as err: 16 | print("File is empty, starting to count from 0, error: " + str(err)) 17 | nbr = 0 18 | f = open(filename, "w") 19 | f.write(str(nbr + 1) + '\n') 20 | f.close() 21 | 22 | 23 | def run_workers(): 24 | NBR_PROCESSES = 1 25 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 26 | print("Starting {} process(es) to count to {}".format(NBR_PROCESSES, total_expected_count)) 27 | # reset counter 28 | f = open(FILENAME, "w") 29 | f.close() 30 | 31 | processes = [] 32 | for process_nbr in range(NBR_PROCESSES): 33 | p = multiprocessing.Process(target=work, args=(FILENAME, MAX_COUNT_PER_PROCESS)) 34 | p.start() 35 | processes.append(p) 36 | 37 | for p in processes: 38 | p.join() 39 | 40 | print("Expecting to see a count of {}".format(total_expected_count)) 41 | print("{} contains:".format(FILENAME)) 42 | os.system('more ' + FILENAME) 43 | 44 | 45 | if __name__ == "__main__": 46 | run_workers() 47 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex1_nolock4.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | # python 
-m timeit -s "import ex1_nolock" "ex1_nolock.run_workers()" 4 | # 71ms 5 | 6 | MAX_COUNT_PER_PROCESS = 1000 7 | FILENAME = "count.txt" 8 | 9 | 10 | def work(filename, max_count): 11 | for n in range(max_count): 12 | f = open(filename, "r") 13 | try: 14 | nbr = int(f.read()) 15 | except ValueError as err: 16 | print("File is empty, starting to count from 0, error: " + str(err)) 17 | nbr = 0 18 | f = open(filename, "w") 19 | f.write(str(nbr + 1) + '\n') 20 | f.close() 21 | 22 | 23 | def run_workers(): 24 | NBR_PROCESSES = 4 25 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 26 | print("Starting {} process(es) to count to {}".format(NBR_PROCESSES, total_expected_count)) 27 | # reset counter 28 | f = open(FILENAME, "w") 29 | f.close() 30 | 31 | processes = [] 32 | for process_nbr in range(NBR_PROCESSES): 33 | p = multiprocessing.Process(target=work, args=(FILENAME, MAX_COUNT_PER_PROCESS)) 34 | p.start() 35 | processes.append(p) 36 | 37 | for p in processes: 38 | p.join() 39 | 40 | print("Expecting to see a count of {}".format(total_expected_count)) 41 | print("{} contains:".format(FILENAME)) 42 | os.system('more ' + FILENAME) 43 | 44 | 45 | if __name__ == "__main__": 46 | run_workers() 47 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex2_lock.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | # python -m timeit -s "import ex2_lock" "ex2_lock.run_workers()" 3 | # 19ms using lock.acquire 4 | # 21ms using with.lock 5 | 6 | 7 | def work(value, max_count, lock): 8 | for n in range(max_count): 9 | with lock: 10 | value.value += 1 11 | #lock.acquire() 12 | #value.value += 1 13 | #lock.release() 14 | 15 | 16 | def run_workers(): 17 | NBR_PROCESSES = 4 18 | MAX_COUNT_PER_PROCESS = 1000 19 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 20 | processes = [] 21 | lock = multiprocessing.Lock() 22 | value = multiprocessing.Value('i', 0) 23 | for process_nbr in range(NBR_PROCESSES): 24 | p = multiprocessing.Process(target=work, args=(value, MAX_COUNT_PER_PROCESS, lock)) 25 | p.start() 26 | processes.append(p) 27 | 28 | # wait for the processes to finish 29 | for p in processes: 30 | p.join() 31 | 32 | # print the final value 33 | print("Expecting to see a count of {}".format(total_expected_count)) 34 | print("We have counted to {}".format(value.value)) 35 | 36 | 37 | if __name__ == "__main__": 38 | run_workers() 39 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex2_lock_rawvalue.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | # python -m timeit -s "import ex2_lock_rawvalue" "ex2_lock_rawvalue.run_workers()" 3 | # 18.5ms (slightly faster?) 
4 | # 5ms lock.acquire 5 | # 11ms with lock 6 | 7 | 8 | def work(value, max_count, lock): 9 | for n in range(max_count): 10 | with lock: 11 | value.value += 1 12 | #lock.acquire() 13 | #value.value += 1 14 | #lock.release() 15 | 16 | 17 | def run_workers(): 18 | NBR_PROCESSES = 4 19 | MAX_COUNT_PER_PROCESS = 1000 20 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 21 | processes = [] 22 | lock = multiprocessing.Lock() 23 | value = multiprocessing.RawValue('i', 0) 24 | for process_nbr in range(NBR_PROCESSES): 25 | p = multiprocessing.Process(target=work, args=(value, MAX_COUNT_PER_PROCESS, lock)) 26 | p.start() 27 | processes.append(p) 28 | 29 | # wait for the processes to finish 30 | for p in processes: 31 | p.join() 32 | 33 | # print the final value 34 | print("Expecting to see a count of {}".format(total_expected_count)) 35 | print("We have counted to {}".format(value.value)) 36 | 37 | 38 | if __name__ == "__main__": 39 | run_workers() 40 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex2_nolock.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | # python -m timeit -s "import ex2_nolock" "ex2_nolock.run_workers()" 3 | # 12ms 4 | 5 | 6 | def work(value, max_count): 7 | for n in range(max_count): 8 | value.value += 1 9 | 10 | 11 | def run_workers(): 12 | NBR_PROCESSES = 4 13 | MAX_COUNT_PER_PROCESS = 1000 14 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 15 | processes = [] 16 | value = multiprocessing.Value('i', 0) 17 | for process_nbr in range(NBR_PROCESSES): 18 | p = multiprocessing.Process(target=work, args=(value, MAX_COUNT_PER_PROCESS)) 19 | p.start() 20 | processes.append(p) 21 | 22 | # wait for the processes to finish 23 | for p in processes: 24 | p.join() 25 | 26 | # print the final value 27 | print("Expecting to see a count of {}".format(total_expected_count)) 28 | print("We have counted to {}".format(value.value)) 29 | 30 | if __name__ == "__main__": 31 | run_workers() 32 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex3_redis.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import redis 3 | # python -m timeit -s "import ex3_redis" "ex3_redis.run_workers()" 4 | # 81 ms 5 | 6 | 7 | rds = redis.StrictRedis() 8 | REDIS_KEY = "ex3_redis_key" 9 | 10 | 11 | def work(max_count): 12 | for n in range(max_count): 13 | rds.incr(REDIS_KEY) 14 | 15 | 16 | def run_workers(): 17 | NBR_PROCESSES = 4 18 | MAX_COUNT_PER_PROCESS = 1000 19 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 20 | 21 | rds[REDIS_KEY] = 0 22 | 23 | processes = [] 24 | for process_nbr in range(NBR_PROCESSES): 25 | p = multiprocessing.Process(target=work, args=(MAX_COUNT_PER_PROCESS,)) 26 | p.start() 27 | processes.append(p) 28 | 29 | for p in processes: 30 | p.join() 31 | 32 | # print the final value 33 | print("Expecting to see a count of {}".format(total_expected_count)) 34 | print("We have counted to {}".format(rds[REDIS_KEY])) 35 | 36 | 37 | if __name__ == "__main__": 38 | run_workers() 39 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_graph_speed_tests.py: -------------------------------------------------------------------------------- 1 | """Graph execution time for serial, threaded and processes forms of Pi estimation with 
lists""" 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # timings generated using 6 | # pi_lists_series, pi_lists_parallel 1 2 4 8, pi_lists_parallel --processes 1 2 4 8 7 | speeds = np.array([[71.1], 8 | [71.1, 71.0, 70.7, 71.0], 9 | [71.0, 37.1, 18.1, 18.7]]) 10 | 11 | nbr_cores = np.array([[1], 12 | [1, 2, 4, 8], 13 | [1, 2, 4, 8]]) 14 | 15 | labels = np.array(["Serial", "Threads", "Processes"]) 16 | 17 | plt.figure(1) 18 | plt.clf() 19 | markers = ['-.o', '--x', '-x'] 20 | for nc, sp, label, mk in zip(nbr_cores, speeds, labels, markers): 21 | plt.plot(nc, sp, mk, label=label, linewidth=2) 22 | plt.annotate("Serial and Threads have similar execution time", (nbr_cores[0][0]+0.2, speeds[0][0]+0.9) ) 23 | plt.legend(loc="lower left", framealpha=0.8) 24 | plt.ylim(0, 80) 25 | plt.xlim(0.5, 8.5) 26 | plt.ylabel("Execution time (seconds) - smaller is better") 27 | plt.xlabel("Number of workers") 28 | plt.title("Time to estimate Pi using objects with 100,000,000\ndart throws in series, threaded and with processes") 29 | #plt.grid() 30 | #plt.show() 31 | plt.tight_layout() 32 | plt.savefig("09_pi_lists_graph_speed_tests_threaded_processes.png") 33 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_lists_parallel_joblib.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import time 4 | import argparse 5 | from joblib import Parallel, delayed 6 | from pi_lists_parallel import estimate_nbr_points_in_quarter_circle 7 | 8 | 9 | if __name__ == "__main__": 10 | nbr_samples_in_total = int(1e8) 11 | nbr_parallel_blocks = 8 12 | 13 | nbr_samples_per_worker = int(nbr_samples_in_total / nbr_parallel_blocks) 14 | print("Making {:,} samples per {} worker".format(nbr_samples_per_worker, nbr_parallel_blocks)) 15 | t1 = time.time() 16 | nbr_in_quarter_unit_circles = Parallel(n_jobs=nbr_parallel_blocks, verbose=1)(delayed(estimate_nbr_points_in_quarter_circle)(nbr_samples_per_worker) for sample_idx in range(nbr_parallel_blocks)) 17 | pi_estimate = sum(nbr_in_quarter_unit_circles) * 4 / float(nbr_samples_in_total) 18 | print("Estimated pi", pi_estimate) 19 | print("Delta:", time.time() - t1) 20 | 21 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_lists_parallel_joblib_cache.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import time 4 | import argparse 5 | from joblib import Parallel, delayed 6 | from joblib import Memory 7 | from pi_lists_parallel import estimate_nbr_points_in_quarter_circle as estimate_nbr_points_in_quarter_circle_orig 8 | 9 | memory = Memory("./joblib_cache", verbose=0) 10 | 11 | @memory.cache 12 | def estimate_nbr_points_in_quarter_circle_with_idx(nbr_estimates, idx): 13 | print(f"Executing estimate_nbr_points_in_quarter_circle with {nbr_estimates} on sample {idx} on pid {os.getpid()}") 14 | nbr_trials_in_quarter_unit_circle = 0 15 | for step in range(int(nbr_estimates)): 16 | x = random.uniform(0, 1) 17 | y = random.uniform(0, 1) 18 | is_in_unit_circle = x * x + y * y <= 1.0 19 | nbr_trials_in_quarter_unit_circle += is_in_unit_circle 20 | 21 | return nbr_trials_in_quarter_unit_circle 22 | 23 | 24 | estimate_nbr_points_in_quarter_circle = memory.cache(estimate_nbr_points_in_quarter_circle_orig) 25 | 26 | if __name__ == "__main__": 27 | nbr_samples_in_total = 
int(1e8) 28 | nbr_parallel_blocks = 8 29 | 30 | nbr_samples_per_worker = int(nbr_samples_in_total / nbr_parallel_blocks) 31 | print("Making {:,} samples per {} worker".format(nbr_samples_per_worker, nbr_parallel_blocks)) 32 | t1 = time.time() 33 | # beware if you don't have a sample_idx, you cache the same result! 34 | nbr_in_quarter_unit_circles = Parallel(n_jobs=nbr_parallel_blocks)(delayed(estimate_nbr_points_in_quarter_circle_with_idx)(nbr_samples_per_worker, idx) for idx in range(nbr_parallel_blocks)) 35 | #nbr_in_quarter_unit_circles = Parallel(n_jobs=nbr_parallel_blocks)(delayed(estimate_nbr_points_in_quarter_circle)(nbr_samples_per_worker) for idx in range(nbr_parallel_blocks)) 36 | pi_estimate = sum(nbr_in_quarter_unit_circles) * 4 / float(nbr_samples_in_total) 37 | print("Estimated pi", pi_estimate) 38 | print("Delta:", time.time() - t1) 39 | 40 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_lists_series.py: -------------------------------------------------------------------------------- 1 | """Estimate Pi using 1 large array""" 2 | import time 3 | import numpy as np 4 | import pi_lists_parallel 5 | 6 | nbr_samples_in_total = int(1e8) 7 | 8 | t1 = time.time() 9 | nbr_in_circle = pi_lists_parallel.estimate_nbr_points_in_quarter_circle(nbr_samples_in_total) 10 | print("Took {0:2f}s".format(time.time() - t1)) 11 | pi_estimate = float(nbr_in_circle) / nbr_samples_in_total * 4 12 | print("Estimated pi", pi_estimate) 13 | print("Pi", np.pi) 14 | 15 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_monte_carlo_diagram/pi_plot_monte_carlo_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | if __name__ == "__main__": 6 | np.random.seed(0) # force repeatable plt 7 | nbr_items = int(1E4) 8 | xs = np.random.uniform(0, 1, nbr_items) 9 | ys = np.random.uniform(0, 1, nbr_items) 10 | estimate_inside_quarter_unit_circle = (xs * xs + ys * ys) <= 1 11 | nbr_trials_in_quarter_unit_circle = np.sum(estimate_inside_quarter_unit_circle) 12 | pi = (nbr_trials_in_quarter_unit_circle * 4) / nbr_items # estimate for the full circle 13 | 14 | plt.figure(1, figsize=(8, 8)) 15 | plt.clf() 16 | plt.plot(xs[estimate_inside_quarter_unit_circle], ys[estimate_inside_quarter_unit_circle], 'bx') 17 | plt.plot(xs[estimate_inside_quarter_unit_circle == False], ys[estimate_inside_quarter_unit_circle == False], 'g.') 18 | 19 | unit_circle_xs = np.arange(0, 1, 0.001) 20 | unit_circle_ys = np.sin(np.arccos(unit_circle_xs)) 21 | plt.plot(unit_circle_xs, unit_circle_ys, linewidth=2, c="k") 22 | plt.xticks([0.0, 1.0]) 23 | plt.yticks([0.0, 1.0]) 24 | plt.title("Pi estimated as {} using \n{:,} Monte Carlo dart throws".format(pi, int(nbr_items))) 25 | #plt.show() 26 | plt.tight_layout() 27 | plt.savefig("09_pi_plot_monte_carlo_example.png") 28 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_processes_parallel/pi_graph_speed_tests.py: -------------------------------------------------------------------------------- 1 | """Graph execution time for serial, threaded and processes forms of Pi estimation with numpy""" 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # timings generated using 6 | # pi_numpy_serial_blocks.py 7 | # (serial.py - same as serial blocks but for 1 large 
array only) 8 | # pi_numpy_parallel_worker.py 9 | speeds = np.array([[2.46], 10 | [2.46, 2.19, 2.13, 2.05], 11 | [2.46, 1.61, 0.88, 0.85]]) 12 | 13 | nbr_cores = np.array([[1], 14 | [1, 2, 4, 8], 15 | [1, 2, 4, 8]]) 16 | 17 | labels = np.array(["Serial", "Threads", "Processes"]) 18 | 19 | plt.figure(1) 20 | plt.clf() 21 | markers = ['-.x', '--x', '-x'] 22 | for nc, sp, label, mk in zip(nbr_cores, speeds, labels, markers): 23 | plt.plot(nc, sp, mk, label=label, linewidth=2) 24 | plt.legend(loc="lower left", framealpha=0.8) 25 | plt.ylim(0, 3) 26 | plt.xlim(0.5, 8.5) 27 | plt.ylabel("Execution time (seconds) - smaller is better") 28 | plt.xlabel("Number of workers") 29 | plt.title("Time to estimate Pi using numpy with 100,000,000\ndart throws in series, threaded and with processes") 30 | #plt.grid() 31 | #plt.show() 32 | plt.tight_layout() 33 | plt.savefig("09_pi_numpy_graph_speed_tests_threaded_processes.png") 34 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_processes_parallel/pi_numpy_serial.py: -------------------------------------------------------------------------------- 1 | """Estimate Pi using 1 large array""" 2 | import time 3 | import numpy as np 4 | import pi_numpy_parallel_worker 5 | 6 | nbr_samples_in_total = int(1e8) 7 | 8 | t1 = time.time() 9 | nbr_in_circle = pi_numpy_parallel_worker.estimate_nbr_points_in_quarter_circle(nbr_samples_in_total) 10 | print("Took {}s".format(time.time() - t1)) 11 | pi_estimate = float(nbr_in_circle) / nbr_samples_in_total * 4 12 | print("Estimated pi", pi_estimate) 13 | print("Pi", np.pi) 14 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_processes_parallel/pi_numpy_serial_blocks.py: -------------------------------------------------------------------------------- 1 | """Estimate Pi using blocks of serial work on 1 CPU""" 2 | import time 3 | #from multiprocessing.dummy import Pool 4 | import numpy as np 5 | 6 | 7 | def estimate_nbr_points_in_circle(nbr_samples): 8 | # set random seed for numpy in each new process 9 | # else the fork will mean they all share the same state 10 | np.random.seed() 11 | xs = np.random.uniform(0, 1, nbr_samples) 12 | ys = np.random.uniform(0, 1, nbr_samples) 13 | estimate_inside_quarter_unit_circle = (xs * xs + ys * ys) <= 1 14 | nbr_trials_in_quarter_unit_circle = np.sum(estimate_inside_quarter_unit_circle) 15 | return nbr_trials_in_quarter_unit_circle 16 | 17 | 18 | if __name__ == "__main__": 19 | nbr_samples_in_total = (1e8) 20 | 21 | nbr_parallel_blocks = 4 22 | nbr_samples_per_worker = int(nbr_samples_in_total / nbr_parallel_blocks) 23 | print("Making {} samples per worker".format(nbr_samples_per_worker)) 24 | 25 | t1 = time.time() 26 | nbr_in_circle = 0 27 | for npb in range(nbr_parallel_blocks): 28 | nbr_in_circle += estimate_nbr_points_in_circle(nbr_samples_per_worker) 29 | print("Took {}s".format(time.time() - t1)) 30 | pi_estimate = float(nbr_in_circle) / nbr_samples_in_total * 4 31 | print("Estimated pi", pi_estimate) 32 | print("Pi", np.pi) 33 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_generation/plot_serial_vs_queue_times.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | 3 | 4 | plt.figure() 5 | 6 | # primes_queue.py on job C 7 | xs = [1, 2, 4, 8] 8 | ys = [97, 97, 109, 111] 9 | #plt.scatter(xs, ys, marker='x') 10 | 
plt.plot(xs, ys, '--x', label="Using Queues") 11 | plt.annotate("1 child process via Queues", (xs[0], ys[0]-5)) 12 | 13 | # primes_queue_less_work - not sure there's any point showing this? 14 | #xs = [1, 2, 4, 8] 15 | #ys = [57, 36, 48, 49] 16 | #plt.scatter(xs, ys, marker='v') 17 | 18 | xs = [1] 19 | ys = [24] 20 | #plt.scatter(xs, ys, marker='o') 21 | plt.plot(xs, ys, '-o', label="No queue") 22 | plt.annotate("No queue", (xs[0], ys[0])) 23 | plt.xlim(0.5, 8.5) 24 | plt.ylim(0, 120) 25 | 26 | plt.title("The overhead of Queues on lightweight tasks") 27 | plt.ylabel("Seconds (smaller is better)") 28 | plt.xlabel("Number of processes") 29 | plt.legend(loc="center right") 30 | 31 | plt.draw() 32 | plt.tight_layout() 33 | plt.savefig("multiprocessing_serial_vs_queue_times.png") 34 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_generation/primes.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | 5 | def check_prime(n): 6 | if n % 2 == 0: 7 | return False 8 | for i in range(3, int(math.sqrt(n)) + 1, 2): 9 | if n % i == 0: 10 | return False 11 | return True 12 | 13 | 14 | if __name__ == "__main__": 15 | primes = [] 16 | t1 = time.time() 17 | #number_range = xrange(100000000, 100010000) # A 18 | #number_range = xrange(100000000, 100100000) # B 19 | number_range = range(100000000, 101000000) # C 20 | #number_range = xrange(1000000000, 1000100000) # D 21 | #number_range = xrange(100000000000, 100000100000) # E 22 | 23 | for possible_prime in number_range: 24 | if check_prime(possible_prime): 25 | primes.append(possible_prime) 26 | 27 | print("Took:", time.time() - t1) 28 | print(len(primes), primes[:10], primes[-10:]) 29 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_generation/primes_pool.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | import multiprocessing 4 | #import numpy as np 5 | import itertools 6 | 7 | 8 | def check_prime(n): 9 | if n % 2 == 0: 10 | return False 11 | for i in xrange(3, int(math.sqrt(n)) + 1, 2): 12 | if n % i == 0: 13 | return False 14 | return True 15 | 16 | 17 | if __name__ == "__main__": 18 | primes = [] 19 | NBR_PROCESSES = 4 20 | pool = multiprocessing.Pool(processes=NBR_PROCESSES) 21 | 22 | t1 = time.time() 23 | #number_range = xrange(100000000, 100010000) # A 24 | #number_range = xrange(100000000, 100100000) # B 25 | number_range = xrange(100000000, 101000000) # C 26 | #number_range = xrange(1000000000, 1000100000) # D 27 | #number_range = xrange(100000000000, 100000100000) # E 28 | 29 | #are_primes = pool.map(check_prime, number_range) # original 30 | #primes = np.array(number_range)[np.array(are_primes)] # original 31 | # 32 | # note using pool.map is fastest, but uses ram 33 | # using pool.imap is slower but uses less ram 34 | # pool.imap_unordered is even slower 35 | are_primes = pool.map(check_prime, number_range) 36 | primes = [p for p in itertools.compress(number_range, are_primes)] 37 | 38 | print "Took:", time.time() - t1 39 | print len(primes), primes[:10], primes[-10:] 40 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes.py: -------------------------------------------------------------------------------- 1 | import math 2 | import timeit 3 | 4 | 5 | def check_prime(n): 6 | if n % 2 == 0: 7 | return False 8 | 
from_i = 3 9 | to_i = math.sqrt(n) + 1 10 | for i in range(from_i, int(to_i), 2): 11 | if n % i == 0: 12 | return False 13 | return True 14 | 15 | 16 | if __name__ == "__main__": 17 | for label, nbr in [("trivial non-prime", 112272535095295), 18 | ("expensive non-prime18_1", 100109100129100369), 19 | ("expensive non-prime18_2", 100109100129101027), 20 | #("prime", 112272535095293)]: # 15 21 | #("prime17", 10000000002065383)] 22 | ("prime18_1", 100109100129100151), 23 | ("prime18_2", 100109100129162907)]: 24 | #("prime23", 22360679774997896964091)]: 25 | 26 | time_costs = timeit.repeat(stmt="check_prime({})".format(nbr), repeat=20, number=1, 27 | setup="from __main__ import check_prime") 28 | print("{:24} ({}) {: 3.6f}s".format(label, nbr, min(time_costs))) 29 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes_factor_test.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | 5 | def check_prime(n): 6 | if n % 2 == 0: 7 | return False, 2 8 | for i in xrange(3, int(math.sqrt(n)) + 1): 9 | if n % i == 0: 10 | return False, i 11 | return True, None 12 | 13 | 14 | if __name__ == "__main__": 15 | primes = [] 16 | t1 = time.time() 17 | 18 | # 100109100129100151 big prime 19 | # http://primes.utm.edu/curios/page.php/100109100129100151.html 20 | #number_range = xrange(100109100129100153, 100109100129101238, 2) 21 | number_range = xrange(100109100129101237, 100109100129201238, 2) 22 | 23 | # new expensive near-primes 24 | # [(95362951, (100109100129100369, 7.254560947418213)) 25 | # (171656941, (100109100129101027, 13.052711009979248)) 26 | # (121344023, (100109100129101291, 8.994053840637207) 27 | # note these two lines of timings look really wrong, they're about 4sec 28 | # each really 29 | # [(265687139, (100109100129102047, 19.642582178115845)), (219609683, (100109100129102277, 16.178056001663208)), (121344023, (100109100129101291, 8.994053840637207))] 30 | # [(316096873, (100109100129126653, 23.480671882629395)), (313994287, (100109100129111617, 23.262380123138428)), (307151363, (100109100129140177, 22.80288815498352))] 31 | # primes 32 | # 100109100129162907 33 | # 100109100129162947 34 | 35 | highest_factors = {} 36 | for possible_prime in number_range: 37 | t2 = time.time() 38 | is_prime, factor = check_prime(possible_prime) 39 | if is_prime: 40 | primes.append(possible_prime) 41 | print "GOT NEW PRIME", possible_prime 42 | else: 43 | highest_factors[factor] = (possible_prime, time.time() - t2) 44 | hf = highest_factors.items() 45 | hf.sort(reverse=True) 46 | print hf[:3] 47 | print "Took:", time.time() - t1 48 | print len(primes), primes[:10], primes[-10:] 49 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes_pool_per_number1.py: -------------------------------------------------------------------------------- 1 | """Check primality by splitting the list of factors""" 2 | import math 3 | import timeit 4 | from multiprocessing import Pool 5 | import create_range 6 | 7 | 8 | def check_prime_in_range(n_from_i_to_i): 9 | (n, (from_i, to_i)) = n_from_i_to_i 10 | if n % 2 == 0: 11 | return False 12 | assert from_i % 2 != 0 13 | for i in range(from_i, int(to_i), 2): 14 | if n % i == 0: 15 | return False 16 | return True 17 | 18 | 19 | def check_prime(n, pool, nbr_processes): 20 | from_i = 3 21 | to_i = int(math.sqrt(n)) + 1 22 | ranges_to_check = create_range.create(from_i, to_i, 
nbr_processes) 23 | ranges_to_check = list(zip(len(ranges_to_check) * [n], ranges_to_check)) 24 | assert len(ranges_to_check) == nbr_processes 25 | results = pool.map(check_prime_in_range, ranges_to_check) 26 | if False in results: 27 | return False 28 | return True 29 | 30 | 31 | if __name__ == "__main__": 32 | NBR_PROCESSES = 4 33 | pool = Pool(processes=NBR_PROCESSES) 34 | #import pdb; pdb.set_trace() 35 | print("Testing with {} processes".format(NBR_PROCESSES)) 36 | for label, nbr in [("trivial non-prime", 112272535095295), 37 | ("expensive non-prime18_1", 100109100129100369), 38 | ("expensive non-prime18_2", 100109100129101027), 39 | #("prime", 112272535095293)]: # 15 40 | #("prime17", 10000000002065383)] 41 | ("prime18_1", 100109100129100151), 42 | ("prime18_2", 100109100129162907)]: 43 | #("prime23", 22360679774997896964091)]: 44 | time_costs = timeit.repeat(stmt="check_prime({}, pool, {})".format(nbr, NBR_PROCESSES), repeat=20, number=1, 45 | setup="from __main__ import pool, check_prime") 46 | print("{:19} ({}) {: 3.6f}s".format(label, nbr, min(time_costs))) 47 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes_pool_per_number2.py: -------------------------------------------------------------------------------- 1 | """Check primality by splitting the list of factors with early prime check""" 2 | import math 3 | import timeit 4 | from multiprocessing import Pool 5 | import create_range 6 | 7 | 8 | def check_prime_in_range(n_from_i_to_i): 9 | (n, (from_i, to_i)) = n_from_i_to_i 10 | if n % 2 == 0: 11 | return False 12 | assert from_i % 2 != 0 13 | for i in range(from_i, int(to_i), 2): 14 | if n % i == 0: 15 | return False 16 | return True 17 | 18 | 19 | def check_prime(n, pool, nbr_processes): 20 | # cheaply check high probability set of possible factors 21 | from_i = 3 22 | to_i = 21 23 | if not check_prime_in_range((n, (from_i, to_i))): 24 | return False 25 | 26 | from_i = to_i 27 | to_i = int(math.sqrt(n)) + 1 28 | ranges_to_check = create_range.create(from_i, to_i, nbr_processes) 29 | ranges_to_check = list(zip(len(ranges_to_check) * [n], ranges_to_check)) 30 | assert len(ranges_to_check) == nbr_processes 31 | results = pool.map(check_prime_in_range, ranges_to_check) 32 | if False in results: 33 | return False 34 | return True 35 | 36 | 37 | if __name__ == "__main__": 38 | NBR_PROCESSES = 4 39 | pool = Pool(processes=NBR_PROCESSES) 40 | print("Testing with {} processes".format(NBR_PROCESSES)) 41 | for label, nbr in [("trivial non-prime", 112272535095295), 42 | ("expensive non-prime18_1", 100109100129100369), 43 | ("expensive non-prime18_2", 100109100129101027), 44 | #("prime", 112272535095293)]: # 15 45 | #("prime17", 10000000002065383)] 46 | ("prime18_1", 100109100129100151), 47 | ("prime18_2", 100109100129162907)]: 48 | #("prime23", 22360679774997896964091)]: 49 | 50 | time_costs = timeit.repeat(stmt="check_prime({}, pool, {})".format(nbr, NBR_PROCESSES), repeat=20, number=1, 51 | setup="from __main__ import pool, check_prime") 52 | print("{:24} ({}) {: 3.6f}s".format(label, nbr, min(time_costs))) 53 | -------------------------------------------------------------------------------- /10_clusters/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/10_clusters/.notempty -------------------------------------------------------------------------------- 
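Note on the two pool-per-number scripts above: they delegate the splitting of the candidate-factor range to create_range.create(from_i, to_i, nbr_processes), which lives in 09_multiprocessing/prime_validation/create_range.py and is not reproduced in this listing. The sketch below is a hypothetical stand-in (not the repository's implementation) showing the contract the callers rely on: it returns exactly nbr_processes (start, stop) pairs and every start is odd, so the assert in check_prime_in_range holds.

import math

def create(from_i, to_i, nbr_processes):
    """Split [from_i, to_i) into nbr_processes (start, stop) pairs with odd starts."""
    chunk = int(math.ceil((to_i - from_i) / nbr_processes))
    ranges = []
    start = from_i  # callers pass an odd start (3 or 21)
    for _ in range(nbr_processes):
        stop = min(start + chunk, to_i)
        ranges.append((start, stop))
        # keep the next start odd because check_prime_in_range steps by 2
        start = stop if stop % 2 != 0 else stop + 1
    return ranges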
/10_clusters/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | WORKDIR /usr/src/app 4 | COPY requirements.txt ./ 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | 7 | COPY . . 8 | CMD python ./diffusion_numpy_memory2.py 9 | -------------------------------------------------------------------------------- /10_clusters/docker/Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | docker build -t ${USER}/diffusion2d:numpy-memory2 -t ${USER}/diffusion2d:latest . 3 | 4 | run: build 5 | docker run ${USER}/diffusion2d:latest 6 | -------------------------------------------------------------------------------- /10_clusters/docker/diffusion_numpy_memory2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import timeit 5 | 6 | from numpy import add, copyto, multiply, zeros 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (256, 256) 14 | 15 | 16 | def roll_add(rollee, shift, axis, out): 17 | if shift == 1 and axis == 0: 18 | out[1:, :] += rollee[:-1, :] 19 | out[0, :] += rollee[-1, :] 20 | elif shift == -1 and axis == 0: 21 | out[:-1, :] += rollee[1:, :] 22 | out[-1, :] += rollee[0, :] 23 | elif shift == 1 and axis == 1: 24 | out[:, 1:] += rollee[:, :-1] 25 | out[:, 0] += rollee[:, -1] 26 | elif shift == -1 and axis == 1: 27 | out[:, :-1] += rollee[:, 1:] 28 | out[:, -1] += rollee[:, 0] 29 | 30 | 31 | def laplacian(grid, out): 32 | copyto(out, grid) 33 | multiply(out, -4.0, out) 34 | roll_add(grid, +1, 0, out) 35 | roll_add(grid, -1, 0, out) 36 | roll_add(grid, +1, 1, out) 37 | roll_add(grid, -1, 1, out) 38 | 39 | 40 | @profile 41 | def evolve(grid, dt, out, D=1): 42 | laplacian(grid, out) 43 | multiply(out, D * dt, out) 44 | add(out, grid, out) 45 | 46 | 47 | def run_experiment(num_iterations): 48 | scratch = zeros(grid_shape) 49 | grid = zeros(grid_shape) 50 | 51 | block_low = int(grid_shape[0] * 0.4) 52 | block_high = int(grid_shape[0] * 0.5) 53 | grid[block_low:block_high, block_low:block_high] = 0.005 54 | 55 | start = time.time() 56 | for i in range(num_iterations): 57 | evolve(grid, 0.1, scratch) 58 | grid, scratch = scratch, grid 59 | return time.time() - start 60 | 61 | 62 | if __name__ == "__main__": 63 | n_runs = 100 64 | runtime = timeit.timeit(f"run_experiment({n_runs})", number=25, globals=globals()) 65 | print(f"Runtime for {n_runs} with grid {grid_shape}: {runtime:0.4f}s") 66 | -------------------------------------------------------------------------------- /10_clusters/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18.0 2 | -------------------------------------------------------------------------------- /10_clusters/ipython_parallel/pi_ipython_cluster.py: -------------------------------------------------------------------------------- 1 | import time 2 | import ipyparallel as ipp 3 | from ipyparallel import require 4 | 5 | 6 | @require('os', 'random') 7 | def estimate_nbr_points_in_quarter_circle(nbr_estimates): 8 | """Monte carlo estimate of the number of points in a 9 | quarter circle using pure Python""" 10 | print(f"Executing estimate_nbr_points_in_quarter_circle with {nbr_estimates:,} on pid {os.getpid()}") 11 | nbr_trials_in_quarter_unit_circle = 0 12 | for step in range(int(nbr_estimates)): 13 | x = random.uniform(0, 1) 14 | y = random.uniform(0, 1)
15 | is_in_unit_circle = x * x + y * y <= 1.0 16 | nbr_trials_in_quarter_unit_circle += is_in_unit_circle 17 | return nbr_trials_in_quarter_unit_circle 18 | 19 | 20 | if __name__ == "__main__": 21 | c = ipp.Client() 22 | nbr_engines = len(c.ids) 23 | print("We're using {} engines".format(nbr_engines)) 24 | nbr_samples_in_total = 1e8 25 | nbr_parallel_blocks = 4 26 | 27 | dview = c[:] 28 | 29 | nbr_samples_per_worker = nbr_samples_in_total / nbr_parallel_blocks 30 | t1 = time.time() 31 | nbr_in_quarter_unit_circles = dview.apply_sync(estimate_nbr_points_in_quarter_circle, \ 32 | nbr_samples_per_worker) 33 | print("Estimates made:", nbr_in_quarter_unit_circles) 34 | 35 | nbr_jobs = len(nbr_in_quarter_unit_circles) 36 | pi_estimate = sum(nbr_in_quarter_unit_circles) * 4 / nbr_samples_in_total 37 | print("Estimated pi", pi_estimate) 38 | print("Delta:", time.time() - t1) 39 | -------------------------------------------------------------------------------- /10_clusters/nsq/nsq_worker.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import partial 3 | from math import sqrt 4 | 5 | import nsq 6 | 7 | 8 | def is_prime(number): 9 | if number % 2 == 0: 10 | return False 11 | for i in range(3, int(sqrt(number)) + 1, 2): 12 | if number % i == 0: 13 | return False 14 | return True 15 | 16 | 17 | def write_message(topic, data, writer): 18 | response = writer.pub(topic, data) 19 | if isinstance(response, nsq.Error): 20 | print("Error with Message: {}: {}".format(data, response)) 21 | return write_message(topic, data, writer) 22 | else: 23 | print("Published Message: ", data) 24 | 25 | 26 | def calculate_prime(message, writer): 27 | data = json.loads(message.body) 28 | 29 | prime = is_prime(data["number"]) 30 | data["prime"] = prime 31 | if prime: 32 | topic = "prime" 33 | else: 34 | topic = "non_prime" 35 | 36 | output_message = json.dumps(data).encode("utf8") 37 | write_message(topic, output_message, writer) 38 | message.finish() # <1> 39 | 40 | 41 | if __name__ == "__main__": 42 | writer = nsq.Writer(["127.0.0.1:4150"]) 43 | handler = partial(calculate_prime, writer=writer) 44 | reader = nsq.Reader( 45 | message_handler=handler, 46 | nsqd_tcp_addresses=["127.0.0.1:4150"], 47 | topic="numbers", 48 | channel="worker_group_a", 49 | ) 50 | nsq.run() 51 | -------------------------------------------------------------------------------- /11_less_ram/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/.notempty -------------------------------------------------------------------------------- /11_less_ram/compressing_text/plot_example.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | labels = ['list_bisect', 'set', 'Marisa Trie', 'DAWG'] 5 | ram_used = [835, 1260, 37, 73] 6 | build_time = [20, 24, 35, 31] 7 | lookup_time = [0.01, 0.002, 0.01, 0.005] 8 | 9 | #labels = ['list_bisect', 'set', 'Marisa Trie', 'DAWG', 'HAT Trie'] 10 | #ram_used = [920, 1112, 293, 958, 244] 11 | #build_time = [47, 31, 55, 63, 44] 12 | #lookup_time = [0.02, 0.003, 0.01, 0.004, 0.005] 13 | 14 | # make the build-time circles much larger 15 | build_time = [bt * 5 for bt in build_time] 16 | 17 | plt.figure(1) 18 | plt.clf() 19 | plt.scatter(ram_used, lookup_time, s=build_time) 20 | 21 | for ram, lookup, label in zip(ram_used,
lookup_time, labels): 22 | plt.annotate(label, (ram+15, lookup+0.0005)) 23 | 24 | plt.xlabel('RAM used (MB - lower is better)') 25 | plt.ylabel("Look-up time (seconds - lower is better)") 26 | plt.title("Container behavior for 11 million tokens\nsize represents build time (smaller is better)") 27 | plt.xlim(xmin=0) 28 | plt.ylim((-0.0005, 0.012)) 29 | plt.tight_layout() 30 | plt.savefig("less_ram_tries_dawg_text_11m_tokens.png") 31 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | 3 | # "Moby Words lists by Grady Ward" 4 | # http://www.gutenberg.org/ebooks/3201 5 | #SUMMARISED_FILE = "all_unique_words.txt" # 500k approx 6 | #CODEC = 'Windows-1252' 7 | 8 | CODEC = 'utf-8' 9 | SUMMARISED_FILE = "all_unique_words_wikipedia_via_gensim.txt" 10 | 11 | 12 | def read_words(filename): 13 | # return words from filename using a generator 14 | try: 15 | with codecs.open(filename, 'r', CODEC) as f: 16 | for line_nbr, line in enumerate(f): 17 | items = line.strip().split() 18 | for item in items: 19 | yield item 20 | except UnicodeDecodeError: 21 | print("UnicodeDecodeError for {} near line {} and word {}".format(filename, line_nbr, line)) 22 | 23 | readers = read_words(SUMMARISED_FILE) 24 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_clean_list.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import codecs 3 | import glob 4 | 5 | # Clean the raw wordlists into a unique 500,000(ish) word single file 6 | 7 | # "Moby Words lists by Grady Ward" 8 | # http://www.gutenberg.org/ebooks/3201 9 | LONG_FILES = glob.glob('mword10/*') 10 | SUMMARISED_FILE = "all_unique_words.txt" 11 | 12 | 13 | def read_words(filename): 14 | # return words from filename using a generator 15 | try: 16 | with codecs.open(filename, 'r', 'Windows-1252') as f: 17 | for line_nbr, line in enumerate(f): 18 | items = line.strip().split() 19 | for item in items: 20 | yield item 21 | except UnicodeDecodeError: 22 | print("UnicodeDecodeError for {} near line {} and word {}".format(filename, line_nbr, line)) 23 | 24 | readers = itertools.chain(*(read_words(lf) for lf in LONG_FILES)) 25 | 26 | if __name__ == "__main__": 27 | words_set = set(readers) 28 | print("Summarising input files into one output set of {} words".format(len(words_set))) 29 | with codecs.open(SUMMARISED_FILE, 'w', 'Windows-1252') as f: 30 | for word in words_set: 31 | f.write(word + "\n") 32 | 33 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_clean_list_wikipedia_gensim.py: -------------------------------------------------------------------------------- 1 | import gensim 2 | FILENAME = "/home/ian/workspace/personal_projects/high_performance_python_book_2e/high-performance-python-2e/examples_ian/ian/12_lessram/compressing_text/wikipedia_dump/enwiki_11M/_wordids.txt.bz2" 3 | 4 | id2word = gensim.corpora.Dictionary.load_from_text(FILENAME) 5 | #print(len([w for w in iter(id2word.values())])) 6 | print(len(id2word)) 7 | 8 | SUMMARISED_FILE = "all_unique_words_wikipedia_via_gensim.txt" 9 | 10 | print("Summarising input files into one output set of {} words".format(len(id2word))) 11 | with open(SUMMARISED_FILE, 'w') as f: 12 | for word in id2word.values(): 13 | f.write(word + "\n") 14 | 
15 | 16 | 17 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_dawg.py: -------------------------------------------------------------------------------- 1 | #pip instal DAWG failed 2 | #https://github.com/pytries/DAWG/issues/31 3 | #$ python text_example_dawg.py 4 | #pip install DAWG-Python 5 | #installs ok, but it is a read-only version of a wrapper to DAWG 6 | #https://pypi.org/project/DAWG-Python/ 7 | 8 | import time 9 | import timeit 10 | import text_example 11 | import memory_profiler 12 | import dawg # 13 | 14 | if __name__ == "__main__": 15 | print(("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0]))) 16 | # avoid building a temporary list of words in Python, store directly in the 17 | # DAWG 18 | t1 = time.time() 19 | words_dawg = dawg.DAWG(text_example.readers) 20 | t2 = time.time() 21 | print(("RAM after creating dawg {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1))) 22 | 23 | assert 'Zwiebel' in words_dawg 24 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_dawg", 25 | setup="from __main__ import words_dawg", 26 | number=1, 27 | repeat=10000)) 28 | print(("Summed time to lookup word {:0.4f}s".format(time_cost))) 29 | 30 | t1 = time.time() 31 | words_dawg.save('words_dawg.saved') 32 | t2 = time.time() 33 | d = dawg.DAWG() 34 | with open('words_dawg.saved', 'rb') as f: 35 | words_dawg2 = d.read(f) 36 | t3 = time.time() 37 | print(t2 - t1, t3-t2) 38 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_dawg_load_only.py: -------------------------------------------------------------------------------- 1 | #pip instal DAWG failed 2 | #https://github.com/pytries/DAWG/issues/31 3 | #$ python text_example_dawg.py 4 | #pip install DAWG-Python 5 | #installs ok, but it is a read-only version of a wrapper to DAWG 6 | #https://pypi.org/project/DAWG-Python/ 7 | 8 | import time 9 | import timeit 10 | import text_example 11 | import memory_profiler 12 | import dawg # 13 | 14 | if __name__ == "__main__": 15 | print(("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0]))) 16 | t2 = time.time() 17 | words_dawg = dawg.DAWG() 18 | with open('words_dawg.saved', 'rb') as f: 19 | words_dawg.read(f) 20 | t3 = time.time() 21 | print(t3-t2) 22 | print(("RAM after load {:0.1f}MiB".format(memory_profiler.memory_usage()[0]))) 23 | 24 | assert 'Zwiebel' in words_dawg 25 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_dawg", 26 | setup="from __main__ import words_dawg", 27 | number=1, 28 | repeat=10000)) 29 | print(("Summed time to lookup word {:0.4f}s".format(time_cost))) 30 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_list.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | 6 | if __name__ == "__main__": 7 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 8 | t1 = time.time() 9 | words = [w for w in text_example.readers] 10 | print("Loading {} words".format(len(words))) 11 | t2 = time.time() 12 | print("RAM after creating list {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 13 | 14 | assert 'Zwiebel' in words 15 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words", 16 | setup="from __main__ import words", 17 | number=1, 18 
| repeat=100)) 19 | time_cost *= 100 # to make it equivalent to 10_000 calls 20 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 21 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_list_bisect.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | import bisect 6 | 7 | 8 | def index(a, x): 9 | 'Locate the leftmost value exactly equal to x' 10 | i = bisect.bisect_left(a, x) 11 | if i != len(a) and a[i] == x: 12 | return i 13 | raise ValueError 14 | 15 | 16 | if __name__ == "__main__": 17 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 18 | t1 = time.time() 19 | words = [w for w in text_example.readers] 20 | print("Loading {} words".format(len(words))) 21 | t2 = time.time() 22 | print("RAM after creating list {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 23 | print("The list contains {} words".format(len(words))) 24 | words.sort() 25 | t3 = time.time() 26 | print("Sorting list took {:0.1f}s".format(t3 - t2)) 27 | 28 | assert 'Zwiebel' in words 29 | time_cost = sum(timeit.repeat(stmt="index(words, u'Zwiebel')", 30 | setup="from __main__ import words, index", 31 | number=1, 32 | repeat=10000)) 33 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 34 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_set.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | 6 | if __name__ == "__main__": 7 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 8 | # load the words directly into the set 9 | t1 = time.time() 10 | words_set = set(text_example.readers) 11 | t2 = time.time() 12 | print("RAM after creating set {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 13 | print("The set contains {} words".format(len(words_set))) 14 | 15 | assert 'Zwiebel' in words_set 16 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_set", 17 | setup="from __main__ import words_set", 18 | number=1, 19 | repeat=10000)) 20 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 21 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_trie.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | import marisa_trie 6 | 7 | if __name__ == "__main__": 8 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 9 | # avoid building a temporary list of words in Python, store directly in the 10 | # Trie 11 | t1 = time.time() 12 | words_trie = marisa_trie.Trie(text_example.readers) 13 | t2 = time.time() 14 | print("RAM after creating trie {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 15 | print("The trie contains {} words".format(len(words_trie))) 16 | 17 | assert 'Zwiebel' in words_trie 18 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_trie", 19 | setup="from __main__ import words_trie", 20 | number=1, 21 | repeat=10000)) 22 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 23 | 24 | t1 = time.time() 25 | 
words_trie.save('words_trie.saved') 26 | del words_trie 27 | print("RAM before loading from disk {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 28 | t2 = time.time() 29 | d = marisa_trie.Trie() 30 | with open('words_trie.saved', 'rb') as f: 31 | words_trie2 = d.read(f) 32 | t3 = time.time() 33 | print("RAM after loading trie from disk {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t3 - t2)) 34 | print("The trie contains {} words".format(len(words_trie2))) 35 | print(f"time to save {t2 - t1:f}s, time to load {t3-t2:f}s") 36 | assert 'Zwiebel' in words_trie2 37 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_trie2", 38 | setup="from __main__ import words_trie2", 39 | number=1, 40 | repeat=10000)) 41 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 42 | 43 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_trie_load_only.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | import marisa_trie 6 | 7 | if __name__ == "__main__": 8 | 9 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 10 | print("RAM before loading from disk {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 11 | t2 = time.time() 12 | d = marisa_trie.Trie() 13 | with open('words_trie.saved', 'rb') as f: 14 | words_trie = d.read(f) 15 | t3 = time.time() 16 | print("RAM after loading trie from disk {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t3 - t2)) 17 | print("The trie contains {} words".format(len(words_trie))) 18 | print(f"time to load {t3-t2:f}s") 19 | assert 'Zwiebel' in words_trie 20 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_trie", 21 | setup="from __main__ import words_trie", 22 | number=1, 23 | repeat=10000)) 24 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 25 | 26 | -------------------------------------------------------------------------------- /11_less_ram/morris_counter_example/morris_counter.py: -------------------------------------------------------------------------------- 1 | """Approximate Morris Counter supporting many counters""" 2 | import math 3 | import random 4 | import array 5 | 6 | SMALLEST_UNSIGNED_INTEGER = 'B' # unsigned char, typically 1 byte 7 | 8 | 9 | class MorrisCounter(object): 10 | """Approximate counter, stores exponent and counts approximately 2^exponent 11 | 12 | https://en.wikipedia.org/wiki/Approximate_counting_algorithm""" 13 | def __init__(self, type_code=SMALLEST_UNSIGNED_INTEGER, nbr_counters=1): 14 | self.exponents = array.array(type_code, [0] * nbr_counters) 15 | 16 | def __len__(self): 17 | return len(self.exponents) 18 | 19 | def add_counter(self): 20 | """Add a new zeroed counter""" 21 | self.exponents.append(0) 22 | 23 | def get(self, counter=0): 24 | """Calculate approximate value represented by counter""" 25 | return math.pow(2, self.exponents[counter]) 26 | 27 | def add(self, counter=0): 28 | """Probabilistically add 1 to counter""" 29 | value = self.get(counter) 30 | probability = 1.0 / value 31 | if random.uniform(0, 1) < probability: 32 | self.exponents[counter] += 1 33 | 34 | if __name__ == "__main__": 35 | mc = MorrisCounter() 36 | print("MorrisCounter has {} counters".format(len(mc))) 37 | for n in range(10): 38 | print("Iteration %d, MorrisCounter has: %d" % (n, mc.get())) 39 | mc.add() 40 | 41 | for n in range(990): 42 | mc.add() 43
| print("Iteration 1000, MorrisCounter has: %d" % (mc.get())) 44 | -------------------------------------------------------------------------------- /11_less_ram/numexpr_pandas/make_cross_entropy_picture.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | import pandas as pd 4 | import numpy as np 5 | #nbr_items = 200_000_000 6 | 7 | nbr_items = 99 8 | yp = np.linspace(0.01, 0.99, nbr_items) 9 | yt = np.ones(nbr_items) 10 | answer = -(yt * np.log(yp) + ((1-yt) * (np.log(1-yp)))) 11 | 12 | yt0 = np.zeros(nbr_items) 13 | answer0 = -(yt0 * np.log(yp) + ((1-yt0) * (np.log(1-yp)))) 14 | 15 | df = pd.DataFrame({'yp': yp, 'yt': yt, 'cross_entropy': answer, 'cross_entropy0': answer0}) 16 | 17 | fig, axs = plt.subplots(ncols=2) 18 | ax = axs[0] 19 | df.plot(x='yp', y='cross_entropy', ax=ax, label='Error for yt==1') 20 | df.plot(x='yp', y='cross_entropy0', ax=ax, label='Error for yt==0', linestyle='--') 21 | ax.set_ylabel('Cross Entropy or Error (smaller is better)') 22 | ax.set_xlabel('Predicted Probability (yp)') 23 | ax.set_title('Cross Entropy error for targets yt 0 and 1') 24 | 25 | ax = axs[1] 26 | logs = np.log(yp) 27 | df_log = pd.DataFrame({'yp': yp, 'log_yp': logs}) 28 | df_log.plot(x='yp', y='log_yp', ax=ax) 29 | ax.set_title('Natural Log for x==[0, 1]') 30 | ax.set_xlabel('Predicted Probability (yp)') 31 | 32 | plt.tight_layout() 33 | plt.savefig('cross_entropy.png') 34 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/probabilistic_datastructures/__init__.py -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/_benchmark.clean.pkl: -------------------------------------------------------------------------------- 1 | (lp1 2 | (dp2 3 | S'estimate' 4 | p3 5 | I1073741824 6 | sS'size' 7 | p4 8 | I416 9 | sS'name' 10 | p5 11 | S'Morris Counter' 12 | p6 13 | sS'time' 14 | p7 15 | F750.91865468025208 16 | sa(dp8 17 | g3 18 | I1048576 19 | sg4 20 | I416 21 | sg5 22 | S'Log Log Register' 23 | p9 24 | sg7 25 | F1690.4389050006866 26 | sa(dp10 27 | g3 28 | I4522232 29 | sg4 30 | I23121520 31 | sg5 32 | S'LogLog' 33 | p11 34 | sg7 35 | F2112.2002909183502 36 | sa(dp12 37 | g3 38 | I1628946 39 | sg4 40 | I23121520 41 | sg5 42 | S'SuperLogLog' 43 | p13 44 | sg7 45 | F2416.4608347415924 46 | sa(dp14 47 | g3 48 | I4983171 49 | sg4 50 | I577880 51 | sg5 52 | S'HyperLogLog' 53 | p15 54 | sg7 55 | F2906.5975527763367 56 | sa(dp16 57 | g3 58 | I4912818 59 | sg4 60 | L18704L 61 | sg5 62 | S'KMinValues' 63 | p17 64 | sg7 65 | F3502.6565506458282 66 | sa(dp18 67 | g3 68 | I4949358 69 | sg4 70 | I6936 71 | sg5 72 | S'ScalingBloom' 73 | p19 74 | sg7 75 | F10392.013652801514 76 | sa(dp20 77 | S'estimate' 78 | p21 79 | I4956262 80 | sS'size' 81 | p22 82 | I1148708949 83 | sS'name' 84 | p23 85 | S'Baseline' 86 | p24 87 | sS'time' 88 | p25 89 | F4036.6814231899998 90 | sa. 
-------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/_benchmark.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/probabilistic_datastructures/_benchmark.pkl -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/_benchmark.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import pickle 3 | import time 4 | from contextlib import contextmanager 5 | from pprint import pprint 6 | 7 | from tqdm import tqdm 8 | 9 | from countmemaybe import HyperLogLog, KMinValues 10 | from ll import LL 11 | from llregister import LLRegister 12 | from morriscounter import MorrisCounter 13 | from scalingbloomfilter import ScalingBloomFilter 14 | from superll import SuperLL 15 | 16 | methods = [ 17 | {"name": "LogLog", "obj": LL(16)}, 18 | {"name": "SuperLogLog", "obj": SuperLL(16)}, 19 | {"name": "Morris Counter", "obj": MorrisCounter()}, 20 | {"name": "Log Log Register", "obj": LLRegister()}, 21 | {"name": "HyperLogLog", "obj": HyperLogLog(b=16)}, 22 | {"name": "KMinValues", "obj": KMinValues(k=1 << 16)}, 23 | {"name": "ScalingBloom", "obj": ScalingBloomFilter(1048576)}, 24 | ] 25 | 26 | 27 | @contextmanager 28 | def TimerBlock(name): 29 | start = time.time() 30 | t = ctypes.c_double() 31 | try: 32 | yield t 33 | finally: 34 | t.value = time.time() - start 35 | print(f"[{name}] took {t.value} seconds") 36 | 37 | 38 | def wikireader(filename, buffering=1 << 10): 39 | total = 1148708949 40 | with open(filename, "r", buffering=buffering) as fd: 41 | for line in tqdm(fd, desc="Reading Wiki Data", total=total): 42 | yield line.strip() 43 | 44 | 45 | if __name__ == "__main__": 46 | filename = "/data/datasets/internet/wikipedia/enwiki-20140404-pages-articles.tokens" 47 | print("baseline reading measurement") 48 | with TimerBlock("Iterate File") as baseline: 49 | tmp = 0 50 | for line in wikireader(filename): 51 | tmp += len(line) 52 | 53 | for method in methods: 54 | print((method["name"])) 55 | obj = method["obj"] 56 | with TimerBlock("Iterate File") as bench: 57 | for line in wikireader(filename): 58 | obj.add(line) 59 | method["time"] = bench.value - baseline.value 60 | method["estimate"] = obj.__len__() 61 | 62 | pprint(methods) 63 | pickle.dump(methods, open("_benchmark.pkl", "wb+")) 64 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/bloomfilter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import bitarray 4 | import mmh3 5 | 6 | 7 | class BloomFilter: 8 | def __init__(self, capacity, error=0.005): 9 | """ 10 | Initialize a Bloom filter with given capacity and false positive rate 11 | """ 12 | self.capacity = capacity 13 | self.error = error 14 | self.num_bits = int((-capacity * math.log(error)) // math.log(2) ** 2 + 1) 15 | self.num_hashes = int((self.num_bits * math.log(2)) // capacity + 1) 16 | self.data = bitarray.bitarray(self.num_bits) 17 | 18 | def _indexes(self, key): 19 | h1, h2 = mmh3.hash64(key) 20 | for i in range(self.num_hashes): 21 | yield (h1 + i * h2) % self.num_bits 22 | 23 | def add(self, key): 24 | for index in self._indexes(key): 25 | self.data[index] = True 26 | 27 | def __contains__(self, key): 28 | 
return all(self.data[index] for index in self._indexes(key)) 29 | 30 | def __len__(self): 31 | bit_off_num = self.data.count(True) 32 | bit_off_percent = 1.0 - bit_off_num / self.num_bits 33 | length = -1.0 * self.num_bits * math.log(bit_off_percent) / self.num_hashes 34 | return int(length) 35 | 36 | @staticmethod 37 | def union(bloom_a, bloom_b): 38 | assert bloom_a.capacity == bloom_b.capacity, "Capacities must be equal" 39 | assert bloom_a.error == bloom_b.error, "Error rates must be equal" 40 | 41 | bloom_union = BloomFilter(bloom_a.capacity, bloom_a.error) 42 | bloom_union.data = bloom_a.data | bloom_b.data 43 | return bloom_union 44 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/hyperloglog.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from ll import LL 4 | 5 | 6 | class HyperLogLog(LL): 7 | def __len__(self): 8 | indicator = sum(2 ** -m.counter for m in self.registers) 9 | E = self.alpha * (self.num_registers ** 2) / indicator 10 | 11 | if E <= 5.0 / 2.0 * self.num_registers: 12 | V = sum(1 for m in self.registers if m.counter == 0) 13 | if V != 0: 14 | Estar = self.num_registers * math.log(self.num_registers / (1.0 * V), 2) 15 | else: 16 | Estar = E 17 | else: 18 | if E <= 2 ** 32 / 30.0: 19 | Estar = E 20 | else: 21 | Estar = -2 ** 32 * math.log(1 - E / 2 ** 32, 2) 22 | return int(Estar) 23 | 24 | 25 | if __name__ == "__main__": 26 | import mmh3 27 | 28 | hll = HyperLogLog(8) 29 | for i in range(100000): 30 | hll.add(mmh3.hash(str(i))) 31 | print(len(hll)) 32 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/kminvalues.py: -------------------------------------------------------------------------------- 1 | import mmh3 2 | from blist import sortedset 3 | 4 | 5 | class KMinValues: 6 | def __init__(self, num_hashes): 7 | self.num_hashes = num_hashes 8 | self.data = sortedset() 9 | 10 | def add(self, item): 11 | item_hash = mmh3.hash(item) 12 | self.data.add(item_hash) 13 | if len(self.data) > self.num_hashes: 14 | self.data.pop() 15 | 16 | def __len__(self): 17 | if len(self.data) <= 2: 18 | return 0 19 | length = (self.num_hashes - 1) * (2 ** 32 - 1) / (self.data[-2] + 2 ** 31 - 1) 20 | return int(length) 21 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/ll.py: -------------------------------------------------------------------------------- 1 | import mmh3 2 | from llregister import LLRegister 3 | 4 | 5 | class LL: 6 | def __init__(self, p): 7 | self.p = p 8 | self.num_registers = 2 ** p 9 | self.registers = [LLRegister() for i in range(int(2 ** p))] 10 | self.alpha = 0.7213 / (1.0 + 1.079 / self.num_registers) 11 | 12 | def add(self, item): 13 | item_hash = mmh3.hash(str(item)) 14 | register_index = item_hash & (self.num_registers - 1) 15 | register_hash = item_hash >> self.p 16 | self.registers[register_index]._add(register_hash) 17 | 18 | def __len__(self): 19 | register_sum = sum(h.counter for h in self.registers) 20 | length = ( 21 | self.num_registers * self.alpha * 2 ** (register_sum / self.num_registers) 22 | ) 23 | return int(length) 24 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/llregister.py: -------------------------------------------------------------------------------- 1 | import mmh3 2 | from utils 
import trailing_zeros 3 | 4 | 5 | class LLRegister: 6 | counter = 0 7 | 8 | def add(self, item): 9 | item_hash = mmh3.hash(str(item)) 10 | return self._add(item_hash) 11 | 12 | def _add(self, item_hash): 13 | bit_index = trailing_zeros(item_hash) 14 | if bit_index > self.counter: 15 | self.counter = bit_index 16 | 17 | def __len__(self): 18 | return int(2 ** self.counter) 19 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/morriscounter.py: -------------------------------------------------------------------------------- 1 | from random import random 2 | 3 | 4 | class MorrisCounter: 5 | counter = 0 6 | 7 | def add(self, *args): 8 | if random() < 1.0 / (2 ** self.counter): 9 | self.counter += 1 10 | 11 | def __len__(self): 12 | return int(2 ** self.counter) 13 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/requirements.txt: -------------------------------------------------------------------------------- 1 | bitarray 2 | mmh3 3 | blist 4 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/results/unique.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/probabilistic_datastructures/results/unique.pkl -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/scalingbloomfilter.py: -------------------------------------------------------------------------------- 1 | from bloomfilter import BloomFilter 2 | 3 | 4 | class ScalingBloomFilter: 5 | def __init__(self, capacity, error=0.005, max_fill=0.8, error_tightening_ratio=0.5): 6 | self.capacity = capacity 7 | self.base_error = error 8 | self.max_fill = max_fill 9 | self.items_until_scale = int(capacity * max_fill) 10 | self.error_tightening_ratio = error_tightening_ratio 11 | self.bloom_filters = [] 12 | self.current_bloom = None 13 | self._add_bloom() 14 | 15 | def _add_bloom(self): 16 | new_error = self.base_error * self.error_tightening_ratio ** len( 17 | self.bloom_filters 18 | ) 19 | new_bloom = BloomFilter(self.capacity, new_error) 20 | self.bloom_filters.append(new_bloom) 21 | self.current_bloom = new_bloom 22 | return new_bloom 23 | 24 | def add(self, key): 25 | if key in self: 26 | return True 27 | self.current_bloom.add(key) 28 | self.items_until_scale -= 1 29 | if self.items_until_scale == 0: 30 | bloom_size = len(self.current_bloom) 31 | bloom_max_capacity = int(self.current_bloom.capacity * self.max_fill) 32 | 33 | # We may have been adding many duplicate values into the Bloom, so 34 | # we need to check if we actually need to scale or if we still have 35 | # space 36 | if bloom_size >= bloom_max_capacity: 37 | self._add_bloom() 38 | self.items_until_scale = bloom_max_capacity 39 | else: 40 | self.items_until_scale = int(bloom_max_capacity - bloom_size) 41 | return False 42 | 43 | def __contains__(self, key): 44 | return any(key in bloom for bloom in self.bloom_filters) 45 | 46 | def __len__(self): 47 | return int(sum(len(bloom) for bloom in self.bloom_filters)) 48 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/superll.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | 3 | from ll import LL 4 | 5 | 6 | class SuperLL(LL): 7 | NMAX = 1000000000 8 | 9 | def __len__(self): 10 | # truncation 11 | register_num1 = int(self.num_registers * 0.7) 12 | registers_truncated1 = sorted(h.counter for h in self.registers)[:register_num1] 13 | 14 | # restriction 15 | B = math.ceil(math.log(self.NMAX / self.num_registers) / math.log(2.0) + 3) 16 | registers_truncated2 = [v for v in registers_truncated1 if v <= B] 17 | register_num2 = len(registers_truncated2) 18 | register_sum = sum(registers_truncated2) 19 | 20 | alpha = 0.7213 / (1.0 + 1.079 / register_num2) 21 | length = 2 ** (register_sum / register_num2) * register_num2 * alpha 22 | return int(length) 23 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/utils.py: -------------------------------------------------------------------------------- 1 | def trailing_zeros(number): 2 | """ 3 | Returns the 1-based index of the first bit set to 1 from the right side of 4 | a 32bit integer 5 | >>> trailing_zeros(0) 6 | 32 7 | >>> trailing_zeros(0b1000) 8 | 4 9 | >>> trailing_zeros(0b10000000) 10 | 8 11 | """ 12 | if not number: 13 | return 32 14 | index = 0 15 | while (number >> index) & 1 == 0: 16 | index += 1 17 | return index + 1 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | "These code examples are excerpted from High Performance Python 2ed by Micha 2 | Gorelick and Ian Ozsvald published by O’Reilly Media, Inc. (ISBN 1492055026) 3 | which you can purchase here: http://shop.oreilly.com/product/0636920268505.do. 4 | You may use these code examples in your programs and documentation. You do not 5 | need to contact O'Reilly for permission unless you’re reproducing a significant 6 | portion of the code. For example, writing a program that uses several chunks of 7 | this example code does not require permission. Selling or distributing a set of 8 | examples from O'Reilly books does require permission. Answering a question by 9 | citing the book and quoting example code does not require permission. 10 | Incorporating a significant amount of this example code into your product’s 11 | documentation does require permission. We appreciate, but do not require, 12 | attribution. An attribution usually includes the title, author, publisher, and 13 | ISBN. For example: “High Performance Python 2ed by Micha Gorelick and Ian Ozsvald 14 | (O’Reilly). Copyright 2014 Micha Gorelick and Ian Ozsvald. 978-1-4920-5502-0” 15 | 16 | If you feel your use of these code examples falls outside fair use or the 17 | permission given here, feel free to contact O'Reilly at 18 | permissions@oreilly.com." 
19 | -------------------------------------------------------------------------------- /figures/bandwidth.csv: -------------------------------------------------------------------------------- 1 | Speed (Gbit/s),Name 2 | 0.326,LTE 3 | 1.3,Wireless 802.11ac 4 | 5,USB 3.0 5 | 7,Wireless 802.11ad 6 | 10,10 Gigabit Ethernet 7 | 10,USB 3.1 8 | 16,SATAe 9 | 40,Thunderbolt3 10 | 40,USB 4 11 | 100,100 Gigabit Ethernet 12 | 204.8,DDR4 SDRAM 13 | 256,PCIe 3.0 14 | 409.6,DDR5 SDRAM 15 | -------------------------------------------------------------------------------- /figures/bandwidth.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from collections import defaultdict 3 | 4 | import numpy as np 5 | import pylab as py 6 | 7 | 8 | def autolabel(rects): 9 | """Attach a text label above each bar in *rects*, displaying its height.""" 10 | for rect in rects: 11 | height = rect.get_height() 12 | ax.annotate( 13 | "{}".format(height), 14 | xy=(rect.get_x() + rect.get_width() / 2, height), 15 | xytext=(0, 3), # 3 points vertical offset 16 | textcoords="offset points", 17 | ha="center", 18 | va="bottom", 19 | ) 20 | 21 | 22 | if __name__ == "__main__": 23 | data = list(csv.DictReader(open("bandwidth.csv"))) 24 | 25 | N = len(data) 26 | ind = np.arange(N) 27 | width = 0.35 28 | 29 | ax = py.gca() 30 | bar = ax.bar(ind, [float(d["Speed (Gbit/s)"]) for d in data], width, color="r") 31 | autolabel(bar) 32 | py.ylim(ymin=0) 33 | ax.set_ylabel("Speed (Gbit/s)") 34 | ax.set_xticks(ind + width) 35 | ax.set_xticklabels( 36 | [x["Name"].replace(" ", "\n") for x in data], rotation=45, ha="right" 37 | ) 38 | 39 | py.title("Bandwidth for Common Interfaces") 40 | 41 | py.savefig("../bandwidth.png") 42 | -------------------------------------------------------------------------------- /figures/diffusion_1d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import numpy as np 4 | import pylab as py 5 | 6 | grid_size = (512,) 7 | 8 | 9 | def laplacian(grid): 10 | return np.roll(grid, +1) + np.roll(grid, -1) - 2 * grid 11 | 12 | 13 | def evolve(grid, dt, D=1): 14 | return grid + dt * D * laplacian(grid) 15 | 16 | 17 | if __name__ == "__main__": 18 | grid = np.zeros(grid_size) 19 | max_val = 1.0 20 | 21 | block_low = int(grid_size[0] * 0.4) 22 | block_high = int(grid_size[0] * 0.6) 23 | grid[block_low:block_high] = max_val 24 | 25 | t = 0 26 | grids = [(t, grid.copy())] 27 | for i in range(3): 28 | for i in range(5000 * (4 ** i) + 1): 29 | grid = evolve(grid, 0.1) 30 | t += i * 0.1 31 | grids.append((t, grid.copy())) 32 | 33 | py.figure() 34 | for i, (t, grid) in enumerate(grids): 35 | py.subplot(len(grids), 1, i + 1) 36 | py.plot(grid) 37 | py.ylabel("t = %0.0f" % t) 38 | py.ylim(ymin=0, ymax=max_val * 1.1) 39 | py.xlim(xmin=0, xmax=grid_size[0]) 40 | 41 | py.xlabel("Position") 42 | 43 | py.subplot(len(grids), 1, 1) 44 | py.title("1D Diffusion of a square function") 45 | 46 | py.tight_layout() 47 | py.savefig("../diffusion_1d.png") 48 | -------------------------------------------------------------------------------- /figures/hll_single_reg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import countmemaybe 4 | import numpy as np 5 | import pylab as py 6 | 7 | 8 | def leading_set_bit(number): 9 | number_binary = bin(number) 10 | return len(number_binary) - number_binary.rfind("1") 11 | 12 | 13 | class HLL(object): 14 | max_index = 0 15 | 
16 | def add(self, number): 17 | index = leading_set_bit(number) 18 | self.max_index = max(self.max_index, index) 19 | 20 | def __len__(self): 21 | return 2 ** self.max_index 22 | 23 | 24 | if __name__ == "__main__": 25 | data_list = [] 26 | h1 = HLL() 27 | h = countmemaybe.HyperLogLog() 28 | for i in range(100000): 29 | item = "seee%seeeed234rsdaf" % i 30 | x = h._hash(item) 31 | h1.add(x) 32 | h.add(x) 33 | data_list.append((i + 1, len(h1), len(h))) 34 | 35 | data_numpy = np.asarray(data_list) 36 | py.plot(data_numpy[:, 0], data_numpy[:, 1], ":", label="Single HLL Register") 37 | py.plot(data_numpy[:, 0], data_numpy[:, 2], "--", label="HLL with 16 registers") 38 | py.plot(data_numpy[:, 0], data_numpy[:, 0], label="Actual Size") 39 | py.legend(loc="upper left") 40 | 41 | py.title("Performance of a single HLL Register") 42 | py.xlabel("Size of the set") 43 | py.ylabel("Predicted size of the set") 44 | 45 | # py.show() 46 | py.savefig("../hll_single_reg.png") 47 | -------------------------------------------------------------------------------- /figures/kmv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | 6 | import pylab as py 7 | from kminvalues import KMinValues 8 | 9 | sys.path.append(os.path.abspath("../../examples/probabilistic_datastructures/")) 10 | 11 | 12 | 13 | def plot(kmv): 14 | py.scatter( 15 | [d / float(2 ** 32 - 1) for d in kmv.data[:-1]], 16 | [0] * (len(kmv.data) - 1), 17 | alpha=0.25, 18 | ) 19 | py.axvline(x=(kmv.data[-2] / float(2 ** 32 - 1)), c="r") 20 | py.gca().get_yaxis().set_visible(False) 21 | py.gca().get_xaxis().set_ticklabels([]) 22 | py.gca().get_xaxis().set_ticks([x / 10.0 for x in range(11)]) 23 | 24 | 25 | if __name__ == "__main__": 26 | k = 20 27 | num_panels = 20 28 | kmv = KMinValues(k) 29 | for i in range(k * num_panels + 1): 30 | if i % k == 0 and i != 0: 31 | py.subplot(num_panels, 1, i // k) 32 | if i == k: 33 | py.title("Hash space density for KMV with k=%d" % k) 34 | plot(kmv) 35 | py.xlim((0, 1)) 36 | kmv.add(str(i)) 37 | print("added") 38 | 39 | py.gca().get_xaxis().set_ticks([x / 10.0 for x in range(11)]) 40 | py.gca().get_xaxis().set_ticklabels([x / 10.0 for x in range(11)]) 41 | 42 | py.tight_layout() 43 | py.savefig("../kmv.png") 44 | -------------------------------------------------------------------------------- /figures/list_overallocation.py: -------------------------------------------------------------------------------- 1 | from itertools import islice 2 | 3 | import pylab as py 4 | 5 | 6 | # coding: utf-8 7 | def overalloc_dict(): 8 | o = list_overalloc() 9 | i = 1 10 | s, e, _ = next(o) 11 | while True: 12 | if i > e: 13 | s, e, _ = next(o) 14 | yield e - i 15 | i += 1 16 | 17 | 18 | def list_overalloc(): 19 | s = 1 20 | while True: 21 | e = alloc = s + overalloc(s) 22 | yield s, e, alloc 23 | s = e + 1 24 | 25 | 26 | overalloc = lambda N: (N >> 3) + (3 if N < 9 else 6) 27 | 28 | py.scatter(list(range(1, 10000)), list(islice(overalloc_dict(), 10000 - 1))) 29 | py.ylim(0, 10000 - 1) 30 | py.xlim(0, 10000 - 1) 31 | py.ylim(0, 2000) 32 | py.ylim(0, 1500) 33 | py.ylim(0, 1400) 34 | py.ylim(0, 1300) 35 | py.xlabel("Size of the list") 36 | py.ylabel("Number of elements overallocated") 37 | py.title("Overallocation in lists") 38 | py.savefig("../list_overallocation.png") 39 | -------------------------------------------------------------------------------- /figures/matrix_method_speed.csv: 
-------------------------------------------------------------------------------- 1 | method,256,512,1024,2048,4096 2 | python,0.00,0.00,0.00,0.00,0.00 3 | python+memory,1.06,1.07,1.07,1.07,1.07 4 | numpy,170.59,116.16,60.49,44.80,45.80 5 | numpy+memory,185.97,140.10,69.67,44.43,45.36 6 | numpy+memory+laplace,203.66,208.15,86.41,90.91,90.53 7 | numpy+memory+laplace+numexpr,97.41,167.49,102.38,105.69,105.25 8 | numpy+memory+scipy,52.27,42.00,36.44,24.70,7.43 9 | -------------------------------------------------------------------------------- /figures/matrix_method_speed.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import csv 4 | from itertools import cycle 5 | 6 | import numpy as np 7 | import pylab as py 8 | 9 | markers = cycle("h*o>Dxsp8") 10 | linestyles = cycle(["-", ":", "--", "-."]) 11 | 12 | if __name__ == "__main__": 13 | data_raw = csv.DictReader(open("matrix_method_speed.csv")) 14 | data = [] 15 | max_speedup = 0 16 | for item in data_raw: 17 | name = item.pop("method") 18 | if name != "python": 19 | values = np.asarray(sorted((int(k), float(v)) for k, v in item.items())) 20 | data.append((name, values)) 21 | max_speedup = max(max_speedup, values[:, 1].max()) 22 | 23 | py.figure() 24 | for name, values in data: 25 | py.plot( 26 | values[:, 0], 27 | values[:, 1], 28 | linestyle=next(linestyles), 29 | marker=next(markers), 30 | label=name, 31 | linewidth=4, 32 | ) 33 | 34 | py.ylim(ymin=0, ymax=max_speedup * 1.1) 35 | py.legend( 36 | loc="upper center", 37 | ncol=3, 38 | mode="expand", 39 | borderaxespad=0.0, 40 | labelspacing=0.2, 41 | fontsize=12, 42 | handlelength=5, 43 | ) 44 | 45 | ax = py.gca() 46 | ticks = data[0][1][:, 0] 47 | ax.set_xticks(ticks) 48 | ax.set_xticklabels(["%dx%d" % (x, x) for x in ticks], rotation=25, ha="right") 49 | py.xlim(xmin=ticks.min(), xmax=ticks.max()) 50 | 51 | py.title("Summary of code performance") 52 | py.ylabel("Speedup from pure python (larger is better)") 53 | py.xlabel("Grid Size") 54 | py.tight_layout() 55 | 56 | py.savefig("../matrix_method_speed.png") 57 | -------------------------------------------------------------------------------- /figures/memory_types_data.csv: -------------------------------------------------------------------------------- 1 | type,min_size (byte),max_size (byte),min_read (Mb/s),max_read (Mb/s),min_write (Mb/s),max_write (Mb/s),min_read_latency (ms),max_read_latency (ms) 2 | spinning hard drive,1e12,1.6e13,50,255,50,255,6.7,23 3 | solid state drive,1.28e+11,8e+12,96,3500,40,2500,0.03,.54 4 | RAM,1e+9,6.4e+10,6400,25600,6400,25600,6.5e-5,.0004 5 | L1/L2 Cache,16384,6.4e+7,26214.4,4000000,26214.4,4000000,1.3e-6,.0001 6 | -------------------------------------------------------------------------------- /figures/norm_squared.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import os 4 | import sys 5 | from itertools import cycle 6 | 7 | import matplotlib 8 | import norm_array 9 | import norm_numpy 10 | import norm_numpy_dot 11 | import norm_python 12 | import norm_python_comprehension 13 | import numpy as np 14 | import pylab as py 15 | 16 | sys.path.append(os.path.abspath("../../examples/matrix/norm/")) 17 | 18 | 19 | 20 | 21 | methods = {k: v for k, v in globals().items() if k.startswith("norm")} 22 | 23 | markers = cycle("h*o>Dxsp8") 24 | linestyles = cycle(["-", ":", "--", "-."]) 25 | 26 | if __name__ == "__main__": 27 | timings = {k: [] for k in methods} 28 | for 
exponent in range(12, 35): 29 | N = int(1.5 ** exponent) 30 | print("exponent:", exponent) 31 | print("N:", N) 32 | for name, method in methods.items(): 33 | t = method.run_experiment(N, num_iter=5) * 1000.0 34 | timings[name].append((N, t)) 35 | print("%s: %f" % (name, t)) 36 | 37 | for name, data in timings.items(): 38 | d = np.asarray(data) 39 | py.plot( 40 | d[:, 0], 41 | d[:, 1], 42 | label=name, 43 | marker=next(markers), 44 | linestyle=next(linestyles), 45 | linewidth=4, 46 | ) 47 | 48 | py.title("Runtime for various norm squared routines") 49 | py.xlabel("Vector length") 50 | py.ylabel("Runtime (milliseconds) -- less is better") 51 | py.yscale("log") 52 | py.xscale("log") 53 | ax = py.gca() 54 | ax.get_xaxis().set_major_formatter(matplotlib.ticker.FormatStrFormatter("%d")) 55 | 56 | ax.xaxis.grid(True, which="minor", alpha=0.4) 57 | ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter()) 58 | ax.yaxis.grid(True, which="minor", alpha=0.4) 59 | py.legend(loc="upper left", handlelength=5) 60 | 61 | py.tight_layout() 62 | py.savefig("../norm_squared.png") 63 | -------------------------------------------------------------------------------- /figures/processor_clock.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | import dateutil.parser 4 | import matplotlib.dates as mdates 5 | import pylab as py 6 | 7 | if __name__ == "__main__": 8 | data = list(csv.DictReader(open("processor.csv"))) 9 | dates = [dateutil.parser.parse(x["date"]) for x in data if x["date"]] 10 | clock = [x["clock"] for x in data if x["date"]] 11 | 12 | ax = py.gca() 13 | fig = py.gcf() 14 | 15 | ax.scatter(mdates.date2num(dates), clock, alpha=0.5) 16 | ax.set_xticklabels( 17 | [d.strftime("%Y") for d in mdates.num2date(ax.get_xticks())], 18 | rotation=15, 19 | ha="right", 20 | ) 21 | ax.set_yscale("log") 22 | 23 | ax.set_ylabel("Clock speed (MHz)") 24 | ax.set_xlabel("Date of CPU Release") 25 | ax.set_title("Historical growth of CPU clock speed") 26 | 27 | py.savefig("../processor_clock.png") 28 | -------------------------------------------------------------------------------- /fix_cpu_modes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | NO_TURBO=1 4 | MODE=performance 5 | 6 | if [[ "$1" == 'disable' ]]; then 7 | NO_TURBO=0 8 | MODE=powersave 9 | fi 10 | 11 | echo "Setting mode to: $MODE" 12 | echo "Setting no_turbo to: $NO_TURBO" 13 | 14 | for CPUFREQ in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; 15 | do 16 | [ -f $CPUFREQ ] || continue; 17 | echo -n $MODE > $CPUFREQ; 18 | done 19 | 20 | echo ${NO_TURBO} > /sys/devices/system/cpu/intel_pstate/no_turbo 21 | --------------------------------------------------------------------------------
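fix_cpu_modes.sh above pins every core to the performance governor and disables Intel turbo so that the timing scripts in this repository give repeatable numbers; it writes to sysfs, so it needs root, and passing "disable" restores powersave and turbo. The snippet below is a small, hypothetical companion check (not part of the repository): it only reads the same sysfs files the shell script writes, so it assumes Linux with the intel_pstate driver.

from glob import glob
from pathlib import Path


def cpu_is_pinned():
    """True when every core uses the 'performance' governor and turbo is off."""
    governors = {Path(p).read_text().strip()
                 for p in glob("/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor")}
    no_turbo = Path("/sys/devices/system/cpu/intel_pstate/no_turbo").read_text().strip()
    return governors == {"performance"} and no_turbo == "1"


if __name__ == "__main__":
    print("CPU pinned for benchmarking:", cpu_is_pinned())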