├── 01_understanding ├── .notempty ├── check_prime.py └── reducing_operations.py ├── 02_profiling ├── .notempty ├── julia1.py ├── julia1_decorator.py ├── julia1_lineprofiler.py ├── julia1_lineprofiler2.py ├── julia1_lineprofiler3.py ├── julia1_memoryprofiler.py ├── julia1_nopil.py └── utility.py ├── 03_lists_tuples ├── .notempty ├── binary_search.py ├── binary_vs_linear_search.py ├── bisect_example.py └── linear_search.py ├── 04_dict_set ├── .notempty ├── custom_hash_function.py ├── custom_vs_default_hash.py ├── dict_probing.py ├── namespace.py ├── timing_hash_function.py └── unique_lookup.py ├── 05_iterators ├── .notempty ├── fibonacci.py └── lazy_data_analysis.py ├── 06_matrix ├── .notempty ├── Makefile ├── diffusion_2d │ ├── Makefile │ ├── README.md │ ├── _benchmark.py │ ├── diffusion_numpy.py │ ├── diffusion_numpy_memory.py │ ├── diffusion_numpy_memory2.py │ ├── diffusion_numpy_memory2_numexpr.py │ ├── diffusion_numpy_memory2_numexpr_single.py │ ├── diffusion_numpy_naive.py │ ├── diffusion_python.py │ ├── diffusion_python_memory.py │ ├── diffusion_scipy.py │ ├── kernprof │ │ ├── diffusion_numpy.kernprof │ │ ├── diffusion_numpy_memory.kernprof │ │ ├── diffusion_numpy_memory2.kernprof │ │ ├── diffusion_numpy_memory2_numexpr.kernprof │ │ ├── diffusion_numpy_memory2_numexpr_single.kernprof │ │ ├── diffusion_numpy_naive.kernprof │ │ ├── diffusion_python.kernprof │ │ ├── diffusion_python_memory.kernprof │ │ └── diffusion_scipy.kernprof │ ├── memit │ │ ├── diffusion_numpy.memit │ │ ├── diffusion_numpy_memory.memit │ │ ├── diffusion_numpy_memory2.memit │ │ ├── diffusion_numpy_memory2_numexpr.memit │ │ ├── diffusion_numpy_memory2_numexpr_single.memit │ │ ├── diffusion_numpy_naive.memit │ │ ├── diffusion_python.memit │ │ ├── diffusion_python_memory.memit │ │ └── diffusion_scipy.memit │ ├── perf │ │ ├── diffusion_numpy.novec.perf │ │ ├── diffusion_numpy.perf │ │ ├── diffusion_numpy_memory.perf │ │ ├── diffusion_numpy_memory2.perf │ │ ├── diffusion_numpy_memory2_numexpr.perf │ │ ├── diffusion_numpy_memory2_numexpr_single.perf │ │ ├── diffusion_numpy_naive.perf │ │ ├── diffusion_python.perf │ │ ├── diffusion_python_memory.perf │ │ └── diffusion_scipy.perf │ └── time │ │ ├── diffusion_numpy.time │ │ ├── diffusion_numpy_memory.time │ │ ├── diffusion_numpy_memory2.time │ │ ├── diffusion_numpy_memory2_numexpr.time │ │ ├── diffusion_numpy_memory2_numexpr_single.time │ │ ├── diffusion_numpy_naive.time │ │ ├── diffusion_python.time │ │ ├── diffusion_python_memory.time │ │ └── diffusion_scipy.time ├── norm │ ├── Makefile │ ├── norm_array.memit │ ├── norm_array.py │ ├── norm_numpy.py │ ├── norm_numpy_dot.py │ ├── norm_python.py │ ├── norm_python_comprehension.py │ ├── perf │ │ ├── norm_array.perf │ │ ├── norm_numpy.perf │ │ ├── norm_numpy_dot.perf │ │ ├── norm_python.perf │ │ └── norm_python_comprehension.perf │ └── time │ │ ├── norm_array.time │ │ ├── norm_numpy.time │ │ └── norm_python.time └── pandas │ ├── compare_sklearn_lstsq_timing.py │ ├── generate_data.py │ ├── plot_min_max_slopes.py │ ├── sklearn_line_profiler.py │ ├── str_operation.py │ ├── time_iteration_methods.py │ └── utility.py ├── 07_compiling ├── .gitignore ├── .notempty ├── Makefile ├── cffi │ ├── diffusion_2d_cffi.py │ └── diffusion_2d_cffi_inline.py ├── cpython_module │ ├── .gitignore │ ├── cdiffusion │ │ ├── diffusion.h │ │ └── python_interface.c │ ├── diffusion.py │ └── setup.py ├── ctypes │ └── diffusion_ctypes.py ├── diffusion.c ├── f2py │ ├── .gitignore │ ├── Makefile │ ├── diffusion.f90 │ └── diffusion_fortran.py ├── julia │ ├── 
cython │ │ ├── cpython │ │ │ ├── cythonfn.pyx │ │ │ ├── cythonfn1.pyx │ │ │ ├── cythonfn2.pyx │ │ │ ├── cythonfn3.pyx │ │ │ ├── cythonfn4.pyx │ │ │ ├── cythonfn5.pyx │ │ │ ├── julia1.py │ │ │ └── setup.py │ │ ├── cpython_pyximport │ │ │ ├── cythonfn.pyx │ │ │ └── julia1.py │ │ └── nparray_memoryview │ │ │ ├── cythonfn.pyx │ │ │ ├── cythonfn1.pyx │ │ │ ├── cythonfn2.pyx │ │ │ ├── julia1.py │ │ │ ├── julia1_np_nocython.py │ │ │ ├── parallel │ │ │ ├── cythonfn.pyx │ │ │ ├── cythonfn1.pyx │ │ │ ├── cythonfn2.pyx │ │ │ ├── cythonfn3.pyx │ │ │ ├── julia1.py │ │ │ └── setup.py │ │ │ └── setup.py │ ├── julia1_nopil.py │ ├── julia1_nopil_expanded_math_pypy.py │ ├── julia1_numba.py │ └── julia1_numba_expandedmath_inspection.py └── pytorch │ ├── compare.py │ ├── diffusion_numpy.py │ ├── diffusion_pytorch.py │ ├── random_access.py │ └── requirements.txt ├── 08_concurrency ├── .notempty ├── cralwer │ ├── asyncio │ │ └── crawler.py │ ├── benchmark.sh │ ├── gevent │ │ └── crawler.py │ ├── images │ │ ├── asyncio.png │ │ ├── gevent.png │ │ ├── grequests.png │ │ ├── parallel_requests.png │ │ ├── serial.png │ │ └── tornado.png │ ├── metric_data.json │ ├── parallel_requests.json │ ├── parallel_requests.py │ ├── serial │ │ └── crawler.py │ ├── server.py │ ├── tornado │ │ └── crawler.py │ ├── tornado_callback │ │ └── crawler.py │ └── visualize.py ├── requirements.txt └── workload │ ├── images │ ├── async_callgraph.png │ ├── workload_async_batches_no-IO.png │ ├── workload_async_batches_no-IO_serial.png │ ├── workload_async_no-IO.png │ ├── workload_batches_no-IO.png │ ├── workload_file-IO_no-IO.png │ └── workload_no-IO_serial.png │ ├── server.py │ ├── workload.py │ └── workloads.json ├── 09_multiprocessing ├── .notempty ├── locking │ ├── ex1_lock.py │ ├── ex1_nolock1.py │ ├── ex1_nolock4.py │ ├── ex2_lock.py │ ├── ex2_lock_rawvalue.py │ ├── ex2_nolock.py │ └── ex3_redis.py ├── np_shared_example │ ├── np_shared.py │ └── rnd_demo │ │ ├── np_shared_rnd_parallel.py │ │ └── np_shared_rnd_serial.py ├── pi_estimation │ ├── pi_lists_parallel │ │ ├── pi_graph_speed_tests.py │ │ ├── pi_lists_parallel.py │ │ ├── pi_lists_parallel_joblib.py │ │ ├── pi_lists_parallel_joblib_cache.py │ │ └── pi_lists_series.py │ ├── pi_monte_carlo_diagram │ │ └── pi_plot_monte_carlo_example.py │ └── pi_processes_parallel │ │ ├── pi_graph_speed_tests.py │ │ ├── pi_numpy_parallel_worker.py │ │ ├── pi_numpy_serial.py │ │ └── pi_numpy_serial_blocks.py ├── prime_generation │ ├── plot_serial_vs_queue_times.py │ ├── primes.py │ ├── primes_pool.py │ ├── primes_queue.py │ ├── primes_queue_jobs_feeder_thread.py │ └── primes_queue_less_work.py └── prime_validation │ ├── create_range.py │ ├── plot_prime_validation_times.py │ ├── primes.py │ ├── primes_factor_test.py │ ├── primes_pool_per_number1.py │ ├── primes_pool_per_number2.py │ ├── primes_pool_per_number_manager.py │ ├── primes_pool_per_number_mmap.py │ ├── primes_pool_per_number_mmap2.py │ ├── primes_pool_per_number_mmap3.py │ ├── primes_pool_per_number_mmap4.py │ ├── primes_pool_per_number_redis.py │ ├── primes_pool_per_number_value.py │ ├── primes_pool_per_number_value_withinit.py │ └── primes_understand_comms_frequency.py ├── 10_clusters ├── .notempty ├── docker │ ├── Dockerfile │ ├── Makefile │ ├── diffusion_numpy_memory2.py │ └── requirements.txt ├── ipython_parallel │ └── pi_ipython_cluster.py └── nsq │ └── nsq_worker.py ├── 11_less_ram ├── .notempty ├── compressing_text │ ├── plot_example.py │ ├── text_example.py │ ├── text_example_clean_list.py │ ├── 
text_example_clean_list_wikipedia_gensim.py │ ├── text_example_dawg.py │ ├── text_example_dawg_load_only.py │ ├── text_example_list.py │ ├── text_example_list_bisect.py │ ├── text_example_set.py │ ├── text_example_trie.py │ └── text_example_trie_load_only.py ├── getsizeof │ └── asizeof.py ├── morris_counter_example │ ├── morris_counter.py │ └── show_morris_counter.py ├── numexpr_pandas │ └── make_cross_entropy_picture.py ├── probabilistic_datastructures │ ├── __init__.py │ ├── _benchmark.clean.pkl │ ├── _benchmark.pkl │ ├── _benchmark.py │ ├── bloomfilter.py │ ├── hyperloglog.py │ ├── kminvalues.py │ ├── ll.py │ ├── llregister.py │ ├── morriscounter.py │ ├── prob_ds_figure.py │ ├── requirements.txt │ ├── results │ │ └── unique.pkl │ ├── scalingbloomfilter.py │ ├── superll.py │ └── utils.py ├── sklearn_hashing_trick │ ├── feature_hashing_explanation2.py │ ├── feature_hashing_explanation_nb.ipynb │ └── feature_hashing_test1.py └── sparse │ └── benchmark_sparse.py ├── LICENSE.md ├── README.md ├── figures ├── array_allocation.graffle ├── bandwidth.csv ├── bandwidth.py ├── dict_probing.py ├── diffusion.py ├── diffusion_1d.py ├── hash-set-theory-example1.graffle ├── hash_function.graffle ├── hll_single_reg.py ├── kmv.py ├── list_overallocation.py ├── list_resize.graffle ├── matrix_method_speed.csv ├── matrix_method_speed.py ├── memory_types.py ├── memory_types_data.csv ├── norm_squared.py ├── processor.csv ├── processor_clock.py └── serial_vs_concurrent.graffle └── fix_cpu_modes.sh /01_understanding/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/01_understanding/.notempty -------------------------------------------------------------------------------- /01_understanding/check_prime.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def check_prime(number): 5 | sqrt_number = math.sqrt(number) 6 | for i in range(2, int(sqrt_number) + 1): 7 | if (number / i).is_integer(): 8 | return False 9 | return True 10 | 11 | 12 | print(f"check_prime(10,000,000) = {check_prime(10_000_000)}") 13 | # check_prime(10,000,000) = False 14 | print(f"check_prime(10,000,019) = {check_prime(10_000_019)}") 15 | # check_prime(10,000,019) = True 16 | -------------------------------------------------------------------------------- /01_understanding/reducing_operations.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def search_fast(haystack, needle): 5 | for item in haystack: 6 | if item == needle: 7 | return True 8 | return False 9 | 10 | 11 | def search_slow(haystack, needle): 12 | return_value = False 13 | for item in haystack: 14 | if item == needle: 15 | return_value = True 16 | return return_value 17 | 18 | 19 | def search_unknown1(haystack, needle): 20 | return any((item == needle for item in haystack)) 21 | 22 | 23 | def search_unknown2(haystack, needle): 24 | return any([item == needle for item in haystack]) 25 | 26 | 27 | if __name__ == "__main__": 28 | iterations = 10000 29 | haystack = list(range(1000)) 30 | setup = "from __main__ import (haystack, needle, search_fast, search_slow)" 31 | 32 | needle = 5 33 | print( 34 | f"Testing search speed with {len(haystack)} items and needle close to the head of the list" 35 | ) 36 | 37 | t = timeit.timeit( 38 | stmt="search_fast(haystack, needle)", setup=setup, number=iterations 39 | ) 40 | 
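    # With needle=5 sitting near the head of the 1000-item haystack, search_fast can
    # return as soon as it hits the match, while search_slow still walks the whole list.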
print(f"search_fast time: {t/iterations:.5e}") 41 | 42 | t = timeit.timeit( 43 | stmt="search_slow(haystack, needle)", setup=setup, number=iterations 44 | ) 45 | print(f"search_slow time: {t/iterations:.5e}") 46 | 47 | needle = len(haystack) - 10 48 | print( 49 | f"Testing search speed with {len(haystack)} items and needle close to the tail of the list" 50 | ) 51 | 52 | t = timeit.timeit( 53 | stmt="search_fast(haystack, needle)", setup=setup, number=iterations 54 | ) 55 | print(f"search_fast time: {t/iterations:.5e}") 56 | 57 | t = timeit.timeit( 58 | stmt="search_slow(haystack, needle)", setup=setup, number=iterations 59 | ) 60 | print(f"search_slow time: {t/iterations:.5e}") 61 | -------------------------------------------------------------------------------- /02_profiling/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/02_profiling/.notempty -------------------------------------------------------------------------------- /02_profiling/utility.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def test_some_fn(): 4 | """Check basic behaviours for our function""" 5 | assert some_fn(2) == 4 6 | assert some_fn(1) == 1 7 | assert some_fn(-1) == 1 8 | 9 | 10 | # check for line_profiler or memory_profiler in the local scope, both 11 | # are injected by their respective tools or they're absent 12 | # if these tools aren't being used (in which case we need to substite 13 | # a dummy @profile decorator) 14 | if 'line_profiler' not in dir() and 'profile' not in dir(): 15 | def profile(func): 16 | def inner(*args, **kwargs): 17 | return func(*args, **kwargs) 18 | return inner 19 | 20 | @profile 21 | def some_fn(useful_input): 22 | """An expensive function that we wish to both test and profile""" 23 | # artificial 'we're doing something clever and expensive' delay 24 | time.sleep(1) 25 | return useful_input ** 2 26 | 27 | 28 | if __name__ == "__main__": 29 | print(f"Example call `some_fn(2)` == {some_fn(2)}") 30 | 31 | 32 | -------------------------------------------------------------------------------- /03_lists_tuples/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/03_lists_tuples/.notempty -------------------------------------------------------------------------------- /03_lists_tuples/binary_search.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def binary_search(needle, haystack): 5 | # imin and imax store the bounds of the haystack that we are currently 6 | # considering. This starts as the bounds of the haystack and slowly 7 | # converges to surround the needle. 
8 |     imin, imax = 0, len(haystack)
9 |     while True:
10 |         if imin >= imax:
11 |             return -1
12 |         midpoint = (imin + imax) // 2
13 |         if haystack[midpoint] > needle:
14 |             imax = midpoint
15 |         elif haystack[midpoint] < needle:
16 |             imin = midpoint + 1
17 |         else:
18 |             return midpoint
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     setup = "from __main__ import (binary_search, haystack, needle)"
23 |     iterations = 10000
24 | 
25 |     for haystack_size in (10000, 100000, 1000000):
26 |         haystack = range(haystack_size)
27 |         for needle in (1, 6000, 9000, 1000000):
28 |             index = binary_search(needle, haystack)
29 |             t = timeit.timeit(
30 |                 stmt="binary_search(needle, haystack)", setup=setup, number=iterations
31 |             )
32 |             print(
33 |                 f"Value {needle: <8} found in haystack of "
34 |                 f"size {len(haystack): <8} at index "
35 |                 f"{index: <8} in {t/iterations:.5e} seconds"
36 |             )
37 | 
--------------------------------------------------------------------------------
/03_lists_tuples/binary_vs_linear_search.py:
--------------------------------------------------------------------------------
1 | import timeit
2 | 
3 | from binary_search import binary_search
4 | from linear_search import linear_search
5 | 
6 | 
7 | def time_and_log(function, needle, haystack):
8 |     index = function(needle, haystack)
9 |     t = timeit.timeit(
10 |         stmt=f"{function.__name__}(needle, haystack)", setup=setup, number=iterations
11 |     )
12 |     print(
13 |         f"[{function.__name__}] Value {needle: <8} found in haystack of "
14 |         f"size {len(haystack): <8} at index "
15 |         f"{index: <8} in {t/iterations:.5e} seconds"
16 |     )
17 | 
18 | 
19 | if __name__ == "__main__":
20 |     setup = "from __main__ import " "(binary_search, linear_search, haystack, needle)"
21 |     iterations = 1000
22 | 
23 |     for haystack_size in (10000, 100000, 1000000):
24 |         haystack = range(haystack_size)
25 |         for needle in (1, 6000, 9000, 1000000):
26 |             time_and_log(linear_search, needle, haystack)
27 |             time_and_log(binary_search, needle, haystack)
28 | 
--------------------------------------------------------------------------------
/03_lists_tuples/bisect_example.py:
--------------------------------------------------------------------------------
1 | import bisect
2 | import random
3 | 
4 | 
5 | def find_closest(haystack, needle):
6 |     # bisect.bisect_left returns the index of the first value in the haystack
7 |     # that is greater than or equal to the needle (i.e., the needle's insertion point)
8 |     i = bisect.bisect_left(haystack, needle)
9 |     if i == len(haystack):
10 |         return i - 1
11 |     elif haystack[i] == needle:
12 |         return i
13 |     elif i > 0:
14 |         j = i - 1
15 |         # since we know the value is larger than needle (and vice versa for the
16 |         # value at j), we don't need to use absolute values here
17 |         if haystack[i] - needle > needle - haystack[j]:
18 |             return j
19 |     return i
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     important_numbers = []
24 |     for i in range(10):
25 |         new_number = random.randint(0, 1000)
26 |         bisect.insort(important_numbers, new_number)
27 | 
28 |     # important_numbers will already be in order because we inserted new elements
29 |     # with bisect.insort
30 |     print(important_numbers)
31 |     # > [14, 265, 496, 661, 683, 734, 881, 892, 973, 992]
32 | 
33 |     closest_index = find_closest(important_numbers, -250)
34 |     print(f"Closest value to -250: {important_numbers[closest_index]}")
35 |     # > Closest value to -250: 14
36 | 
37 |     closest_index = find_closest(important_numbers, 500)
38 |     print(f"Closest value to 500: {important_numbers[closest_index]}")
39 |     # > Closest value to 500: 496
40 | 
41 |     closest_index = find_closest(important_numbers, 1100)
| print(f"Closest value to 1100: {important_numbers[closest_index]}") 43 | # > Closest value to 1100: 992 44 | -------------------------------------------------------------------------------- /03_lists_tuples/linear_search.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def linear_search(needle, array): 5 | for i, item in enumerate(array): 6 | if item == needle: 7 | return i 8 | return -1 9 | 10 | 11 | if __name__ == "__main__": 12 | setup = "from __main__ import (linear_search, haystack, needle)" 13 | iterations = 1000 14 | 15 | for haystack_size in (10000, 100000, 1000000): 16 | haystack = range(haystack_size) 17 | for needle in (1, 6000, 9000, 1000000): 18 | index = linear_search(needle, haystack) 19 | t = timeit.timeit( 20 | stmt="linear_search(needle, haystack)", setup=setup, number=iterations 21 | ) 22 | print( 23 | f"Value {needle: <8} found in haystack of " 24 | f"size {len(haystack): <8} at index " 25 | f"{index: <8} in {t/iterations:.5e} seconds" 26 | ) 27 | -------------------------------------------------------------------------------- /04_dict_set/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/04_dict_set/.notempty -------------------------------------------------------------------------------- /04_dict_set/custom_hash_function.py: -------------------------------------------------------------------------------- 1 | class City(str): 2 | def __hash__(self): 3 | return ord(self[0]) 4 | 5 | 6 | if __name__ == "__main__": 7 | print( 8 | "Adding Rome, San Francisco, New York and Barcelona to a set. New York and Barcenlona will collide!" 
9 |     )
10 |     # We create a dictionary where we assign arbitrary values to cities
11 |     data = {
12 |         City("Rome"): "Italy",
13 |         City("San Francisco"): "USA",
14 |         City("New York"): "USA",
15 |         City("Barcelona"): "Spain",
16 |     }
17 | 
--------------------------------------------------------------------------------
/04_dict_set/custom_vs_default_hash.py:
--------------------------------------------------------------------------------
1 | class Point(object):
2 |     def __init__(self, x, y):
3 |         self.x, self.y = x, y
4 | 
5 | 
6 | class PointHash(object):
7 |     def __init__(self, x, y):
8 |         self.x, self.y = x, y
9 | 
10 |     def __hash__(self):
11 |         return hash((self.x, self.y))
12 | 
13 |     def __eq__(self, other):
14 |         return self.x == other.x and self.y == other.y
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     print("Test with default hash function")
19 |     p1 = Point(1, 1)
20 |     p2 = Point(1, 1)
21 |     points = set([p1, p2])
22 |     print("Contents of set([p1, p2]): ", points)
23 |     print("Point(1, 1) in set([p1, p2]) = ", (Point(1, 1) in points))
24 | 
25 |     print("Test with custom hash function")
26 |     p1 = PointHash(1, 1)
27 |     p2 = PointHash(1, 1)
28 |     points = set([p1, p2])
29 |     print("Contents of set([p1, p2]): ", points)
30 |     print("Point(1, 1) in set([p1, p2]) = ", (PointHash(1, 1) in points))
31 | 
--------------------------------------------------------------------------------
/04_dict_set/dict_probing.py:
--------------------------------------------------------------------------------
1 | from itertools import islice
2 | 
3 | 
4 | def index_sequence(key, mask=0b111, PERTURB_SHIFT=5):
5 |     perturb = hash(key)
6 |     i = perturb & mask
7 |     yield i
8 |     while True:
9 |         perturb >>= PERTURB_SHIFT
10 |         i = (i * 5 + perturb + 1) & mask
11 |         yield i
12 | 
13 | 
14 | class ForceHash(object):
15 |     def __init__(self, force_hash):
16 |         self.force_hash = force_hash
17 | 
18 |     def __hash__(self):
19 |         return self.force_hash
20 | 
21 |     def __repr__(self):
22 |         return f"<ForceHash: {self.force_hash}>"
23 | 
24 | 
25 | def sample_probe(force_hash, num_samples=10):
26 |     probe_values = index_sequence(force_hash)
27 |     indexes = islice(probe_values, num_samples)
28 |     print(f"First {num_samples} samples for hash {force_hash}: {list(indexes)}")
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     sample_probe(ForceHash(0b00000111))
33 |     sample_probe(ForceHash(0b11100111))
34 |     sample_probe(ForceHash(0b01110111))
35 |     sample_probe(ForceHash(0b01110001))
36 |     sample_probe(ForceHash(0b01110000))
37 | 
--------------------------------------------------------------------------------
/04_dict_set/namespace.py:
--------------------------------------------------------------------------------
1 | import math
2 | from math import sin
3 | 
4 | 
5 | def test1(x):
6 |     """
7 |     >>> %timeit test1(123_456)
8 |     162 µs ± 3.82 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
9 |     """
10 |     res = 1
11 |     for _ in range(1000):
12 |         res += math.sin(x)
13 |     return res
14 | 
15 | 
16 | def test2(x):
17 |     """
18 |     >>> %timeit test2(123_456)
19 |     124 µs ± 6.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
20 |     """
21 |     res = 1
22 |     for _ in range(1000):
23 |         res += sin(x)
24 |     return res
25 | 
26 | 
27 | def test3(x, sin=math.sin):
28 |     """
29 |     >>> %timeit test3(123_456)
30 |     105 µs ± 3.35 µs per loop (mean ± std. dev.
of 7 runs, 10000 loops each) 31 | """ 32 | res = 1 33 | for _ in range(1000): 34 | res += sin(x) 35 | return res 36 | -------------------------------------------------------------------------------- /04_dict_set/timing_hash_function.py: -------------------------------------------------------------------------------- 1 | import string 2 | import timeit 3 | 4 | 5 | class BadHash(str): 6 | def __hash__(self): 7 | return 42 8 | 9 | 10 | class GoodHash(str): 11 | def __hash__(self): 12 | """ 13 | This is a slightly optimized version of twoletter_hash 14 | """ 15 | return ord(self[1]) + 26 * ord(self[0]) - 2619 16 | 17 | 18 | if __name__ == "__main__": 19 | baddict = set() 20 | gooddict = set() 21 | for i in string.ascii_lowercase: 22 | for j in string.ascii_lowercase: 23 | key = i + j 24 | baddict.add(BadHash(key)) 25 | gooddict.add(GoodHash(key)) 26 | 27 | badtime = timeit.repeat( 28 | "key in baddict", 29 | setup="from __main__ import baddict, BadHash; key = BadHash('zz')", 30 | repeat=3, 31 | number=100_000, 32 | ) 33 | goodtime = timeit.repeat( 34 | "key in gooddict", 35 | setup="from __main__ import gooddict, GoodHash; key = GoodHash('zz')", 36 | repeat=3, 37 | number=100_000, 38 | ) 39 | 40 | print(f"Min lookup time for baddict: {min(badtime)}") 41 | print(f"Min lookup time for gooddict: {min(goodtime)}") 42 | 43 | # Results: 44 | # Min lookup time for baddict: 17.719061855008476 45 | # Min lookup time for gooddict: 0.42408075400453527 46 | -------------------------------------------------------------------------------- /04_dict_set/unique_lookup.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import timeit 4 | 5 | 6 | def list_unique_names(phonebook): 7 | unique_names = [] 8 | for name, phonenumber in phonebook: 9 | first_name, last_name = name.split(" ", 1) 10 | for unique in unique_names: 11 | if unique == first_name: 12 | break 13 | else: 14 | unique_names.append(first_name) 15 | return len(unique_names) 16 | 17 | 18 | def set_unique_names(phonebook): 19 | unique_names = set() 20 | for name, phonenumber in phonebook: 21 | first_name, last_name = name.split(" ", 1) 22 | unique_names.add(first_name) 23 | return len(unique_names) 24 | 25 | 26 | def random_name(): 27 | first_name = "".join(random.sample(string.ascii_letters, 8)) 28 | last_name = "".join(random.sample(string.ascii_letters, 8)) 29 | return "{} {}".format(first_name, last_name) 30 | 31 | 32 | if __name__ == "__main__": 33 | phonebook = [("John Doe", "555-555-5555"), ("Albert Einstein", "212-555-5555")] 34 | 35 | print("Number of unique names from set method:", set_unique_names(phonebook)) 36 | print("Number of unique names from list method:", list_unique_names(phonebook)) 37 | 38 | setup = ( 39 | "from __main__ import (large_phonebook, set_unique_names, list_unique_names)" 40 | ) 41 | iterations = 50 42 | large_phonebook = [(random_name(), "555-555-5555") for i in range(1000)] 43 | 44 | t = timeit.timeit( 45 | stmt="list_unique_names(large_phonebook)", setup=setup, number=iterations 46 | ) 47 | print( 48 | f"Finding unique names in a phonebook of length {len(large_phonebook)} " 49 | f"using lists took: {t / iterations:2e} seconds" 50 | ) 51 | 52 | t = timeit.timeit( 53 | stmt="set_unique_names(large_phonebook)", setup=setup, number=iterations 54 | ) 55 | print( 56 | f"Finding unique names in a phonebook of length {len(large_phonebook)} " 57 | f"using sets took: {t / iterations:2e} seconds" 58 | ) 59 | 
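The %timeit results recorded in the docstrings of 04_dict_set/namespace.py come down to how each function looks up the name `sin`. A minimal sketch (not part of the repository; the function names below are illustrative) that makes the difference visible with the standard-library `dis` module:

```
import dis
import math
from math import sin


def use_attribute(x):
    return math.sin(x)  # global lookup of `math`, then an attribute lookup for `sin`


def use_global(x):
    return sin(x)  # a single global lookup of `sin`


def use_local(x, sin=math.sin):
    return sin(x)  # `sin` is a local default argument, the cheapest lookup


for fn in (use_attribute, use_global, use_local):
    print(f"--- {fn.__name__} ---")
    dis.dis(fn)  # compare LOAD_GLOBAL/LOAD_ATTR (or LOAD_METHOD) against LOAD_FAST
```

The progressively cheaper lookups match the 162 µs, 124 µs and 105 µs ordering recorded in the docstrings.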
-------------------------------------------------------------------------------- /05_iterators/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/05_iterators/.notempty -------------------------------------------------------------------------------- /05_iterators/fibonacci.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | 3 | 4 | def fibonacci_list(num_items): 5 | numbers = [] 6 | a, b = 0, 1 7 | while len(numbers) < num_items: 8 | numbers.append(a) 9 | a, b = b, a + b 10 | return numbers 11 | 12 | 13 | def fibonacci_gen(num_items): 14 | a, b = 0, 1 15 | while num_items: 16 | yield a 17 | a, b = b, a + b 18 | num_items -= 1 19 | 20 | 21 | def test_fibonacci(func, N): 22 | for i in func(N): 23 | pass 24 | 25 | 26 | if __name__ == "__main__": 27 | setup = "from __main__ import " "(test_fibonacci, fibonacci_gen, fibonacci_list, N)" 28 | iterations = 1000 29 | 30 | for N in (2, 100, 1_000, 100_00): 31 | t = timeit.timeit( 32 | stmt=f"test_fibonacci(fibonacci_list, N)", setup=setup, number=iterations 33 | ) 34 | print( 35 | f"fibonacci_list took {t / iterations:.5e}s to calculate {N} fibonacci numbers" 36 | ) 37 | 38 | t = timeit.timeit( 39 | stmt=f"test_fibonacci(fibonacci_gen, N)", setup=setup, number=iterations 40 | ) 41 | print( 42 | f"fibonacci_gen took {t / iterations:.5e}s to calculate {N} fibonacci numbers" 43 | ) 44 | -------------------------------------------------------------------------------- /05_iterators/lazy_data_analysis.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from itertools import count, filterfalse, groupby, islice 3 | from random import normalvariate, randint 4 | 5 | from scipy.stats import normaltest 6 | 7 | 8 | def read_data(filename): 9 | with open(filename) as fd: 10 | for line in fd: 11 | data = line.strip().split(",") 12 | timestamp, value = map(int, data) 13 | yield datetime.fromtimestamp(timestamp), value 14 | 15 | 16 | def read_fake_data(filename): 17 | for timestamp in count(): 18 | # We insert an anomalous data point approximately once a week 19 | if randint(0, 7 * 60 * 60 * 24 - 1) == 1: 20 | value = normalvariate(0, 1) 21 | else: 22 | value = 100 23 | yield datetime.fromtimestamp(timestamp), value 24 | 25 | 26 | def groupby_day(iterable): 27 | key = lambda row: row[0].day 28 | for day, data_group in groupby(iterable, key): 29 | yield list(data_group) 30 | 31 | 32 | def is_normal(data, threshold=1e-3): 33 | _, values = zip(*data) 34 | k2, p_value = normaltest(values) 35 | if p_value < threshold: 36 | return False 37 | return True 38 | 39 | 40 | def filter_anomalous_groups(data): 41 | yield from filterfalse(is_normal, data) 42 | 43 | 44 | def filter_anomalous_data(data): 45 | data_group = groupby_day(data) 46 | yield from filter_anomalous_groups(data_group) 47 | 48 | 49 | if __name__ == "__main__": 50 | data = read_fake_data("fake_filename") 51 | anomaly_generator = filter_anomalous_data(data) 52 | first_five_anomalies = islice(anomaly_generator, 5) 53 | 54 | for data_anomaly in first_five_anomalies: 55 | start_date = data_anomaly[0][0] 56 | end_date = data_anomaly[-1][0] 57 | print(f"Anomaly from {start_date} - {end_date}") 58 | -------------------------------------------------------------------------------- /06_matrix/.notempty: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/.notempty -------------------------------------------------------------------------------- /06_matrix/Makefile: -------------------------------------------------------------------------------- 1 | SCRIPTS := $(sort $(filter-out _%.py, $(wildcard *.py))) 2 | PERF := $(patsubst %.py, perf/%.perf, $(SCRIPTS)) 3 | TIME := $(patsubst %.py, time/%.time, $(SCRIPTS)) 4 | MEMIT := $(patsubst %.py, memit/%.memit, $(SCRIPTS)) 5 | KERNPROF := $(patsubst %.py, kernprof/%.kernprof, $(SCRIPTS)) 6 | 7 | all: $(PERF) $(TIME) $(MEMIT) $(KERNPROF) 8 | 9 | perf: $(PERF) 10 | 11 | time: $(TIME) 12 | 13 | memit: $(MEMIT) 14 | 15 | kernprof: $(KERNPROF) 16 | 17 | %.kernprof: %.py 18 | @echo "lineprof-izing $<" 19 | mkdir kernprof 20 | kernprof -l -v $< > $@ 2>&1 21 | 22 | %.memit: %.py 23 | @echo "%memit-izing $<" 24 | mkdir memit 25 | python -m memory_profiler $< > $@ 2>&1 26 | 27 | %.time: %.py 28 | @echo "Timing $<" 29 | mkdir time 30 | time -v python $< > $@ 2>&1 31 | 32 | %.perf: %.py 33 | @echo "Perfiling $<" 34 | mkdir perf 35 | @perf stat -e cycles,instructions,cache-references,cache-misses,branches,branch-misses,task-clock,faults,page-fault,minor-faults,cs,migrations python $< 2>&1 | sed 's/(\([0-9.]*%\))//g' > $@ 36 | @grep 'seconds time' $@ | column -t | cut -f1 -d' ' 37 | @grep 'cache-misses' $@ | column -t 38 | @echo '' 39 | 40 | clean: 41 | rm -rf $(PERF) $(TIME) $(MEMIT) $(KERNPROF) 42 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/Makefile: -------------------------------------------------------------------------------- 1 | include ../../Makefile 2 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | $ python benchmark.py 3 | Grid size: (256, 256) 4 | Pure Python: 1.22s (1.218498e-01s per iteration) 5 | python+memory: 1.19s (1.186913e-01s per iteration)[1.03x speedup] 6 | numpy: 0.02s (1.638103e-03s per iteration)[74.38x speedup] 7 | numpy+memory: 0.01s (1.490402e-03s per iteration)[81.76x speedup] 8 | numpy+memory2: 0.01s (7.136822e-04s per iteration)[170.73x speedup] 9 | numpy+memory+scipy: 0.02s (1.522303e-03s per iteration)[80.04x speedup] 10 | 11 | Grid size: (512, 512) 12 | Pure Python: 4.89s (4.889611e-01s per iteration) 13 | python+memory: 4.64s (4.643779e-01s per iteration)[1.05x speedup] 14 | numpy: 0.15s (1.469820e-02s per iteration)[33.27x speedup] 15 | numpy+memory: 0.11s (1.104362e-02s per iteration)[44.28x speedup] 16 | numpy+memory2: 0.04s (3.523612e-03s per iteration)[138.77x speedup] 17 | numpy+memory+scipy: 0.08s (8.366203e-03s per iteration)[58.44x speedup] 18 | 19 | Grid size: (1024, 1024) 20 | Pure Python: 20.76s (2.075953e+00s per iteration) 21 | python+memory: 20.60s (2.059773e+00s per iteration)[1.01x speedup] 22 | numpy: 0.55s (5.520298e-02s per iteration)[37.61x speedup] 23 | numpy+memory: 0.40s (4.010251e-02s per iteration)[51.77x speedup] 24 | numpy+memory2: 0.17s (1.718290e-02s per iteration)[120.82x speedup] 25 | numpy+memory+scipy: 0.52s (5.219860e-02s per iteration)[39.77x speedup] 26 | ``` 27 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import timeit 4 | 5 | from numpy import roll, zeros 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (2048, 2048) 13 | 14 | 15 | def laplacian(grid): 16 | return ( 17 | roll(grid, +1, 0) 18 | + roll(grid, -1, 0) 19 | + roll(grid, +1, 1) 20 | + roll(grid, -1, 1) 21 | - 4 * grid 22 | ) 23 | 24 | 25 | @profile 26 | def evolve(grid, dt, D=1): 27 | return grid + dt * D * laplacian(grid) 28 | 29 | 30 | def run_experiment(num_iterations): 31 | grid = zeros(grid_shape) 32 | 33 | block_low = int(grid_shape[0] * 0.4) 34 | block_high = int(grid_shape[0] * 0.5) 35 | grid[block_low:block_high, block_low:block_high] = 0.005 36 | 37 | for i in range(num_iterations): 38 | grid = evolve(grid, 0.1) 39 | return grid 40 | 41 | 42 | if __name__ == "__main__": 43 | n_iter = 100 44 | N, runtime = timeit.Timer( 45 | f"run_experiment({n_iter})", globals=globals() 46 | ).autorange() 47 | print(f"Runtime with grid {grid_shape}: {runtime / N:0.4f}s") 48 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | import numpy as np 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (640, 640) 13 | 14 | 15 | def laplacian(grid, out): 16 | np.copyto(out, grid) 17 | out *= -4 18 | out += np.roll(grid, +1, 0) 19 | out += np.roll(grid, -1, 0) 20 | out += np.roll(grid, +1, 1) 21 | out += np.roll(grid, -1, 1) 22 | 23 | 24 | @profile 25 | def evolve(grid, dt, out, D=1): 26 | laplacian(grid, out) 27 | out *= D * dt 28 | out += grid 29 | 30 | 31 | def run_experiment(num_iterations): 32 | scratch = np.zeros(grid_shape) 33 | grid = np.zeros(grid_shape) 34 | 35 | block_low = int(grid_shape[0] * 0.4) 36 | block_high = int(grid_shape[0] * 0.5) 37 | grid[block_low:block_high, block_low:block_high] = 0.005 38 | 39 | start = time.time() 40 | for i in range(num_iterations): 41 | evolve(grid, 0.1, scratch) 42 | grid, scratch = scratch, grid 43 | return time.time() - start 44 | 45 | 46 | if __name__ == "__main__": 47 | run_experiment(500) 48 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numpy import add, copyto, multiply, zeros 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (640, 640) 13 | 14 | 15 | def roll_add(rollee, shift, axis, out): 16 | if shift == 1 and axis == 0: 17 | out[1:, :] += rollee[:-1, :] 18 | out[0, :] += rollee[-1, :] 19 | elif shift == -1 and axis == 0: 20 | out[:-1, :] += rollee[1:, :] 21 | out[-1, :] += rollee[0, :] 22 | elif shift == 1 and axis == 1: 23 | out[:, 1:] += rollee[:, :-1] 24 | out[:, 0] += rollee[:, -1] 25 | elif shift == -1 and axis == 1: 26 | out[:, :-1] += rollee[:, 1:] 27 | out[:, -1] += rollee[:, 0] 28 | 29 | 30 | def laplacian(grid, out): 31 | copyto(out, grid) 32 | multiply(out, -4.0, out) 33 | roll_add(grid, +1, 0, out) 34 | roll_add(grid, -1, 0, out) 35 | roll_add(grid, +1, 1, out) 36 | roll_add(grid, -1, 1, out) 37 | 38 | 39 | @profile 40 | def evolve(grid, dt, out, D=1): 41 | laplacian(grid, out) 42 | 
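    # both calls below write their results into `out` in place, so evolve() allocates
    # no temporary arrays -- that is the point of the *_memory2 variant of this solver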
multiply(out, D * dt, out) 43 | add(out, grid, out) 44 | 45 | 46 | def run_experiment(num_iterations): 47 | scratch = zeros(grid_shape) 48 | grid = zeros(grid_shape) 49 | 50 | block_low = int(grid_shape[0] * 0.4) 51 | block_high = int(grid_shape[0] * 0.5) 52 | grid[block_low:block_high, block_low:block_high] = 0.005 53 | 54 | start = time.time() 55 | for i in range(num_iterations): 56 | evolve(grid, 0.1, scratch) 57 | grid, scratch = scratch, grid 58 | return time.time() - start 59 | 60 | 61 | if __name__ == "__main__": 62 | run_experiment(500) 63 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory2_numexpr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | import numexpr as ne 6 | from numpy import copyto, multiply, zeros 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (640, 640) 14 | 15 | 16 | def roll_add(rollee, shift, axis, out): 17 | if shift == 1 and axis == 0: 18 | out[1:, :] += rollee[:-1, :] 19 | out[0, :] += rollee[-1, :] 20 | elif shift == -1 and axis == 0: 21 | out[:-1, :] += rollee[1:, :] 22 | out[-1, :] += rollee[0, :] 23 | elif shift == 1 and axis == 1: 24 | out[:, 1:] += rollee[:, :-1] 25 | out[:, 0] += rollee[:, -1] 26 | elif shift == -1 and axis == 1: 27 | out[:, :-1] += rollee[:, 1:] 28 | out[:, -1] += rollee[:, 0] 29 | 30 | 31 | def laplacian(grid, out): 32 | copyto(out, grid) 33 | multiply(out, -4.0, out) 34 | roll_add(grid, +1, 0, out) 35 | roll_add(grid, -1, 0, out) 36 | roll_add(grid, +1, 1, out) 37 | roll_add(grid, -1, 1, out) 38 | 39 | 40 | @profile 41 | def evolve(grid, dt, out, D=1): 42 | laplacian(grid, out) 43 | ne.evaluate("out*D*dt+grid", out=out) 44 | 45 | 46 | def run_experiment(num_iterations): 47 | scratch = zeros(grid_shape) 48 | grid = zeros(grid_shape) 49 | 50 | block_low = int(grid_shape[0] * 0.4) 51 | block_high = int(grid_shape[0] * 0.5) 52 | grid[block_low:block_high, block_low:block_high] = 0.005 53 | 54 | start = time.time() 55 | for i in range(num_iterations): 56 | evolve(grid, 0.1, scratch) 57 | grid, scratch = scratch, grid 58 | return time.time() - start 59 | 60 | 61 | if __name__ == "__main__": 62 | run_experiment(500) 63 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_memory2_numexpr_single.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numexpr import evaluate, set_num_threads 6 | from numpy import copyto, multiply, zeros 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (640, 640) 14 | 15 | 16 | def roll_add(rollee, shift, axis, out): 17 | if shift == 1 and axis == 0: 18 | out[1:, :] += rollee[:-1, :] 19 | out[0, :] += rollee[-1, :] 20 | elif shift == -1 and axis == 0: 21 | out[:-1, :] += rollee[1:, :] 22 | out[-1, :] += rollee[0, :] 23 | elif shift == 1 and axis == 1: 24 | out[:, 1:] += rollee[:, :-1] 25 | out[:, 0] += rollee[:, -1] 26 | elif shift == -1 and axis == 1: 27 | out[:, :-1] += rollee[:, 1:] 28 | out[:, -1] += rollee[:, 0] 29 | 30 | 31 | def laplacian(grid, out): 32 | copyto(out, grid) 33 | multiply(out, -4.0, out) 34 | roll_add(grid, +1, 0, out) 35 | roll_add(grid, -1, 0, out) 36 | roll_add(grid, +1, 1, out) 37 | roll_add(grid, -1, 1, out) 38 | 39 | 40 | @profile 41 | def evolve(grid, dt, out, 
D=1): 42 | laplacian(grid, out) 43 | evaluate("out*D*dt+grid", out=out) 44 | 45 | 46 | def run_experiment(num_iterations): 47 | previous_threads = set_num_threads(1) 48 | 49 | scratch = zeros(grid_shape) 50 | grid = zeros(grid_shape) 51 | 52 | block_low = int(grid_shape[0] * 0.4) 53 | block_high = int(grid_shape[0] * 0.5) 54 | grid[block_low:block_high, block_low:block_high] = 0.005 55 | 56 | start = time.time() 57 | for i in range(num_iterations): 58 | evolve(grid, 0.1, scratch) 59 | grid, scratch = scratch, grid 60 | 61 | set_num_threads(previous_threads) 62 | return time.time() - start 63 | 64 | 65 | if __name__ == "__main__": 66 | run_experiment(500) 67 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_numpy_naive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numpy import roll, zeros 6 | 7 | try: 8 | profile 9 | except NameError: 10 | profile = lambda x: x 11 | 12 | grid_shape = (640, 640) 13 | 14 | 15 | def laplacian(grid): 16 | return ( 17 | roll(grid, +1, 0) 18 | + roll(grid, -1, 0) 19 | + roll(grid, +1, 1) 20 | + roll(grid, -1, 1) 21 | - 4 * grid 22 | ) 23 | 24 | 25 | @profile 26 | def evolve(grid, dt, D=1): 27 | return grid + dt * D * laplacian(grid) 28 | 29 | 30 | def run_experiment(num_iterations): 31 | grid = zeros(grid_shape) 32 | 33 | block_low = int(grid_shape[0] * 0.4) 34 | block_high = int(grid_shape[0] * 0.5) 35 | grid[block_low:block_high, block_low:block_high] = 0.005 36 | 37 | start = time.time() 38 | for i in range(num_iterations): 39 | grid = evolve(grid, 0.1) 40 | return time.time() - start 41 | 42 | 43 | if __name__ == "__main__": 44 | run_experiment(500) 45 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_python.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | try: 6 | profile 7 | except NameError: 8 | profile = lambda x: x 9 | 10 | grid_shape = (640, 640) 11 | 12 | 13 | @profile 14 | def evolve(grid, dt, D=1.0): 15 | xmax, ymax = grid_shape 16 | new_grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 17 | for i in range(xmax): 18 | for j in range(ymax): 19 | grid_xx = ( 20 | grid[(i + 1) % xmax][j] + grid[(i - 1) % xmax][j] - 2.0 * grid[i][j] 21 | ) 22 | grid_yy = ( 23 | grid[i][(j + 1) % ymax] + grid[i][(j - 1) % ymax] - 2.0 * grid[i][j] 24 | ) 25 | new_grid[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 26 | return new_grid 27 | 28 | 29 | def run_experiment(num_iterations): 30 | # setting up initial conditions 31 | grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 32 | 33 | block_low = int(grid_shape[0] * 0.4) 34 | block_high = int(grid_shape[0] * 0.5) 35 | for i in range(block_low, block_high): 36 | for j in range(block_low, block_high): 37 | grid[i][j] = 0.005 38 | 39 | start = time.time() 40 | for i in range(num_iterations): 41 | grid = evolve(grid, 0.1) 42 | return time.time() - start 43 | 44 | 45 | if __name__ == "__main__": 46 | run_experiment(500) 47 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_python_memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | try: 6 | profile 7 | except NameError: 8 | profile = lambda x: x 9 | 
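# `profile` only exists when kernprof or memory_profiler injects it at run time;
# the no-op fallback defined above keeps this script importable and runnable on its own.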
10 | grid_shape = (640, 640) 11 | 12 | 13 | @profile 14 | def evolve(grid, dt, out, D=1.0): 15 | xmax, ymax = grid_shape 16 | for i in range(xmax): 17 | for j in range(ymax): 18 | grid_xx = ( 19 | grid[(i + 1) % xmax][j] + grid[(i - 1) % xmax][j] - 2.0 * grid[i][j] 20 | ) 21 | grid_yy = ( 22 | grid[i][(j + 1) % ymax] + grid[i][(j - 1) % ymax] - 2.0 * grid[i][j] 23 | ) 24 | out[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 25 | 26 | 27 | def run_experiment(num_iterations): 28 | # setting up initial conditions 29 | scratch = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 30 | grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 31 | 32 | block_low = int(grid_shape[0] * 0.4) 33 | block_high = int(grid_shape[0] * 0.5) 34 | for i in range(block_low, block_high): 35 | for j in range(block_low, block_high): 36 | grid[i][j] = 0.005 37 | 38 | start = time.time() 39 | for i in range(num_iterations): 40 | evolve(grid, 0.1, scratch) 41 | grid, scratch = scratch, grid 42 | return time.time() - start 43 | 44 | 45 | if __name__ == "__main__": 46 | run_experiment(500) 47 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/diffusion_scipy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | 5 | from numpy import add, multiply, zeros 6 | from scipy.ndimage.filters import laplace 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (640, 640) 14 | 15 | 16 | def laplacian(grid, out): 17 | laplace(grid, out, mode="wrap") 18 | 19 | 20 | @profile 21 | def evolve(grid, dt, out, D=1): 22 | laplacian(grid, out) 23 | multiply(out, D * dt, out) 24 | add(out, grid, grid) 25 | 26 | 27 | def run_experiment(num_iterations): 28 | scratch = zeros(grid_shape) 29 | grid = zeros(grid_shape) 30 | 31 | block_low = int(grid_shape[0] * 0.4) 32 | block_high = int(grid_shape[0] * 0.5) 33 | grid[block_low:block_high, block_low:block_high] = 0.005 34 | 35 | start = time.time() 36 | for i in range(num_iterations): 37 | evolve(grid, 0.1, scratch) 38 | return time.time() - start 39 | 40 | 41 | if __name__ == "__main__": 42 | run_experiment(500) 43 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 2.11653 s 5 | File: diffusion_numpy.py 6 | Function: evolve at line 16 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 16 @profile 11 | 17 def evolve(grid, dt, D=1): 12 | 18 500 2116531.0 4233.1 100.0 return grid + dt * D * laplacian(grid) 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.58502 s 5 | File: diffusion_numpy_memory.py 6 | Function: evolve at line 21 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 21 @profile 11 | 22 def evolve(grid, dt, out, D=1): 12 | 23 500 1327910.0 2655.8 83.8 laplacian(grid, 
out) 13 | 24 500 100733.0 201.5 6.4 out *= D * dt 14 | 25 500 156377.0 312.8 9.9 out += grid 15 | 16 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory2.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory2.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.26046 s 5 | File: diffusion_numpy_memory2.py 6 | Function: evolve at line 35 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 35 @profile 11 | 36 def evolve(grid, dt, out, D=1): 12 | 37 500 1068913.0 2137.8 84.8 laplacian(grid, out) 13 | 38 500 83140.0 166.3 6.6 multiply(out, D*dt, out) 14 | 39 500 108404.0 216.8 8.6 add(out, grid, out) 15 | 16 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory2_numexpr.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory2_numexpr.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.33304 s 5 | File: diffusion_numpy_memory2_numexpr.py 6 | Function: evolve at line 36 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 36 @profile 11 | 37 def evolve(grid, dt, out, D=1): 12 | 38 500 1111160.0 2222.3 83.4 laplacian(grid, out) 13 | 39 500 221878.0 443.8 16.6 ne.evaluate("out*D*dt+grid", out=out) 14 | 15 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_memory2_numexpr_single.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_memory2_numexpr_single.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 1.68504 s 5 | File: diffusion_numpy_memory2_numexpr_single.py 6 | Function: evolve at line 36 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 36 @profile 11 | 37 def evolve(grid, dt, out, D=1): 12 | 38 500 1102515.0 2205.0 65.4 laplacian(grid, out) 13 | 39 500 582525.0 1165.0 34.6 evaluate("out*D*dt+grid", out=out) 14 | 15 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_numpy_naive.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_numpy_naive.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 2.07788 s 5 | File: diffusion_numpy_naive.py 6 | Function: evolve at line 16 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 16 @profile 11 | 17 def evolve(grid, dt, D=1): 12 | 18 500 2077882.0 4155.8 100.0 return grid + dt * D * laplacian(grid) 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_python.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_python.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 787.161 s 5 | File: diffusion_python.py 6 | Function: evolve at line 12 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | 
============================================================== 10 | 12 @profile 11 | 13 def evolve(grid, dt, D=1.0): 12 | 14 500 843.0 1.7 0.0 xmax, ymax = grid_shape 13 | 15 500 24764794.0 49529.6 3.1 new_grid = [[0.0 for x in range(grid_shape[1])] for x in range(grid_shape[0])] 14 | 16 320500 208683.0 0.7 0.0 for i in range(xmax): 15 | 17 205120000 128928913.0 0.6 16.4 for j in range(ymax): 16 | 18 204800000 222422192.0 1.1 28.3 grid_xx = grid[(i+1)%xmax][j] + grid[(i-1)%xmax][j] - 2.0 * grid[i][j] 17 | 19 204800000 228660607.0 1.1 29.0 grid_yy = grid[i][(j+1)%ymax] + grid[i][(j-1)%ymax] - 2.0 * grid[i][j] 18 | 20 204800000 182174957.0 0.9 23.1 new_grid[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 19 | 21 500 331.0 0.7 0.0 return new_grid 20 | 21 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_python_memory.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_python_memory.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 541.138 s 5 | File: diffusion_python_memory.py 6 | Function: evolve at line 12 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 12 @profile 11 | 13 def evolve(grid, dt, out, D=1.0): 12 | 14 500 503.0 1.0 0.0 xmax, ymax = grid_shape 13 | 15 320500 131498.0 0.4 0.0 for i in range(xmax): 14 | 16 205120000 81105090.0 0.4 15.0 for j in range(ymax): 15 | 17 204800000 166271837.0 0.8 30.7 grid_xx = grid[(i+1)%xmax][j] + grid[(i-1)%xmax][j] - 2.0 * grid[i][j] 16 | 18 204800000 169216352.0 0.8 31.3 grid_yy = grid[i][(j+1)%ymax] + grid[i][(j-1)%ymax] - 2.0 * grid[i][j] 17 | 19 204800000 124412452.0 0.6 23.0 out[i][j] = grid[i][j] + D * (grid_xx + grid_yy) * dt 18 | 19 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/kernprof/diffusion_scipy.kernprof: -------------------------------------------------------------------------------- 1 | Wrote profile results to diffusion_scipy.py.lprof 2 | Timer unit: 1e-06 s 3 | 4 | Total time: 2.62417 s 5 | File: diffusion_scipy.py 6 | Function: evolve at line 17 7 | 8 | Line # Hits Time Per Hit % Time Line Contents 9 | ============================================================== 10 | 17 @profile 11 | 18 def evolve(grid, dt, out, D=1): 12 | 19 500 2384139.0 4768.3 90.9 laplacian(grid, out) 13 | 20 500 93736.0 187.5 3.6 multiply(out, D*dt, out) 14 | 21 500 146298.0 292.6 5.6 add(out, grid, grid) 15 | 16 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_memory.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory2.memit: 
-------------------------------------------------------------------------------- 1 | /home/micha/.pyenv/versions/3.7.2/bin/python: No module named memory_profiler 2 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr_single.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_memory2_numexpr_single.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_numpy_naive.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_numpy_naive.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_python.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_python.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_python_memory.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_python_memory.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/memit/diffusion_scipy.memit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/06_matrix/diffusion_2d/memit/diffusion_scipy.memit -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy.novec.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy.py': 3 | 4 | 50,086,999,350 cycles # 2.888 GHz 5 | 53,611,608,977 instructions # 1.07 insn per cycle 6 | 1,131,742,674 cache-references # 65.266 M/sec 7 | 322,483,897 cache-misses # 28.494 % of all cache refs 8 | 4,001,923,035 branches # 230.785 M/sec 9 | 6,211,101 branch-misses # 0.16% of all branches 10 | 17340.464580 task-clock (msec) # 1.000 CPUs utilized 11 | 403,193 faults # 0.023 M/sec 12 | 403,193 minor-faults # 0.023 M/sec 13 | 74 cs # 0.004 K/sec 14 | 6 migrations # 0.000 K/sec 15 | 16 | 17.339656586 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy.perf: 
-------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy.py': 3 | 4 | 8,432,416,866 cycles # 2.886 GHz 5 | 7,114,758,602 instructions # 0.84 insn per cycle 6 | 1,040,831,469 cache-references # 356.176 M/sec 7 | 216,490,683 cache-misses # 20.800 % of all cache refs 8 | 1,252,928,847 branches # 428.756 M/sec 9 | 8,174,531 branch-misses # 0.65% of all branches 10 | 2922.239426 task-clock (msec) # 1.285 CPUs utilized 11 | 403,282 faults # 0.138 M/sec 12 | 403,282 minor-faults # 0.138 M/sec 13 | 96 cs # 0.033 K/sec 14 | 5 migrations # 0.002 K/sec 15 | 16 | 2.274377105 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory.py': 3 | 4 | 6,880,906,446 cycles # 2.886 GHz 5 | 5,848,134,537 instructions # 0.85 insn per cycle 6 | 1,077,550,720 cache-references # 452.000 M/sec 7 | 217,974,413 cache-misses # 20.229 % of all cache refs 8 | 1,028,769,315 branches # 431.538 M/sec 9 | 7,492,245 branch-misses # 0.73% of all branches 10 | 2383.962679 task-clock (msec) # 1.373 CPUs utilized 11 | 13,521 faults # 0.006 M/sec 12 | 13,521 minor-faults # 0.006 M/sec 13 | 100 cs # 0.042 K/sec 14 | 8 migrations # 0.003 K/sec 15 | 16 | 1.736322099 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory2.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory2.py': 3 | 4 | 5,971,464,515 cycles # 2.888 GHz 5 | 5,893,131,049 instructions # 0.99 insn per cycle 6 | 1,001,582,133 cache-references # 484.398 M/sec 7 | 30,840,612 cache-misses # 3.079 % of all cache refs 8 | 1,038,649,694 branches # 502.325 M/sec 9 | 7,562,009 branch-misses # 0.73% of all branches 10 | 2067.685884 task-clock (msec) # 1.456 CPUs utilized 11 | 11,981 faults # 0.006 M/sec 12 | 11,981 minor-faults # 0.006 M/sec 13 | 95 cs # 0.046 K/sec 14 | 3 migrations # 0.001 K/sec 15 | 16 | 1.419869071 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory2_numexpr.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory2_numexpr.py': 3 | 4 | 8,856,947,179 cycles # 2.872 GHz 5 | 9,354,357,453 instructions # 1.06 insn per cycle 6 | 1,077,518,384 cache-references # 349.423 M/sec 7 | 59,407,830 cache-misses # 5.513 % of all cache refs 8 | 1,018,525,317 branches # 330.292 M/sec 9 | 11,941,430 branch-misses # 1.17% of all branches 10 | 3083.709890 task-clock (msec) # 1.991 CPUs utilized 11 | 15,820 faults # 0.005 M/sec 12 | 15,820 minor-faults # 0.005 M/sec 13 | 8,671 cs # 0.003 M/sec 14 | 2,096 migrations # 0.680 K/sec 15 | 16 | 1.548924090 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_memory2_numexpr_single.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_memory2_numexpr_single.py': 3 | 4 | 7,123,637,362 
cycles # 2.888 GHz 5 | 9,190,434,370 instructions # 1.29 insn per cycle 6 | 963,627,201 cache-references # 390.616 M/sec 7 | 34,391,313 cache-misses # 3.569 % of all cache refs 8 | 996,019,993 branches # 403.746 M/sec 9 | 9,228,406 branch-misses # 0.93% of all branches 10 | 2466.945415 task-clock (msec) # 1.357 CPUs utilized 11 | 15,799 faults # 0.006 M/sec 12 | 15,799 minor-faults # 0.006 M/sec 13 | 111 cs # 0.045 K/sec 14 | 10 migrations # 0.004 K/sec 15 | 16 | 1.818284437 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_numpy_naive.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_numpy_naive.py': 3 | 4 | 8,575,098,217 cycles # 2.886 GHz 5 | 7,103,809,843 instructions # 0.83 insn per cycle 6 | 1,078,489,079 cache-references # 362.982 M/sec 7 | 230,990,178 cache-misses # 21.418 % of all cache refs 8 | 1,252,191,334 branches # 421.444 M/sec 9 | 8,694,936 branch-misses # 0.69% of all branches 10 | 2971.194679 task-clock (msec) # 1.279 CPUs utilized 11 | 403,285 faults # 0.136 M/sec 12 | 403,285 minor-faults # 0.136 M/sec 13 | 94 cs # 0.032 K/sec 14 | 9 migrations # 0.003 K/sec 15 | 16 | 2.323691596 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_python.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_python.py': 3 | 4 | 444,281,397,352 cycles # 2.889 GHz 5 | 1,292,776,966,318 instructions # 2.91 insn per cycle 6 | 902,297,518 cache-references # 5.868 M/sec 7 | 337,237,800 cache-misses # 37.375 % of all cache refs 8 | 268,033,459,921 branches # 1743.051 M/sec 9 | 1,845,293,511 branch-misses # 0.69% of all branches 10 | 153772.569474 task-clock (msec) # 1.000 CPUs utilized 11 | 1,569,793 faults # 0.010 M/sec 12 | 1,569,793 minor-faults # 0.010 M/sec 13 | 294 cs # 0.002 K/sec 14 | 11 migrations # 0.000 K/sec 15 | 16 | 153.776786975 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_python_memory.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_python_memory.py': 3 | 4 | 415,864,974,126 cycles # 2.889 GHz 5 | 1,210,522,769,388 instructions # 2.91 insn per cycle 6 | 656,345,027 cache-references # 4.560 M/sec 7 | 349,562,390 cache-misses # 53.259 % of all cache refs 8 | 251,537,944,600 branches # 1747.583 M/sec 9 | 1,970,031,461 branch-misses # 0.78% of all branches 10 | 143934.730837 task-clock (msec) # 1.000 CPUs utilized 11 | 12,791 faults # 0.089 K/sec 12 | 12,791 minor-faults # 0.089 K/sec 13 | 117 cs # 0.001 K/sec 14 | 6 migrations # 0.000 K/sec 15 | 16 | 143.935522122 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/perf/diffusion_scipy.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python diffusion_scipy.py': 3 | 4 | 10,051,801,725 cycles # 2.886 GHz 5 | 16,536,981,020 instructions # 1.65 insn per cycle 6 | 1,554,557,564 cache-references # 446.405 M/sec 7 | 126,627,735 cache-misses # 8.146 % of all cache refs 8 | 2,673,416,633 branches # 767.696 
M/sec 9 | 9,626,762 branch-misses # 0.36% of all branches 10 | 3482.391211 task-clock (msec) # 1.228 CPUs utilized 11 | 14,013 faults # 0.004 M/sec 12 | 14,013 minor-faults # 0.004 M/sec 13 | 95 cs # 0.027 K/sec 14 | 5 migrations # 0.001 K/sec 15 | 16 | 2.835263796 seconds time elapsed 17 | 18 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy.py" 2 | User time (seconds): 2.02 3 | System time (seconds): 0.87 4 | Percent of CPU this job got: 128% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:02.25 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 39684 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 403309 14 | Voluntary context switches: 96 15 | Involuntary context switches: 26 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_memory.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory.py" 2 | User time (seconds): 1.95 3 | System time (seconds): 0.37 4 | Percent of CPU this job got: 138% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.67 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 39112 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 13595 14 | Voluntary context switches: 101 15 | Involuntary context switches: 35 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_memory2.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory2.py" 2 | User time (seconds): 1.70 3 | System time (seconds): 0.40 4 | Percent of CPU this job got: 144% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.45 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 35772 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 12024 14 | Voluntary context switches: 100 15 | Involuntary context switches: 37 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- 
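The .time files in this directory were captured with GNU time. As a rough cross-check, a minimal Python sketch along these lines (the script list and helper name are illustrative, not part of the repository; it assumes the diffusion_*.py variants are run from this directory) can collect comparable wall-clock numbers:

import subprocess
import sys
import time

# Illustrative list of the variants timed above; adjust as needed.
SCRIPTS = [
    "diffusion_python.py",
    "diffusion_python_memory.py",
    "diffusion_numpy.py",
    "diffusion_numpy_memory.py",
    "diffusion_numpy_memory2.py",
    "diffusion_scipy.py",
]

def time_script(script):
    """Run one variant in a fresh interpreter and return its wall-clock time."""
    start = time.perf_counter()
    subprocess.run([sys.executable, script], check=True)
    return time.perf_counter() - start

if __name__ == "__main__":
    for script in SCRIPTS:
        print(f"{script}: {time_script(script):0.2f} s")

This only measures elapsed time per process; the resident-set-size and page-fault figures above still come from GNU time's own accounting.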
/06_matrix/diffusion_2d/time/diffusion_numpy_memory2_numexpr.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory2_numexpr.py" 2 | User time (seconds): 2.66 3 | System time (seconds): 0.46 4 | Percent of CPU this job got: 210% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.48 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 37784 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 15862 14 | Voluntary context switches: 9341 15 | Involuntary context switches: 48 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_memory2_numexpr_single.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_memory2_numexpr_single.py" 2 | User time (seconds): 2.02 3 | System time (seconds): 0.43 4 | Percent of CPU this job got: 135% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.81 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 37660 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 15824 14 | Voluntary context switches: 129 15 | Involuntary context switches: 24 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_numpy_naive.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_numpy_naive.py" 2 | User time (seconds): 1.99 3 | System time (seconds): 0.91 4 | Percent of CPU this job got: 128% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:02.25 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 39636 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 403287 14 | Voluntary context switches: 103 15 | Involuntary context switches: 28 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_python.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_python.py" 2 | User time (seconds): 148.44 3 | System time (seconds): 2.14 4 | Percent of CPU this job got: 100% 5 | Elapsed (wall clock) time 
(h:mm:ss or m:ss): 2:30.58 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 25400 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 1569834 14 | Voluntary context switches: 78 15 | Involuntary context switches: 215 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_python_memory.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_python_memory.py" 2 | User time (seconds): 138.58 3 | System time (seconds): 0.03 4 | Percent of CPU this job got: 100% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 2:18.61 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 35088 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 12822 14 | Voluntary context switches: 77 15 | Involuntary context switches: 66 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/diffusion_2d/time/diffusion_scipy.time: -------------------------------------------------------------------------------- 1 | Command being timed: "python diffusion_scipy.py" 2 | User time (seconds): 3.06 3 | System time (seconds): 0.40 4 | Percent of CPU this job got: 123% 5 | Elapsed (wall clock) time (h:mm:ss or m:ss): 0:02.82 6 | Average shared text size (kbytes): 0 7 | Average unshared data size (kbytes): 0 8 | Average stack size (kbytes): 0 9 | Average total size (kbytes): 0 10 | Maximum resident set size (kbytes): 40184 11 | Average resident set size (kbytes): 0 12 | Major (requiring I/O) page faults: 0 13 | Minor (reclaiming a frame) page faults: 14029 14 | Voluntary context switches: 98 15 | Involuntary context switches: 24 16 | Swaps: 0 17 | File system inputs: 0 18 | File system outputs: 0 19 | Socket messages sent: 0 20 | Socket messages received: 0 21 | Signals delivered: 0 22 | Page size (bytes): 4096 23 | Exit status: 0 24 | -------------------------------------------------------------------------------- /06_matrix/norm/Makefile: -------------------------------------------------------------------------------- 1 | include ../../Makefile 2 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_array.memit: -------------------------------------------------------------------------------- 1 | /usr/bin/python: No module named memory_profiler 2 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_array.py: -------------------------------------------------------------------------------- 1 | import time 2 | from array import array 3 | 4 | 5 | def norm_square_array(vector): 6 | norm = 0 7 | for v in vector: 8 | norm 
+= v * v 9 | return norm 10 | 11 | 12 | def run_experiment(size, num_iter=3): 13 | vector = array("l", list(range(size))) 14 | times = [] 15 | for i in range(num_iter): 16 | start = time.time() 17 | norm_square_array(vector) 18 | times.append(time.time() - start) 19 | return min(times) 20 | 21 | 22 | if __name__ == "__main__": 23 | print(run_experiment(1000000, 10)) 24 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_numpy.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy 4 | 5 | 6 | def norm_square_numpy(vector): 7 | return numpy.sum(vector * vector) 8 | 9 | 10 | def run_experiment(size, num_iter=3): 11 | vector = numpy.arange(size) 12 | times = [] 13 | for i in range(num_iter): 14 | start = time.time() 15 | norm_square_numpy(vector) 16 | times.append(time.time() - start) 17 | return min(times) 18 | 19 | 20 | if __name__ == "__main__": 21 | print(run_experiment(1000000, 10)) 22 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_numpy_dot.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy 4 | 5 | 6 | def norm_square_numpy_dot(vector): 7 | return numpy.dot(vector, vector) 8 | 9 | 10 | def run_experiment(size, num_iter=3): 11 | vector = numpy.arange(size) 12 | times = [] 13 | for i in range(num_iter): 14 | start = time.time() 15 | norm_square_numpy_dot(vector) 16 | times.append(time.time() - start) 17 | return min(times) 18 | 19 | 20 | if __name__ == "__main__": 21 | print(run_experiment(1000000, 10)) 22 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_python.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def norm_square_list(vector): 5 | norm = 0 6 | for v in vector: 7 | norm += v * v 8 | return norm 9 | 10 | 11 | def run_experiment(size, num_iter=3): 12 | vector = list(range(size)) 13 | times = [] 14 | for i in range(num_iter): 15 | start = time.time() 16 | norm_square_list(vector) 17 | times.append(time.time() - start) 18 | return min(times) 19 | 20 | 21 | if __name__ == "__main__": 22 | print(run_experiment(1000000, 10)) 23 | -------------------------------------------------------------------------------- /06_matrix/norm/norm_python_comprehension.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def norm_square_list_comprehension(vector): 5 | return sum([v * v for v in vector]) 6 | 7 | 8 | def run_experiment(size, num_iter=3): 9 | vector = list(range(size)) 10 | times = [] 11 | for i in range(num_iter): 12 | start = time.time() 13 | norm_square_list_comprehension(vector) 14 | times.append(time.time() - start) 15 | return min(times) 16 | 17 | 18 | if __name__ == "__main__": 19 | print(run_experiment(1000000, 10)) 20 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_array.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python norm_array.py': 3 | 4 | 15,222,087 cache-references 5 | 12,266,506 cache-misses # 80.584 % of all cache refs 6 | 4,932,179,143 cycles # 0.000 GHz 7 | 10,503,076,117 instructions # 2.13 insns per cycle 8 | 2,398,688,365 branches 9 | 100,316 page-faults 10 | 3 CPU-migrations 11 | 12 | 1.832812019 seconds time 
elapsed 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_numpy.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python norm_numpy.py': 3 | 4 | 6,475,458 cache-references 5 | 3,566,911 cache-misses # 55.084 % of all cache refs 6 | 516,789,816 cycles # 0.000 GHz 7 | 537,953,302 instructions # 1.04 insns per cycle 8 | 100,140,435 branches 9 | 43,102 page-faults 10 | 3 CPU-migrations 11 | 12 | 0.148656614 seconds time elapsed 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_numpy_dot.perf: -------------------------------------------------------------------------------- 1 | 0.0008676052093505859 2 | 0.0008687973022460938 3 | 0.0008668899536132812 4 | 5 | Performance counter stats for 'python norm_numpy_dot.py' (3 runs): 6 | 7 | 3,063,302,089 cycles # 2.173 GHz 8 | 2,113,402,590 instructions # 0.69 insns per cycle 9 | 10,075,677 cache-references # 7.148 M/sec 10 | 315,267 cache-misses # 3.129 % of all cache refs 11 | 434,245,199 branches # 308.079 M/sec 12 | 9,329,547 branch-misses # 2.15% of all branches 13 | 1409.526329 task-clock (msec) # 5.211 CPUs utilized 14 | 11,368 faults # 0.008 M/sec 15 | 11,365 minor-faults # 0.008 M/sec 16 | 62,542 cs # 0.044 M/sec 17 | 73 migrations # 0.052 K/sec 18 | 19 | 0.270503238 seconds time elapsed 20 | 21 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_python.perf: -------------------------------------------------------------------------------- 1 | 2 | Performance counter stats for 'python norm_python.py': 3 | 4 | 21,797,839 cache-references 5 | 16,882,474 cache-misses # 77.450 % of all cache refs 6 | 4,357,716,622 cycles # 0.000 GHz 7 | 8,309,894,889 instructions # 1.91 insns per cycle 8 | 1,885,987,604 branches 9 | 80,780 page-faults 10 | 3 CPU-migrations 11 | 12 | 1.575655973 seconds time elapsed 13 | 14 | -------------------------------------------------------------------------------- /06_matrix/norm/perf/norm_python_comprehension.perf: -------------------------------------------------------------------------------- 1 | 0.0670633316040039 2 | 0.06715917587280273 3 | 0.06727433204650879 4 | 5 | Performance counter stats for 'python norm_python_comprehension.py' (3 runs): 6 | 7 | 2,362,457,333 cycles # 2.764 GHz 8 | 5,957,736,587 instructions # 2.52 insns per cycle 9 | 7,957,733 cache-references # 9.310 M/sec 10 | 3,083,079 cache-misses # 38.743 % of all cache refs 11 | 1,367,533,152 branches # 1599.839 M/sec 12 | 1,305,535 branch-misses # 0.10% of all branches 13 | 854.794366 task-clock (msec) # 1.002 CPUs utilized 14 | 98,670 faults # 0.115 M/sec 15 | 98,670 minor-faults # 0.115 M/sec 16 | 77 cs # 0.090 K/sec 17 | 60 migrations # 0.070 K/sec 18 | 19 | 0.852782517 seconds time elapsed 20 | 21 | -------------------------------------------------------------------------------- /06_matrix/norm/time/norm_array.time: -------------------------------------------------------------------------------- 1 | 1.59user 0.16system 0:01.75elapsed 99%CPU (0avgtext+0avgdata 400748maxresident)k 2 | 0inputs+0outputs (0major+100349minor)pagefaults 0swaps 3 | -------------------------------------------------------------------------------- /06_matrix/norm/time/norm_numpy.time: -------------------------------------------------------------------------------- 1 | 0.07user 0.06system 0:00.14elapsed 100%CPU 
(0avgtext+0avgdata 170600maxresident)k 2 | 0inputs+0outputs (0major+43008minor)pagefaults 0swaps 3 | -------------------------------------------------------------------------------- /06_matrix/norm/time/norm_python.time: -------------------------------------------------------------------------------- 1 | 1.45user 0.11system 0:01.57elapsed 99%CPU (0avgtext+0avgdata 322600maxresident)k 2 | 0inputs+0outputs (0major+80812minor)pagefaults 0swaps 3 | -------------------------------------------------------------------------------- /06_matrix/pandas/compare_sklearn_lstsq_timing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot 3 | import timeit 4 | from utility import ols_lstsq, ols_sklearn 5 | #%matplotlib 6 | 7 | from sklearn.linear_model import LinearRegression 8 | import matplotlib.pyplot as plt 9 | 10 | df = pd.read_pickle('generated_ols_data.pickle') 11 | print("Loaded") 12 | 13 | number = 10_000 14 | 15 | results = timeit.repeat("ols_lstsq(df.iloc[0])", globals=globals(), number=number) 16 | time_of_fastest = min(results) 17 | print(f"Time to run ols_lstsq for fastest of repeats is {time_of_fastest / number:0.6f} seconds on {number} repeats and taking fastest") 18 | 19 | results = timeit.repeat("ols_sklearn(df.iloc[0])", globals=globals(), number=number) 20 | time_of_fastest = min(results) 21 | print(f"Time to run ols_sklearn for fastest of repeats is {time_of_fastest / number:0.6f} seconds") 22 | 23 | 24 | -------------------------------------------------------------------------------- /06_matrix/pandas/generate_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.linear_model import LinearRegression 4 | #import statsmodels.api as sm 5 | #from sklearn.linear_model import base 6 | 7 | 8 | from numpy.testing import assert_almost_equal 9 | import matplotlib.pyplot as plt 10 | 11 | outfile = "generated_ols_data.pickle" 12 | NBR_DAYS = 14 13 | NBR_PEOPLE = 100_000 14 | #NBR_PEOPLE = 5_000 15 | 16 | lam = 60 # 100 17 | np.random.seed(0) # fix the seed 18 | hours_per_day_per_person = np.random.poisson(lam=lam, size=(NBR_DAYS, NBR_PEOPLE)).T 19 | hours_per_day_per_person = hours_per_day_per_person / 60 20 | 21 | df = pd.DataFrame(hours_per_day_per_person).astype(np.float_) 22 | print(f"Writing {df.shape} to {outfile}") 23 | print(df.head()) 24 | 25 | df.to_pickle(outfile) 26 | 27 | ax = plt.subplot() 28 | df[:3].T.plot(ax=ax, marker='o') 29 | ax.set_title('Random hours of mobile phone usage for 3 people') 30 | ax.set_xlabel('Days') 31 | ax.set_ylabel('Hours of usage') 32 | ax.set_ylim(0, 1.5) 33 | ax.legend() 34 | plt.savefig('random_hours_mobile_phone_usage_3_people.png') 35 | -------------------------------------------------------------------------------- /06_matrix/pandas/sklearn_line_profiler.py: -------------------------------------------------------------------------------- 1 | import timeit 2 | import pandas as pd 3 | import matplotlib.pyplot 4 | from sklearn.linear_model import base 5 | from sklearn.linear_model import LinearRegression 6 | import matplotlib.pyplot as plt 7 | from line_profiler import LineProfiler 8 | import numpy as np 9 | from utility import ols_lstsq, ols_sklearn 10 | 11 | # We learn that 12 | #https://github.com/scikit-learn/scikit-learn/blob/1495f6924/sklearn/linear_model/base.py#L438 13 | # LinearRegression.fit is expensive because 14 | # of calls to check_X_y, _preprocess_data and 
linalg.lstsq 15 | # https://github.com/scikit-learn/scikit-learn/blob/1495f6924/sklearn/linear_model/base.py#L101 16 | # _preprocess_data 17 | # has 3 expensive lines - check_array, np.asarray, np.average 18 | #https://github.com/scikit-learn/scikit-learn/blob/1495f69242646d239d89a5713982946b8ffcf9d9/sklearn/utils/validation.py#L600 19 | # check_X_y 20 | # checks for array for certain characteristics and lengths 21 | # 22 | 23 | 24 | df = pd.read_pickle('generated_ols_data.pickle') 25 | print(f"Loaded {df.shape} rows") 26 | 27 | est = LinearRegression() 28 | row = df.iloc[0] 29 | X = np.arange(row.shape[0]).reshape(-1, 1).astype(np.float_) 30 | 31 | lp = LineProfiler(est.fit) 32 | print("Run on a single row") 33 | lp.run("est.fit(X, row.values)") 34 | lp.print_stats() 35 | 36 | print("Run on 5000 rows") 37 | lp.run("df[:5000].apply(ols_sklearn, axis=1)") 38 | lp.print_stats() 39 | 40 | lp = LineProfiler(base._preprocess_data) 41 | lp.run("base._preprocess_data(X, row, fit_intercept=True)") 42 | lp.print_stats() 43 | 44 | lp = LineProfiler(base.check_X_y) 45 | lp.run("base.check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True)") 46 | lp.print_stats() 47 | 48 | #%lprun -f est_diagnosis.fit est_diagnosis.fit(np.arange(rowx.shape[0]).reshape(-1, 1), rowx.values) 49 | #lp.run("est_diagnosis.fit(np.arange(rowx.shape[0]).reshape(-1, 1).astype(np.float_), y.values)") 50 | #lp.run("base._preprocess_data(np.arange(rowx.shape[0]).reshape(-1, 1).astype(np.float_), rowx, fit_intercept=True)") 51 | 52 | -------------------------------------------------------------------------------- /06_matrix/pandas/str_operation.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pandas as pd 3 | from numpy.testing import assert_almost_equal, assert_array_almost_equal 4 | import numba 5 | import numpy as np 6 | import matplotlib.pyplot 7 | from utility import ols_lstsq, ols_lstsq_raw 8 | 9 | df = pd.read_pickle('generated_ols_data.pickle') 10 | print(f"Loaded {df.shape} rows") 11 | 12 | results_ols_lstsq = df.apply(ols_lstsq, axis=1) 13 | 14 | #df['m'] = results_ols_lstsq 15 | 16 | #df['growth'] = pd.cut(df['m'], [-1.0, -0.01, 0.01, 1.0], labels=['declining', 'stable', 'growing']) 17 | #display(df['growth'].value_counts()) 18 | 19 | #In [173]: %timeit df.query('growth=="growing"')['m'].mean() 20 | #4.85 ms ± 40.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 21 | #In [174]: %timeit df.groupby('growth')['m'].mean()['growing'] 22 | #1.45 ms ± 8.52 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 23 | #In [175]: mask = df['growth'] == 'growing' 24 | #In [179]: %timeit df[mask]['m'].mean() 25 | #1.9 ms ± 72.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 26 | 27 | 28 | df['0_as_str'] = df[0].apply(lambda v: str(v)) 29 | def find_9(s): 30 | """Return -1 if '9' not found else its location at position >= 0""" 31 | return s.split('.')[1].find('9') 32 | 33 | #%timeit df['0_as_str'].str.split('.', expand=True)[1].str.find('9') 34 | #183 ms ± 2.58 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 35 | #%timeit df['0_as_str'].apply(find_9) 36 | #51 ms ± 987 µs per loop (mean ± std. dev. 
of 7 runs, 10 loops each) 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /06_matrix/pandas/utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LinearRegression 3 | 4 | def ols_sklearn(row): 5 | """Solve OLS using scikit-learn's LinearRegression""" 6 | est = LinearRegression() 7 | X = np.arange(row.shape[0]).reshape(-1, 1) # shape (14, 1) 8 | # note that the intercept is built inside LinearRegression 9 | est.fit(X, row.values) 10 | m = est.coef_[0] # note c is in est.intercept_ 11 | return m 12 | 13 | def ols_lstsq(row): 14 | """Solve OLS using numpy.linalg.lstsq""" 15 | # build X values for [0, 13] 16 | X = np.arange(row.shape[0]) # shape (14,) 17 | ones = np.ones(row.shape[0]) # constant used to build intercept 18 | A = np.vstack((X, ones)).T # shape(14, 2) 19 | # lstsq returns the coefficient and intercept as the first result 20 | # followed by the residuals and other items 21 | m, c = np.linalg.lstsq(A, row.values, rcond=-1)[0] 22 | return m 23 | 24 | def ols_lstsq_raw(row): 25 | """Variant of `ols_lstsq` where row is a numpy array (not a Series)""" 26 | X = np.arange(row.shape[0]) 27 | ones = np.ones(row.shape[0]) 28 | A = np.vstack((X, ones)).T 29 | m, c = np.linalg.lstsq(A, row, rcond=-1)[0] 30 | return m 31 | 32 | 33 | 34 | def ols_sm(row): 35 | # by default statsmodels fit uses 36 | # https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse (pinv) 37 | # QR method is the alternative (this add 3s to execution in 50k rows test) 38 | sm_X = sm.add_constant(row.index) 39 | model = sm.OLS(row.values, sm_X) 40 | results = model.fit() 41 | #results.params # 2 params, C followed by m 42 | return results.params[1] 43 | -------------------------------------------------------------------------------- /07_compiling/.gitignore: -------------------------------------------------------------------------------- 1 | diffusion.so 2 | -------------------------------------------------------------------------------- /07_compiling/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/07_compiling/.notempty -------------------------------------------------------------------------------- /07_compiling/Makefile: -------------------------------------------------------------------------------- 1 | diffusion.so: diffusion.c 2 | gcc -O3 -std=gnu11 -c diffusion.c 3 | gcc -shared -o diffusion.so diffusion.o 4 | rm -rf diffusion.o 5 | -------------------------------------------------------------------------------- /07_compiling/cffi/diffusion_2d_cffi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import time 4 | 5 | from cffi import FFI, verifier 6 | 7 | import numpy as np 8 | 9 | grid_shape = (512, 512) 10 | 11 | ffi = FFI() 12 | ffi.cdef("void evolve(double **in, double **out, double D, double dt);") # <1> 13 | lib = ffi.dlopen("../diffusion.so") 14 | 15 | 16 | def evolve(grid, dt, out, D=1.0): 17 | pointer_grid = ffi.cast("double**", grid.ctypes.data) # <2> 18 | pointer_out = ffi.cast("double**", out.ctypes.data) 19 | lib.evolve(pointer_grid, pointer_out, D, dt) 20 | 21 | 22 | def run_experiment(num_iterations): 23 | scratch = np.zeros(grid_shape, dtype=np.double) 24 | grid = np.zeros(grid_shape, dtype=np.double) 25 | 26 | 
block_low = int(grid_shape[0] * 0.4) 27 | block_high = int(grid_shape[0] * 0.5) 28 | grid[block_low:block_high, block_low:block_high] = 0.005 29 | 30 | start = time.time() 31 | for i in range(num_iterations): 32 | evolve(grid, 0.1, scratch) 33 | grid, scratch = scratch, grid 34 | return time.time() - start 35 | 36 | 37 | if __name__ == "__main__": 38 | t = run_experiment(500) 39 | print(t) 40 | 41 | verifier.cleanup_tmpdir() 42 | -------------------------------------------------------------------------------- /07_compiling/cffi/diffusion_2d_cffi_inline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import time 4 | 5 | from cffi import FFI, verifier 6 | 7 | import numpy as np 8 | 9 | grid_shape = (512, 512) 10 | 11 | ffi = FFI() 12 | ffi.cdef("void evolve(double **in, double **out, double D, double dt);") 13 | lib = ffi.verify( 14 | r""" 15 | void evolve(double in[][512], double out[][512], double D, double dt) { 16 | int i, j; 17 | double laplacian; 18 | for (i=1; i<511; i++) { 19 | for (j=1; j<511; j++) { 20 | laplacian = in[i+1][j] + in[i-1][j] + in[i][j+1] + in[i][j-1] - 4 * in[i][j]; 21 | out[i][j] = in[i][j] + D * dt * laplacian; 22 | } 23 | } 24 | } 25 | """, 26 | extra_compile_args=["-O3"], # <1> 27 | ) 28 | 29 | 30 | def evolve(grid, dt, out, D=1.0): 31 | pointer_grid = ffi.cast("double**", grid.ctypes.data) 32 | pointer_out = ffi.cast("double**", out.ctypes.data) 33 | lib.evolve(pointer_grid, pointer_out, D, dt) 34 | 35 | 36 | def run_experiment(num_iterations): 37 | scratch = np.zeros(grid_shape, dtype=np.double) 38 | grid = np.zeros(grid_shape, dtype=np.double) 39 | 40 | block_low = int(grid_shape[0] * 0.4) 41 | block_high = int(grid_shape[0] * 0.5) 42 | grid[block_low:block_high, block_low:block_high] = 0.005 43 | 44 | start = time.time() 45 | for i in range(num_iterations): 46 | evolve(grid, 0.1, scratch) 47 | grid, scratch = scratch, grid 48 | return time.time() - start 49 | 50 | 51 | if __name__ == "__main__": 52 | t = run_experiment(500) 53 | print(t) 54 | 55 | verifier.cleanup_tmpdir() 56 | -------------------------------------------------------------------------------- /07_compiling/cpython_module/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | build 3 | -------------------------------------------------------------------------------- /07_compiling/cpython_module/cdiffusion/diffusion.h: -------------------------------------------------------------------------------- 1 | void evolve(double in[][512], double out[][512], double D, double dt); 2 | -------------------------------------------------------------------------------- /07_compiling/cpython_module/diffusion.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from cdiffusion import evolve 5 | 6 | grid_shape = (512, 512) 7 | 8 | 9 | def run_experiment(num_iterations): 10 | scratch = np.zeros(grid_shape, dtype=np.double) 11 | grid = np.zeros(grid_shape, dtype=np.double) 12 | 13 | block_low = int(grid_shape[0] * 0.4) 14 | block_high = int(grid_shape[0] * 0.5) 15 | grid[block_low:block_high, block_low:block_high] = 0.005 16 | 17 | start = time.time() 18 | for i in range(num_iterations): 19 | evolve(grid, scratch, 1.0, 0.1) 20 | grid, scratch = scratch, grid 21 | return time.time() - start 22 | 23 | 24 | if __name__ == "__main__": 25 | t = run_experiment(500) 26 | print(t) 27 | 
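For reference while reading the compiled versions, the update rule they all implement can be written as one short self-contained NumPy function (a sketch for comparison only, not a file from the repository). Note that it uses periodic boundaries via np.roll, matching the modular indexing in the pure-Python variants, whereas diffusion.c leaves the outermost cells untouched:

import numpy as np

def evolve_reference(grid, dt, out, D=1.0):
    """Reference diffusion step: out = grid + D * dt * laplacian(grid)."""
    laplacian = (
        np.roll(grid, +1, axis=0)
        + np.roll(grid, -1, axis=0)
        + np.roll(grid, +1, axis=1)
        + np.roll(grid, -1, axis=1)
        - 4 * grid
    )
    out[:] = grid + D * dt * laplacian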
-------------------------------------------------------------------------------- /07_compiling/cpython_module/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import Extension, setup 2 | 3 | import numpy.distutils.misc_util 4 | 5 | __version__ = "0.1" 6 | 7 | cdiffusion = Extension( 8 | "cdiffusion", 9 | sources=["../diffusion.c", "cdiffusion/python_interface.c"], 10 | extra_compile_args=["-O3", "-std=c17", "-Wall", "-p", "-pg"], 11 | extra_link_args=["-lc"], 12 | ) 13 | 14 | setup( 15 | name="diffusion", 16 | version=__version__, 17 | ext_modules=[cdiffusion], 18 | packages=["diffusion"], 19 | include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs(), 20 | ) 21 | -------------------------------------------------------------------------------- /07_compiling/ctypes/diffusion_ctypes.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import time 3 | 4 | import numpy as np 5 | 6 | grid_shape = (512, 512) 7 | _diffusion = ctypes.CDLL("../diffusion.so") 8 | 9 | # Create references to the C types that we will need to simplify future code 10 | TYPE_INT = ctypes.c_int 11 | TYPE_DOUBLE = ctypes.c_double 12 | TYPE_DOUBLE_SS = ctypes.POINTER(ctypes.POINTER(ctypes.c_double)) 13 | 14 | # Initialize the signature of the evolve function to: 15 | # void evolve(int, int, double**, double**, double, double) 16 | _diffusion.evolve.argtypes = [TYPE_DOUBLE_SS, TYPE_DOUBLE_SS, TYPE_DOUBLE, TYPE_DOUBLE] 17 | _diffusion.evolve.restype = None 18 | 19 | 20 | def evolve(grid, out, dt, D=1.0): 21 | # First we convert the python types into the relevant C types 22 | assert grid.shape == (512, 512) 23 | cdt = TYPE_DOUBLE(dt) 24 | cD = TYPE_DOUBLE(D) 25 | pointer_grid = grid.ctypes.data_as(TYPE_DOUBLE_SS) 26 | pointer_out = out.ctypes.data_as(TYPE_DOUBLE_SS) 27 | 28 | # Now we can call the function 29 | _diffusion.evolve(pointer_grid, pointer_out, cD, cdt) 30 | 31 | 32 | def run_experiment(num_iterations): 33 | scratch = np.zeros(grid_shape, dtype=ctypes.c_double) 34 | grid = np.zeros(grid_shape, dtype=ctypes.c_double) 35 | 36 | block_low = int(grid_shape[0] * 0.4) 37 | block_high = int(grid_shape[0] * 0.5) 38 | grid[block_low:block_high, block_low:block_high] = 0.005 39 | 40 | start = time.time() 41 | for i in range(num_iterations): 42 | evolve(grid, scratch, 0.1) 43 | grid, scratch = scratch, grid 44 | return time.time() - start 45 | 46 | 47 | if __name__ == "__main__": 48 | t = run_experiment(500) 49 | print(t) 50 | -------------------------------------------------------------------------------- /07_compiling/diffusion.c: -------------------------------------------------------------------------------- 1 | 2 | void evolve(double in[][512], double out[][512], double D, double dt) { 3 | int i, j; 4 | double laplacian; 5 | for (i=1; i<511; i++) { 6 | for (j=1; j<511; j++) { 7 | laplacian = in[i+1][j] + in[i-1][j] + in[i][j+1] + in[i][j-1]\ 8 | - 4 * in[i][j]; 9 | out[i][j] = in[i][j] + D * dt * laplacian; 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /07_compiling/f2py/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | -------------------------------------------------------------------------------- /07_compiling/f2py/Makefile: -------------------------------------------------------------------------------- 1 | diffusion.so: diffusion.f90 2 | f2py -c -m diffusion --fcompiler=gfortran 
--opt='-O3' diffusion.f90 3 | -------------------------------------------------------------------------------- /07_compiling/f2py/diffusion.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE evolve(grid, scratch, D, dt, N, M) 2 | !f2py threadsafe 3 | !f2py intent(in) grid 4 | !f2py intent(inplace) scratch 5 | !f2py intent(in) D 6 | !f2py intent(in) dt 7 | !f2py intent(hide) N 8 | !f2py intent(hide) M 9 | INTEGER :: N, M 10 | DOUBLE PRECISION, DIMENSION(N,M) :: grid, scratch 11 | DOUBLE PRECISION, DIMENSION(N-2, M-2) :: laplacian 12 | DOUBLE PRECISION :: D, dt 13 | 14 | laplacian = grid(3:N, 2:M-1) + grid(1:N-2, 2:M-1) + & 15 | grid(2:N-1, 3:M) + grid(2:N-1, 1:M-2) - 4 * grid(2:N-1, 2:M-1) 16 | scratch(2:N-1, 2:M-1) = grid(2:N-1, 2:M-1) + D * dt * laplacian 17 | END SUBROUTINE evolve 18 | -------------------------------------------------------------------------------- /07_compiling/f2py/diffusion_fortran.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from diffusion import evolve 5 | 6 | grid_shape = (512, 512) 7 | 8 | 9 | def run_experiment(num_iterations): 10 | scratch = np.zeros(grid_shape, dtype=np.double, order="F") # <1> 11 | grid = np.zeros(grid_shape, dtype=np.double, order="F") 12 | 13 | block_low = int(grid_shape[0] * 0.4) 14 | block_high = int(grid_shape[0] * 0.5) 15 | grid[block_low:block_high, block_low:block_high] = 0.005 16 | 17 | start = time.time() 18 | for i in range(num_iterations): 19 | evolve(grid, scratch, 1.0, 0.1) 20 | grid, scratch = scratch, grid 21 | return time.time() - start 22 | 23 | 24 | if __name__ == "__main__": 25 | t = run_experiment(500) 26 | print(t) 27 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn1.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | output = [0] * len(zs) 4 | for i in range(len(zs)): 5 | n = 0 6 | z = zs[i] 7 | c = cs[i] 8 | while n < maxiter and abs(z) < 2: 9 | z = z * z + c 10 | n += 1 11 | output[i] = n 12 | return output 13 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn2.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and abs(z) < 2: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- 
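cythonfn.pyx above swaps the abs(z) < 2 escape test used in cythonfn1.pyx and cythonfn2.pyx for (z.real * z.real + z.imag * z.imag) < 4, which avoids the square root hidden in abs(). A quick pure-Python check (illustrative only, not part of the repository) confirms the two tests agree:

import random

random.seed(0)
mismatches = 0
for _ in range(100_000):
    z = complex(random.uniform(-3, 3), random.uniform(-3, 3))
    # abs(z) < 2  is equivalent to  |z|^2 < 4, but the squared form needs no sqrt
    if (abs(z) < 2) != (z.real * z.real + z.imag * z.imag < 4):
        mismatches += 1
print("mismatches:", mismatches)  # expected: 0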
/07_compiling/julia/cython/cpython/cythonfn3.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, list zs, list cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and abs(z) < 2: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn4.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(int maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | cdef unsigned int i, n 4 | cdef double complex z, c 5 | output = [0] * len(zs) 6 | for i in range(len(zs)): 7 | n = 0 8 | z = zs[i] 9 | c = cs[i] 10 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 11 | z = z * z + c 12 | n += 1 13 | output[i] = n 14 | return output 15 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/cythonfn5.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | def calculate_z(int maxiter, zs, cs): 3 | """Calculate output list using Julia update rule""" 4 | cdef unsigned int i, n 5 | cdef double complex z, c 6 | output = [0] * len(zs) 7 | for i in range(len(zs)): 8 | n = 0 9 | z = zs[i] 10 | c = cs[i] 11 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 12 | z = z * z + c 13 | n += 1 14 | output[i] = n 15 | return output 16 | 17 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | #from cythonfn import calculate_z 4 | import cythonfn 5 | 6 | # area of complex space to investigate 7 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 8 | c_real, c_imag = -0.62772, -.42193 9 | 10 | def calc_pure_python(desired_width, max_iterations): 11 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 12 | x_step = (x2 - x1) / desired_width 13 | y_step = (y1 - y2) / desired_width 14 | x = [] 15 | y = [] 16 | ycoord = y2 17 | while ycoord > y1: 18 | y.append(ycoord) 19 | ycoord += y_step 20 | xcoord = x1 21 | while xcoord < x2: 22 | x.append(xcoord) 23 | xcoord += x_step 24 | # build a list of co-ordinates and the initial condition for each cell. 
25 | # Note that our initial condition is a constant and could easily be removed, 26 | # we use it to simulate a real-world scenario with several inputs to our function 27 | zs = [] 28 | cs = [] 29 | for ycoord in y: 30 | for xcoord in x: 31 | zs.append(complex(xcoord, ycoord)) 32 | cs.append(complex(c_real, c_imag)) 33 | 34 | print("Length of x:", len(x)) 35 | print("Total elements:", len(zs)) 36 | start_time = time.time() 37 | output = cythonfn.calculate_z(max_iterations, zs, cs) 38 | end_time = time.time() 39 | secs = end_time - start_time 40 | print(f"Took {secs:0.2f} seconds") 41 | 42 | assert sum(output) == 33219980 # this sum is expected for 1000^2 grid with 300 iterations 43 | 44 | 45 | 46 | # Calculate the Julia set using a pure Python solution with 47 | # reasonable defaults for a laptop 48 | calc_pure_python(desired_width=1000, max_iterations=300) 49 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | from Cython.Build import cythonize 4 | setup(ext_modules=cythonize("cythonfn.pyx", compiler_directives={"language_level": "3"})) 5 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython_pyximport/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | def calculate_z(maxiter, zs, cs): 2 | """Calculate output list using Julia update rule""" 3 | output = [0] * len(zs) 4 | for i in range(len(zs)): 5 | n = 0 6 | z = zs[i] 7 | c = cs[i] 8 | while n < maxiter and abs(z) < 2: 9 | z = z * z + c 10 | n += 1 11 | output[i] = n 12 | return output 13 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/cpython_pyximport/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | import pyximport 4 | pyximport.install(language_level=3) 5 | import cythonfn 6 | 7 | 8 | # area of complex space to investigate 9 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 10 | c_real, c_imag = -0.62772, -.42193 11 | 12 | def calc_pure_python(desired_width, max_iterations): 13 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 14 | x_step = (x2 - x1) / desired_width 15 | y_step = (y1 - y2) / desired_width 16 | x = [] 17 | y = [] 18 | ycoord = y2 19 | while ycoord > y1: 20 | y.append(ycoord) 21 | ycoord += y_step 22 | xcoord = x1 23 | while xcoord < x2: 24 | x.append(xcoord) 25 | xcoord += x_step 26 | # build a list of co-ordinates and the initial condition for each cell. 
27 | # Note that our initial condition is a constant and could easily be removed, 28 | # we use it to simulate a real-world scenario with several inputs to our function 29 | zs = [] 30 | cs = [] 31 | for ycoord in y: 32 | for xcoord in x: 33 | zs.append(complex(xcoord, ycoord)) 34 | cs.append(complex(c_real, c_imag)) 35 | 36 | print("Length of x:", len(x)) 37 | print("Total elements:", len(zs)) 38 | start_time = time.time() 39 | output = cythonfn.calculate_z(max_iterations, zs, cs) 40 | end_time = time.time() 41 | secs = end_time - start_time 42 | print(f"Took {secs:0.2f} seconds") 43 | 44 | assert sum(output) == 33219980 # this sum is expected for 1000^2 grid with 300 iterations 45 | 46 | 47 | 48 | # Calculate the Julia set using a pure Python solution with 49 | # reasonable defaults for a laptop 50 | calc_pure_python(desired_width=1000, max_iterations=300) 51 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 5 | """Calculate output list using Julia update rule""" 6 | cdef unsigned int i, n 7 | cdef double complex z, c 8 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 9 | for i in range(len(zs)): 10 | n = 0 11 | z = zs[i] 12 | c = cs[i] 13 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 14 | z = z * z + c 15 | n += 1 16 | output[i] = n 17 | return output 18 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/cythonfn1.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 5 | """Calculate output list using Julia update rule""" 6 | cdef unsigned int i, n 7 | cdef double complex z, c 8 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 9 | for i in range(len(zs)): 10 | n = 0 11 | z = zs[i] 12 | c = cs[i] 13 | while n < maxiter and abs(z) < 2: 14 | z = z * z + c 15 | n += 1 16 | output[i] = n 17 | return output 18 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/cythonfn2.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 5 | """Calculate output list using Julia update rule""" 6 | cdef unsigned int i, n 7 | cdef double complex z, c 8 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 9 | for i in range(len(zs)): 10 | n = 0 11 | z = zs[i] 12 | c = cs[i] 13 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 14 | z = z * z + c 15 | n += 1 16 | output[i] = n 17 | return output 18 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | import numpy as np 4 | import cythonfn 5 | 6 | # area of complex space to investigate 7 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 8 | c_real, c_imag = -0.62772, -.42193 9 | 10 | 11 | def 
calc_pure_python(desired_width, max_iterations): 12 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 13 | x_step = (x2 - x1) / desired_width 14 | y_step = (y1 - y2) / desired_width 15 | x = [] 16 | y = [] 17 | ycoord = y2 18 | while ycoord > y1: 19 | y.append(ycoord) 20 | ycoord += y_step 21 | xcoord = x1 22 | while xcoord < x2: 23 | x.append(xcoord) 24 | xcoord += x_step 25 | # build a list of co-ordinates and the initial condition for each cell. 26 | # Note that our initial condition is a constant and could easily be removed, 27 | # we use it to simulate a real-world scenario with several inputs to our function 28 | zs = [] 29 | cs = [] 30 | for ycoord in y: 31 | for xcoord in x: 32 | zs.append(complex(xcoord, ycoord)) 33 | cs.append(complex(c_real, c_imag)) 34 | 35 | zs_np = np.array(zs, np.complex128) 36 | cs_np = np.array(cs, np.complex128) 37 | 38 | print("Length of x:", len(x)) 39 | print("Total elements:", len(zs)) 40 | start_time = time.time() 41 | output = cythonfn.calculate_z(max_iterations, zs_np, cs_np) 42 | end_time = time.time() 43 | secs = end_time - start_time 44 | print(f"Took {secs:0.2f} seconds") 45 | 46 | validation_sum = sum(output) 47 | print("Total sum of elements (for validation):", validation_sum) 48 | 49 | 50 | # Calculate the Julia set using a pure Python solution with 51 | # reasonable defaults for a laptop 52 | # set draw_output to True to use PIL to draw an image 53 | calc_pure_python(desired_width=1000, max_iterations=300) 54 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | from cython.parallel import parallel, prange 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 7 | """Calculate output list using Julia update rule""" 8 | cdef unsigned int i, length 9 | cdef double complex z, c 10 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 11 | length = len(zs) 12 | with nogil, parallel(): 13 | for i in prange(length, schedule="guided"): 14 | z = zs[i] 15 | c = cs[i] 16 | output[i] = 0 17 | while output[i] < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 18 | z = z * z + c 19 | output[i] += 1 20 | return output 21 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn1.pyx: -------------------------------------------------------------------------------- 1 | from cython.parallel import prange 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 6 | """Calculate output list using Julia update rule""" 7 | cdef unsigned int i, length 8 | cdef double complex z, c 9 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 10 | length = len(zs) 11 | with nogil: 12 | for i in prange(length, schedule="guided"): 13 | z = zs[i] 14 | c = cs[i] 15 | output[i] = 0 16 | while output[i] < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 17 | z = z * z + c 18 | output[i] += 1 19 | return output 20 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn2.pyx: -------------------------------------------------------------------------------- 1 | #cython: 
boundscheck=False 2 | from cython.parallel import prange 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 7 | """Calculate output list using Julia update rule""" 8 | cdef unsigned int i, length, n 9 | cdef double complex z, c 10 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 11 | length = len(zs) 12 | with nogil: 13 | for i in prange(length, schedule="guided"): 14 | z = zs[i] 15 | c = cs[i] 16 | n = 0 17 | while n < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 18 | z = z * z + c 19 | n = n + 1 20 | output[i] = n 21 | return output 22 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/cythonfn3.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | from cython.parallel import parallel, prange 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | def calculate_z(int maxiter, double complex[:] zs, double complex[:] cs): 7 | """Calculate output list using Julia update rule""" 8 | cdef unsigned int i, length 9 | cdef double complex z, c 10 | cdef int[:] output = np.empty(len(zs), dtype=np.int32) 11 | length = len(zs) 12 | with nogil, parallel(): 13 | for i in prange(length, schedule="guided"): 14 | z = zs[i] 15 | c = cs[i] 16 | output[i] = 0 17 | while output[i] < maxiter and (z.real * z.real + z.imag * z.imag) < 4: 18 | z = z * z + c 19 | output[i] += 1 20 | return output 21 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/julia1.py: -------------------------------------------------------------------------------- 1 | """Julia set generator without optional PIL-based image drawing""" 2 | import time 3 | import numpy as np 4 | import cythonfn 5 | 6 | # area of complex space to investigate 7 | x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8 8 | c_real, c_imag = -0.62772, -.42193 9 | 10 | 11 | def calc_pure_python(desired_width, max_iterations): 12 | """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display""" 13 | x_step = (x2 - x1) / desired_width 14 | y_step = (y1 - y2) / desired_width 15 | x = [] 16 | y = [] 17 | ycoord = y2 18 | while ycoord > y1: 19 | y.append(ycoord) 20 | ycoord += y_step 21 | xcoord = x1 22 | while xcoord < x2: 23 | x.append(xcoord) 24 | xcoord += x_step 25 | # build a list of co-ordinates and the initial condition for each cell. 
26 | # Note that our initial condition is a constant and could easily be removed, 27 | # we use it to simulate a real-world scenario with several inputs to our function 28 | zs = [] 29 | cs = [] 30 | for ycoord in y: 31 | for xcoord in x: 32 | zs.append(complex(xcoord, ycoord)) 33 | cs.append(complex(c_real, c_imag)) 34 | 35 | zs_np = np.array(zs, np.complex128) 36 | cs_np = np.array(cs, np.complex128) 37 | 38 | print("Length of x:", len(x)) 39 | print("Total elements:", len(zs)) 40 | start_time = time.time() 41 | output = cythonfn.calculate_z(max_iterations, zs_np, cs_np) 42 | end_time = time.time() 43 | secs = end_time - start_time 44 | print(f"Took {secs:0.2f} seconds") 45 | 46 | validation_sum = sum(output) 47 | print("Total sum of elements (for validation):", validation_sum) 48 | 49 | 50 | # Calculate the Julia set using a pure Python solution with 51 | # reasonable defaults for a laptop 52 | # set draw_output to True to use PIL to draw an image 53 | calc_pure_python(desired_width=1000, max_iterations=300) 54 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/parallel/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | import numpy as np 4 | 5 | ext_modules = [Extension( 6 | "cythonfn", 7 | ["cythonfn.pyx"], 8 | 9 | extra_compile_args=['-fopenmp'], 10 | extra_link_args=['-fopenmp'], 11 | )] 12 | 13 | from Cython.Build import cythonize 14 | setup(ext_modules=cythonize(ext_modules, compiler_directives={"language_level": "3"},),include_dirs=[np.get_include()]) 15 | 16 | -------------------------------------------------------------------------------- /07_compiling/julia/cython/nparray_memoryview/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | import numpy as np 3 | 4 | from Cython.Build import cythonize 5 | setup(ext_modules=cythonize("cythonfn.pyx", compiler_directives={"language_level": "3"}), 6 | include_dirs=[np.get_include()]) 7 | 8 | -------------------------------------------------------------------------------- /07_compiling/pytorch/compare.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import partial 3 | 4 | from tqdm import tqdm 5 | 6 | import diffusion_numpy 7 | import diffusion_pytorch 8 | import numpy as np 9 | import pylab as py 10 | 11 | 12 | def get_timings(fxn): 13 | start = time.perf_counter() 14 | fxn() 15 | return time.perf_counter() - start 16 | 17 | 18 | if __name__ == "__main__": 19 | grid_sizes = (256, 512, 1024, 2048, 4096) 20 | n_iter = 1000 21 | 22 | diffusion_pytorch.run_experiment(1, (24, 24), "cuda:0") 23 | results_pytorch_gpu = [] 24 | for g in tqdm(grid_sizes, desc="pytorch gpu"): 25 | r = get_timings( 26 | partial(diffusion_pytorch.run_experiment, n_iter, (g, g), "cuda:0") 27 | ) 28 | results_pytorch_gpu.append(r) 29 | results_pytorch_gpu = np.asarray(results_pytorch_gpu) 30 | 31 | results_numpy = [] 32 | for g in tqdm(grid_sizes, desc="numpy"): 33 | r = get_timings(partial(diffusion_numpy.run_experiment, n_iter, (g, g))) 34 | results_numpy.append(r) 35 | results_numpy = np.asarray(results_numpy) 36 | 37 | # diffusion_pytorch.run_experiment(1, (24, 24), 'cpu') 38 | # results_pytorch_cpu = [] 39 | # for g in tqdm(grid_sizes, desc="pytorch cpu"): 40 | # r = 
get_timings(partial(diffusion_pytorch.run_experiment, n_iter, (g, g), 'cpu')) 41 | # results_pytorch_cpu.append(r) 42 | # results_pytorch_cpu = np.asarray(results_pytorch_cpu) 43 | 44 | print(grid_sizes) 45 | print(results_numpy / results_pytorch_gpu) 46 | print() 47 | 48 | fig = py.figure() 49 | py.plot(grid_sizes, results_numpy, "-v", label="Numpy") 50 | py.plot(grid_sizes, results_pytorch_gpu, "-o", label="PyTorch GPU") 51 | # py.plot(grid_sizes, results_pytorch_cpu, '-x', label="PyTorch CPU") 52 | py.legend() 53 | py.title("Runtime for various grid sizes") 54 | py.xlabel("Grid size") 55 | py.ylabel("Runtime (seconds)") 56 | py.yscale("log") 57 | py.savefig("../../../images/comparison_pytorch_vs_numpy.png") 58 | py.close(fig) 59 | -------------------------------------------------------------------------------- /07_compiling/pytorch/diffusion_numpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import timeit 4 | 5 | from numpy import roll, zeros 6 | 7 | GRID_SHAPE = (2048, 2048) 8 | 9 | 10 | def laplacian(grid): 11 | return ( 12 | roll(grid, +1, 0) 13 | + roll(grid, -1, 0) 14 | + roll(grid, +1, 1) 15 | + roll(grid, -1, 1) 16 | - 4 * grid 17 | ) 18 | 19 | 20 | def evolve(grid, dt, D=1): 21 | return grid + dt * D * laplacian(grid) 22 | 23 | 24 | def run_experiment(num_iterations, grid_shape=GRID_SHAPE): 25 | grid = zeros(grid_shape) 26 | 27 | block_low = int(grid_shape[0] * 0.4) 28 | block_high = int(grid_shape[0] * 0.5) 29 | grid[block_low:block_high, block_low:block_high] = 0.005 30 | 31 | for i in range(num_iterations): 32 | grid = evolve(grid, 0.1) 33 | return grid 34 | 35 | 36 | if __name__ == "__main__": 37 | n_iter = 100 38 | N, runtime = timeit.Timer( 39 | f"run_experiment({n_iter})", globals=globals() 40 | ).autorange() 41 | print(f"Runtime with grid {grid_shape}: {runtime / N:0.4f}s") 42 | -------------------------------------------------------------------------------- /07_compiling/pytorch/random_access.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import partial 3 | 4 | import torch 5 | 6 | 7 | def timer(fxn, max_time=5): 8 | N = 0 9 | total_time = 0 10 | fxn() 11 | while total_time < max_time: 12 | start = time.perf_counter() 13 | fxn() 14 | total_time += time.perf_counter() - start 15 | N += 1 16 | return total_time / N 17 | 18 | 19 | def task(A, target): 20 | result = 0 21 | i = 0 22 | N = 0 23 | while result < target: 24 | r = A[i] 25 | result += r 26 | i = A[i] 27 | N += 1 28 | return N 29 | 30 | 31 | if __name__ == "__main__": 32 | N = 1000 33 | print(f"Testing with array of length {N}") 34 | 35 | A_py = (torch.rand(N) * N).type(torch.int).to("cuda:0") 36 | A_np = A_py.cpu().numpy() 37 | 38 | t_py = timer(partial(task, A_py, 500)) 39 | t_np = timer(partial(task, A_np, 500)) 40 | print(f"PyTorch took: {t_py:0.3e}s") 41 | print(f"Numpy took: {t_np:0.3e}s") 42 | print(f"Numpy is {100 - t_np/t_py*100:0.2f}% faster") 43 | -------------------------------------------------------------------------------- /07_compiling/pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch 2 | -------------------------------------------------------------------------------- /08_concurrency/.notempty: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/.notempty -------------------------------------------------------------------------------- /08_concurrency/cralwer/asyncio/crawler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import random 3 | import string 4 | 5 | import aiohttp 6 | 7 | 8 | def generate_urls(base_url, num_urls): 9 | for i in range(num_urls): 10 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 11 | 12 | 13 | def chunked_http_client(num_chunks): 14 | """ 15 | Returns a function that can fetch from a URL, ensuring that only 16 | "num_chunks" of simultaneous connects are made. 17 | """ 18 | semaphore = asyncio.Semaphore(num_chunks) # <1> 19 | 20 | async def http_get(url, client_session): # <2> 21 | nonlocal semaphore 22 | async with semaphore: 23 | async with client_session.request("GET", url) as response: 24 | return await response.content.read() 25 | 26 | return http_get 27 | 28 | 29 | async def run_experiment(base_url, num_iter=1000): 30 | urls = generate_urls(base_url, num_iter) 31 | http_client = chunked_http_client(100) 32 | responses_sum = 0 33 | async with aiohttp.ClientSession() as client_session: 34 | tasks = [http_client(url, client_session) for url in urls] # <3> 35 | for future in asyncio.as_completed(tasks): # <4> 36 | data = await future 37 | responses_sum += len(data) 38 | return responses_sum 39 | 40 | 41 | if __name__ == "__main__": 42 | import time 43 | 44 | loop = asyncio.get_event_loop() 45 | delay = 100 46 | num_iter = 1000 47 | 48 | start = time.time() 49 | result = loop.run_until_complete( 50 | run_experiment( 51 | f"http://127.0.0.1:8080/add?name=asyncio&delay={delay}&", num_iter 52 | ) 53 | ) 54 | end = time.time() 55 | print(f"Result: {result}, Time: {end - start}") 56 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ulimit -n 2048 4 | 5 | (python server.py --port=8080 &> /dev/null) & 6 | server_pid=$! 7 | sleep 1 # wait for server to be ready 8 | 9 | for i in asyncio gevent tornado serial 10 | do 11 | pushd $i 12 | python crawler.py 13 | popd 14 | done 15 | 16 | curl "localhost:8080/add?flush=True" 17 | kill $server_pid 18 | 19 | mkdir images 20 | python visualize.py 21 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/gevent/crawler.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import urllib.error 4 | import urllib.parse 5 | import urllib.request 6 | from contextlib import closing 7 | 8 | import gevent 9 | from gevent import monkey 10 | from gevent.lock import Semaphore 11 | 12 | monkey.patch_socket() 13 | 14 | 15 | def generate_urls(base_url, num_urls): 16 | for i in range(num_urls): 17 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 18 | 19 | 20 | def download(url, semaphore): 21 | with semaphore: # <2> 22 | with closing(urllib.request.urlopen(url)) as data: 23 | return data.read() 24 | 25 | 26 | def chunked_requests(urls, chunk_size=100): 27 | """ 28 | Given an iterable of urls, this function will yield back the contents of the 29 | URLs. 
The requests will be batched up in "chunk_size" batches using a 30 | semaphore 31 | """ 32 | semaphore = Semaphore(chunk_size) # <1> 33 | requests = [gevent.spawn(download, u, semaphore) for u in urls] # <3> 34 | for response in gevent.iwait(requests): 35 | yield response 36 | 37 | 38 | def run_experiment(base_url, num_iter=1000): 39 | urls = generate_urls(base_url, num_iter) 40 | response_futures = chunked_requests(urls, 100) # <4> 41 | response_size = sum(len(r.value) for r in response_futures) 42 | return response_size 43 | 44 | 45 | if __name__ == "__main__": 46 | import time 47 | 48 | delay = 100 49 | num_iter = 1000 50 | base_url = f"http://127.0.0.1:8080/add?name=gevent&delay={delay}&" 51 | 52 | start = time.time() 53 | result = run_experiment(base_url, num_iter) 54 | end = time.time() 55 | print(f"Result: {result}, Time: {end - start}") 56 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/asyncio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/asyncio.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/gevent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/gevent.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/grequests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/grequests.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/parallel_requests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/parallel_requests.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/serial.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/images/tornado.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/cralwer/images/tornado.png -------------------------------------------------------------------------------- /08_concurrency/cralwer/parallel_requests.json: -------------------------------------------------------------------------------- 1 | {"50": [[1, 25.65538716316223], [26, 1.0850036144256592], [51, 0.6126880645751953], [76, 0.4636659622192383], [101, 0.40830278396606445], [126, 0.3793010711669922], [151, 0.37442588806152344], [176, 0.38240480422973633], [201, 0.3782355785369873], [226, 0.3731365203857422], 
[251, 0.35320305824279785], [276, 0.363983154296875], [301, 0.3743610382080078], [326, 0.39357542991638184], [351, 0.38543701171875], [376, 0.39388275146484375], [401, 1.178931713104248], [426, 1.2373404502868652], [451, 1.2409429550170898], [476, 1.2445108890533447]], "300": [[1, 150.9001648426056], [26, 6.09351110458374], [51, 3.1123225688934326], [76, 2.1965770721435547], [101, 1.651137351989746], [126, 1.3618719577789307], [151, 1.2949111461639404], [176, 1.0845177173614502], [201, 1.0513508319854736], [226, 1.0078027248382568], [251, 0.8154821395874023], [276, 1.4151887893676758], [301, 0.8222417831420898], [326, 0.8313257694244385], [351, 0.765545129776001], [376, 0.7526142597198486], [401, 0.7465243339538574], [426, 0.735400915145874], [451, 1.464097499847412], [476, 1.4736013412475586]], "550": [[1, 276.0473265647888], [26, 11.085755825042725], [51, 5.614370822906494], [76, 3.968106269836426], [101, 2.901707887649536], [126, 2.3537800312042236], [151, 2.3039634227752686], [176, 1.8280274868011475], [201, 1.7996525764465332], [226, 1.7501981258392334], [251, 1.342069387435913], [276, 1.3174073696136475], [301, 1.3279151916503906], [326, 1.323756456375122], [351, 1.6952769756317139], [376, 1.2549707889556885], [401, 1.257312536239624], [426, 1.68515944480896], [451, 1.7300856113433838], [476, 1.7483623027801514]], "800": [[1, 401.2360224723816], [26, 16.11333131790161], [51, 8.121342658996582], [76, 5.696434736251831], [101, 4.1515562534332275], [126, 3.358698844909668], [151, 3.2921557426452637], [176, 2.590240478515625], [201, 2.5426173210144043], [226, 2.4937210083007812], [251, 1.8391218185424805], [276, 1.813462734222412], [301, 1.8160362243652344], [326, 1.817732810974121], [351, 1.7900586128234863], [376, 1.7599964141845703], [401, 1.7514441013336182], [426, 1.7195136547088623], [451, 1.9931211471557617], [476, 1.9955008029937744]]} -------------------------------------------------------------------------------- /08_concurrency/cralwer/serial/crawler.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | import requests 5 | 6 | 7 | def generate_urls(base_url, num_urls): 8 | """ 9 | We add random characters to the end of the URL to break any caching 10 | mechanisms in the requests library or the server 11 | """ 12 | for i in range(num_urls): 13 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 14 | 15 | 16 | def run_experiment(base_url, num_iter=1000): 17 | response_size = 0 18 | for url in generate_urls(base_url, num_iter): 19 | response = requests.get(url) 20 | response_size += len(response.text) 21 | return response_size 22 | 23 | 24 | if __name__ == "__main__": 25 | import time 26 | 27 | delay = 100 28 | num_iter = 1000 29 | base_url = f"http://127.0.0.1:8080/add?name=serial&delay={delay}&" 30 | 31 | start = time.time() 32 | result = run_experiment(base_url, num_iter) 33 | end = time.time() 34 | print(f"Result: {result}, Time: {end - start}") 35 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/server.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from collections import defaultdict 4 | 5 | from tornado import gen, httpserver, ioloop, options, web 6 | 7 | options.define("port", default=8080, help="Port to serve on") 8 | 9 | 10 | class AddMetric(web.RequestHandler): 11 | metric_data = defaultdict(list) 12 | 13 | async def get(self): 14 | if 
self.get_argument("flush", False): 15 | json.dump(self.metric_data, open("metric_data.json", "w+")) 16 | else: 17 | name = self.get_argument("name") 18 | try: 19 | delay = int(self.get_argument("delay", 1024)) 20 | except ValueError: 21 | raise web.HTTPError(400, reason="Invalid value for delay") 22 | 23 | start = time.time() 24 | await gen.sleep(delay / 1000.0) 25 | self.write(".") 26 | self.finish() 27 | end = time.time() 28 | self.metric_data[name].append( 29 | {"start": start, "end": end, "dt": end - start} 30 | ) 31 | 32 | 33 | if __name__ == "__main__": 34 | options.parse_command_line() 35 | port = options.options.port 36 | 37 | application = web.Application([(r"/add", AddMetric)]) 38 | 39 | http_server = httpserver.HTTPServer(application) 40 | http_server.listen(port) 41 | print(("Listening on port: {}".format(port))) 42 | ioloop.IOLoop.instance().start() 43 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/tornado/crawler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import random 3 | import string 4 | from functools import partial 5 | 6 | from tornado.httpclient import AsyncHTTPClient 7 | 8 | AsyncHTTPClient.configure( 9 | "tornado.curl_httpclient.CurlAsyncHTTPClient", max_clients=100 # <1> 10 | ) 11 | 12 | 13 | def generate_urls(base_url, num_urls): 14 | for i in range(num_urls): 15 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 16 | 17 | 18 | async def run_experiment(base_url, num_iter=1000): 19 | http_client = AsyncHTTPClient() 20 | urls = generate_urls(base_url, num_iter) 21 | response_sum = 0 22 | tasks = [http_client.fetch(url) for url in urls] # <2> 23 | for task in asyncio.as_completed(tasks): # <3> 24 | response = await task # <4> 25 | response_sum += len(response.body) 26 | return response_sum 27 | 28 | 29 | if __name__ == "__main__": 30 | import time 31 | 32 | delay = 100 33 | num_iter = 1000 34 | run_func = partial( 35 | run_experiment, 36 | f"http://127.0.0.1:8080/add?name=tornado&delay={delay}&", 37 | num_iter, 38 | ) 39 | 40 | start = time.time() 41 | result = asyncio.run(run_func) # <5> 42 | end = time.time() 43 | print(f"Result: {result}, Time: {end - start}") 44 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/tornado_callback/crawler.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from functools import partial 4 | 5 | from tornado import ioloop 6 | from tornado.httpclient import AsyncHTTPClient 7 | 8 | AsyncHTTPClient.configure( 9 | "tornado.curl_httpclient.CurlAsyncHTTPClient", max_clients=100 10 | ) 11 | 12 | 13 | def generate_urls(base_url, num_urls): 14 | for i in range(num_urls): 15 | yield base_url + "".join(random.sample(string.ascii_lowercase, 10)) 16 | 17 | 18 | def fetch_urls(urls, callback): 19 | http_client = AsyncHTTPClient() 20 | urls = list(urls) 21 | responses = [] 22 | 23 | def _finish_fetch_urls(result): 24 | responses.append(result) 25 | if len(responses) == len(urls): 26 | callback(responses) 27 | 28 | for url in urls: 29 | http_client.fetch(url, callback=_finish_fetch_urls) 30 | 31 | 32 | def run_experiment(base_url, num_iter=500, callback=None): 33 | urls = generate_urls(base_url, num_iter) 34 | callback_passthrou = partial(_finish_run_experiment, callback=callback) 35 | fetch_urls(urls, callback_passthrou) 36 | 37 | 38 | def _finish_run_experiment(responses, 
callback): 39 | response_sum = sum(len(r.body) for r in responses) 40 | print(response_sum) 41 | callback() 42 | 43 | 44 | if __name__ == "__main__": 45 | import time 46 | 47 | delay = 100 48 | num_iter = 500 49 | base_url = "http://127.0.0.1:8080/add?name=tornado_callback&delay={}&".format(delay) 50 | 51 | _ioloop = ioloop.IOLoop.instance() 52 | _ioloop.add_callback(run_experiment, base_url, num_iter, _ioloop.stop) 53 | 54 | start = time.time() 55 | _ioloop.start() 56 | end = time.time() 57 | print((end - start)) 58 | -------------------------------------------------------------------------------- /08_concurrency/cralwer/visualize.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pylab as py 4 | 5 | if __name__ == "__main__": 6 | raw_data = json.load(open("metric_data.json")) 7 | 8 | for name, values in raw_data.items(): 9 | print(name) 10 | py.figure() 11 | py.title("Call timeline for {}".format(name)) 12 | min_t = values[0]["start"] 13 | for i, data in enumerate(values): 14 | py.plot([data["start"] - min_t, data["end"] - min_t], [i, i]) 15 | py.xlabel("Time") 16 | py.ylabel("Request Number") 17 | py.savefig("images/{}.png".format(name)) 18 | -------------------------------------------------------------------------------- /08_concurrency/requirements.txt: -------------------------------------------------------------------------------- 1 | bcrypt 2 | 3 | uvloop 4 | 5 | tornado 6 | pycurl 7 | 8 | aiohttp 9 | aiodns 10 | cchardet 11 | 12 | gevent 13 | 14 | requests 15 | -------------------------------------------------------------------------------- /08_concurrency/workload/images/async_callgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/async_callgraph.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_async_batches_no-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_async_batches_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_async_batches_no-IO_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_async_batches_no-IO_serial.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_async_no-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_async_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_batches_no-IO.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_batches_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_file-IO_no-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_file-IO_no-IO.png -------------------------------------------------------------------------------- /08_concurrency/workload/images/workload_no-IO_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/08_concurrency/workload/images/workload_no-IO_serial.png -------------------------------------------------------------------------------- /08_concurrency/workload/server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from tornado import httpserver, ioloop, options, web 4 | 5 | options.define("port", default=8080, help="Port to serve on") 6 | response = '{{"result": "{result}", "t": 0.5}}' 7 | 8 | 9 | class Addresult(web.RequestHandler): 10 | async def post(self): 11 | result = self.request.body 12 | await asyncio.sleep(0.1) 13 | self.write(response.format(result=result)) 14 | self.set_header("Content-Type", "application/json") 15 | self.finish() 16 | 17 | 18 | if __name__ == "__main__": 19 | options.parse_command_line() 20 | port = options.options.port 21 | 22 | application = web.Application([(r"/add", Addresult)]) 23 | 24 | http_server = httpserver.HTTPServer(application) 25 | http_server.listen(port) 26 | print(("Listening on port: {}".format(port))) 27 | ioloop.IOLoop.instance().start() 28 | -------------------------------------------------------------------------------- /08_concurrency/workload/workloads.json: -------------------------------------------------------------------------------- 1 | {"async": [[600, 8, 9.769921675004298], [400, 10, 25.225514382997062], [400, 11, 50.13304948201403], [400, 12, 99.9148739350203]], "serial": [[600, 8, 71.0296336459869], [400, 10, 66.04494852598873], [400, 11, 90.86424219899345], [400, 12, 140.64069702799316]], "no IO": [[600, 8, 9.369766311021522], [400, 10, 24.93677038900205], [400, 11, 49.76722630299628], [400, 12, 99.49409197299974]], "file IO": [[600, 8, 9.36683275300311], [400, 10, 24.921224841004005], [400, 11, 49.74984498100821], [400, 12, 99.56621096501476]], "batches": [[600, 8, 10.217370290978579], [400, 10, 25.483805308002047], [400, 11, 50.374549086991465], [400, 12, 100.2910156819853]], "async+uvloop": [[600, 8, 9.759301794023486], [400, 10, 25.212604185013333], [400, 11, 50.07677851500921], [400, 12, 99.95755646598991]]} -------------------------------------------------------------------------------- /09_multiprocessing/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/09_multiprocessing/.notempty -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex1_lock.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 
2 | import os 3 | import fasteners 4 | # python -m timeit -s "import ex1_lock" "ex1_lock.run_workers()" 5 | # 400ms 6 | 7 | 8 | MAX_COUNT_PER_PROCESS = 1000 9 | FILENAME = "count.txt" 10 | 11 | 12 | def work_smaller_chunks(filename, max_count): 13 | @fasteners.interprocess_locked('/tmp/tmp_lock') 14 | def work_write(filename): 15 | f = open(filename, "r") 16 | try: 17 | nbr = int(f.read()) 18 | except ValueError as err: 19 | print("File is empty, starting to count from 0, error: " + str(err)) 20 | nbr = 0 21 | #print(os.getpid()) 22 | f = open(filename, "w") 23 | f.write(str(nbr + 1) + '\n') 24 | f.close() 25 | 26 | for n in range(max_count): 27 | work_write(filename) 28 | 29 | @fasteners.interprocess_locked('/tmp/tmp_lock') 30 | def work(filename, max_count): 31 | for n in range(max_count): 32 | f = open(filename, "r") 33 | try: 34 | nbr = int(f.read()) 35 | except ValueError as err: 36 | print("File is empty, starting to count from 0, error: " + str(err)) 37 | nbr = 0 38 | f = open(filename, "w") 39 | f.write(str(nbr + 1) + '\n') 40 | f.close() 41 | 42 | 43 | def run_workers(): 44 | NBR_PROCESSES = 4 45 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 46 | print("Starting {} process(es) to count to {}".format(NBR_PROCESSES, total_expected_count)) 47 | # reset counter 48 | f = open(FILENAME, "w") 49 | f.close() 50 | 51 | processes = [] 52 | for process_nbr in range(NBR_PROCESSES): 53 | p = multiprocessing.Process(target=work, args=(FILENAME, MAX_COUNT_PER_PROCESS)) 54 | p.start() 55 | processes.append(p) 56 | 57 | for p in processes: 58 | p.join() 59 | 60 | print("Expecting to see a count of {}".format(total_expected_count)) 61 | print("{} contains:".format(FILENAME)) 62 | os.system('more ' + FILENAME) 63 | 64 | 65 | if __name__ == "__main__": 66 | run_workers() 67 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex1_nolock1.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | # python -m timeit -s "import ex1_nolock" "ex1_nolock.run_workers()" 4 | # 71ms 5 | 6 | MAX_COUNT_PER_PROCESS = 1000 7 | FILENAME = "count.txt" 8 | 9 | 10 | def work(filename, max_count): 11 | for n in range(max_count): 12 | f = open(filename, "r") 13 | try: 14 | nbr = int(f.read()) 15 | except ValueError as err: 16 | print("File is empty, starting to count from 0, error: " + str(err)) 17 | nbr = 0 18 | f = open(filename, "w") 19 | f.write(str(nbr + 1) + '\n') 20 | f.close() 21 | 22 | 23 | def run_workers(): 24 | NBR_PROCESSES = 1 25 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 26 | print("Starting {} process(es) to count to {}".format(NBR_PROCESSES, total_expected_count)) 27 | # reset counter 28 | f = open(FILENAME, "w") 29 | f.close() 30 | 31 | processes = [] 32 | for process_nbr in range(NBR_PROCESSES): 33 | p = multiprocessing.Process(target=work, args=(FILENAME, MAX_COUNT_PER_PROCESS)) 34 | p.start() 35 | processes.append(p) 36 | 37 | for p in processes: 38 | p.join() 39 | 40 | print("Expecting to see a count of {}".format(total_expected_count)) 41 | print("{} contains:".format(FILENAME)) 42 | os.system('more ' + FILENAME) 43 | 44 | 45 | if __name__ == "__main__": 46 | run_workers() 47 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex1_nolock4.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | # python 
-m timeit -s "import ex1_nolock" "ex1_nolock.run_workers()" 4 | # 71ms 5 | 6 | MAX_COUNT_PER_PROCESS = 1000 7 | FILENAME = "count.txt" 8 | 9 | 10 | def work(filename, max_count): 11 | for n in range(max_count): 12 | f = open(filename, "r") 13 | try: 14 | nbr = int(f.read()) 15 | except ValueError as err: 16 | print("File is empty, starting to count from 0, error: " + str(err)) 17 | nbr = 0 18 | f = open(filename, "w") 19 | f.write(str(nbr + 1) + '\n') 20 | f.close() 21 | 22 | 23 | def run_workers(): 24 | NBR_PROCESSES = 4 25 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 26 | print("Starting {} process(es) to count to {}".format(NBR_PROCESSES, total_expected_count)) 27 | # reset counter 28 | f = open(FILENAME, "w") 29 | f.close() 30 | 31 | processes = [] 32 | for process_nbr in range(NBR_PROCESSES): 33 | p = multiprocessing.Process(target=work, args=(FILENAME, MAX_COUNT_PER_PROCESS)) 34 | p.start() 35 | processes.append(p) 36 | 37 | for p in processes: 38 | p.join() 39 | 40 | print("Expecting to see a count of {}".format(total_expected_count)) 41 | print("{} contains:".format(FILENAME)) 42 | os.system('more ' + FILENAME) 43 | 44 | 45 | if __name__ == "__main__": 46 | run_workers() 47 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex2_lock.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | # python -m timeit -s "import ex2_lock" "ex2_lock.run_workers()" 3 | # 19ms using lock.acquire 4 | # 21ms using with.lock 5 | 6 | 7 | def work(value, max_count, lock): 8 | for n in range(max_count): 9 | with lock: 10 | value.value += 1 11 | #lock.acquire() 12 | #value.value += 1 13 | #lock.release() 14 | 15 | 16 | def run_workers(): 17 | NBR_PROCESSES = 4 18 | MAX_COUNT_PER_PROCESS = 1000 19 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 20 | processes = [] 21 | lock = multiprocessing.Lock() 22 | value = multiprocessing.Value('i', 0) 23 | for process_nbr in range(NBR_PROCESSES): 24 | p = multiprocessing.Process(target=work, args=(value, MAX_COUNT_PER_PROCESS, lock)) 25 | p.start() 26 | processes.append(p) 27 | 28 | # wait for the processes to finish 29 | for p in processes: 30 | p.join() 31 | 32 | # print the final value 33 | print("Expecting to see a count of {}".format(total_expected_count)) 34 | print("We have counted to {}".format(value.value)) 35 | 36 | 37 | if __name__ == "__main__": 38 | run_workers() 39 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex2_lock_rawvalue.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | # python -m timeit -s "import ex2_lock_rawvalue" "ex2_lock_rawvalue.run_workers()" 3 | # 18.5ms (slightly faster?) 
4 | # 5ms lock.acquire 5 | # 11ms with lock 6 | 7 | 8 | def work(value, max_count, lock): 9 | for n in range(max_count): 10 | with lock: 11 | value.value += 1 12 | #lock.acquire() 13 | #value.value += 1 14 | #lock.release() 15 | 16 | 17 | def run_workers(): 18 | NBR_PROCESSES = 4 19 | MAX_COUNT_PER_PROCESS = 1000 20 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 21 | processes = [] 22 | lock = multiprocessing.Lock() 23 | value = multiprocessing.RawValue('i', 0) 24 | for process_nbr in range(NBR_PROCESSES): 25 | p = multiprocessing.Process(target=work, args=(value, MAX_COUNT_PER_PROCESS, lock)) 26 | p.start() 27 | processes.append(p) 28 | 29 | # wait for the processes to finish 30 | for p in processes: 31 | p.join() 32 | 33 | # print the final value 34 | print("Expecting to see a count of {}".format(total_expected_count)) 35 | print("We have counted to {}".format(value.value)) 36 | 37 | 38 | if __name__ == "__main__": 39 | run_workers() 40 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex2_nolock.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | # python -m timeit -s "import ex2_nolock" "ex2_nolock.run_workers()" 3 | # 12ms 4 | 5 | 6 | def work(value, max_count): 7 | for n in range(max_count): 8 | value.value += 1 9 | 10 | 11 | def run_workers(): 12 | NBR_PROCESSES = 4 13 | MAX_COUNT_PER_PROCESS = 1000 14 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 15 | processes = [] 16 | value = multiprocessing.Value('i', 0) 17 | for process_nbr in range(NBR_PROCESSES): 18 | p = multiprocessing.Process(target=work, args=(value, MAX_COUNT_PER_PROCESS)) 19 | p.start() 20 | processes.append(p) 21 | 22 | # wait for the processes to finish 23 | for p in processes: 24 | p.join() 25 | 26 | # print the final value 27 | print("Expecting to see a count of {}".format(total_expected_count)) 28 | print("We have counted to {}".format(value.value)) 29 | 30 | if __name__ == "__main__": 31 | run_workers() 32 | -------------------------------------------------------------------------------- /09_multiprocessing/locking/ex3_redis.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import redis 3 | # python -m timeit -s "import ex3_redis" "ex3_redis.run_workers()" 4 | # 81 ms 5 | 6 | 7 | rds = redis.StrictRedis() 8 | REDIS_KEY = "ex3_redis_key" 9 | 10 | 11 | def work(max_count): 12 | for n in range(max_count): 13 | rds.incr(REDIS_KEY) 14 | 15 | 16 | def run_workers(): 17 | NBR_PROCESSES = 4 18 | MAX_COUNT_PER_PROCESS = 1000 19 | total_expected_count = NBR_PROCESSES * MAX_COUNT_PER_PROCESS 20 | 21 | rds[REDIS_KEY] = 0 22 | 23 | processes = [] 24 | for process_nbr in range(NBR_PROCESSES): 25 | p = multiprocessing.Process(target=work, args=(MAX_COUNT_PER_PROCESS,)) 26 | p.start() 27 | processes.append(p) 28 | 29 | for p in processes: 30 | p.join() 31 | 32 | # print the final value 33 | print("Expecting to see a count of {}".format(total_expected_count)) 34 | print("We have counted to {}".format(rds[REDIS_KEY])) 35 | 36 | 37 | if __name__ == "__main__": 38 | run_workers() 39 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_graph_speed_tests.py: -------------------------------------------------------------------------------- 1 | """Graph execution time for serial, threaded and processes forms of Pi estimation with 
lists""" 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # timings generated using 6 | # pi_lists_series, pi_lists_parallel 1 2 4 8, pi_lists_parallel --processes 1 2 4 8 7 | speeds = np.array([[71.1], 8 | [71.1, 71.0, 70.7, 71.0], 9 | [71.0, 37.1, 18.1, 18.7]]) 10 | 11 | nbr_cores = np.array([[1], 12 | [1, 2, 4, 8], 13 | [1, 2, 4, 8]]) 14 | 15 | labels = np.array(["Serial", "Threads", "Processes"]) 16 | 17 | plt.figure(1) 18 | plt.clf() 19 | markers = ['-.o', '--x', '-x'] 20 | for nc, sp, label, mk in zip(nbr_cores, speeds, labels, markers): 21 | plt.plot(nc, sp, mk, label=label, linewidth=2) 22 | plt.annotate("Serial and Threads have similar execution time", (nbr_cores[0][0]+0.2, speeds[0][0]+0.9) ) 23 | plt.legend(loc="lower left", framealpha=0.8) 24 | plt.ylim(0, 80) 25 | plt.xlim(0.5, 8.5) 26 | plt.ylabel("Execution time (seconds) - smaller is better") 27 | plt.xlabel("Number of workers") 28 | plt.title("Time to estimate Pi using objects with 100,000,000\ndart throws in series, threaded and with processes") 29 | #plt.grid() 30 | #plt.show() 31 | plt.tight_layout() 32 | plt.savefig("09_pi_lists_graph_speed_tests_threaded_processes.png") 33 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_lists_parallel_joblib.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import time 4 | import argparse 5 | from joblib import Parallel, delayed 6 | from pi_lists_parallel import estimate_nbr_points_in_quarter_circle 7 | 8 | 9 | if __name__ == "__main__": 10 | nbr_samples_in_total = int(1e8) 11 | nbr_parallel_blocks = 8 12 | 13 | nbr_samples_per_worker = int(nbr_samples_in_total / nbr_parallel_blocks) 14 | print("Making {:,} samples per {} worker".format(nbr_samples_per_worker, nbr_parallel_blocks)) 15 | t1 = time.time() 16 | nbr_in_quarter_unit_circles = Parallel(n_jobs=nbr_parallel_blocks, verbose=1)(delayed(estimate_nbr_points_in_quarter_circle)(nbr_samples_per_worker) for sample_idx in range(nbr_parallel_blocks)) 17 | pi_estimate = sum(nbr_in_quarter_unit_circles) * 4 / float(nbr_samples_in_total) 18 | print("Estimated pi", pi_estimate) 19 | print("Delta:", time.time() - t1) 20 | 21 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_lists_parallel_joblib_cache.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import time 4 | import argparse 5 | from joblib import Parallel, delayed 6 | from joblib import Memory 7 | from pi_lists_parallel import estimate_nbr_points_in_quarter_circle as estimate_nbr_points_in_quarter_circle_orig 8 | 9 | memory = Memory("./joblib_cache", verbose=0) 10 | 11 | @memory.cache 12 | def estimate_nbr_points_in_quarter_circle_with_idx(nbr_estimates, idx): 13 | print(f"Executing estimate_nbr_points_in_quarter_circle with {nbr_estimates} on sample {idx} on pid {os.getpid()}") 14 | nbr_trials_in_quarter_unit_circle = 0 15 | for step in range(int(nbr_estimates)): 16 | x = random.uniform(0, 1) 17 | y = random.uniform(0, 1) 18 | is_in_unit_circle = x * x + y * y <= 1.0 19 | nbr_trials_in_quarter_unit_circle += is_in_unit_circle 20 | 21 | return nbr_trials_in_quarter_unit_circle 22 | 23 | 24 | estimate_nbr_points_in_quarter_circle = memory.cache(estimate_nbr_points_in_quarter_circle_orig) 25 | 26 | if __name__ == "__main__": 27 | nbr_samples_in_total = 
int(1e8) 28 | nbr_parallel_blocks = 8 29 | 30 | nbr_samples_per_worker = int(nbr_samples_in_total / nbr_parallel_blocks) 31 | print("Making {:,} samples per {} worker".format(nbr_samples_per_worker, nbr_parallel_blocks)) 32 | t1 = time.time() 33 | # beware if you don't have a sample_idx, you cache the same result! 34 | nbr_in_quarter_unit_circles = Parallel(n_jobs=nbr_parallel_blocks)(delayed(estimate_nbr_points_in_quarter_circle_with_idx)(nbr_samples_per_worker, idx) for idx in range(nbr_parallel_blocks)) 35 | #nbr_in_quarter_unit_circles = Parallel(n_jobs=nbr_parallel_blocks)(delayed(estimate_nbr_points_in_quarter_circle)(nbr_samples_per_worker) for idx in range(nbr_parallel_blocks)) 36 | pi_estimate = sum(nbr_in_quarter_unit_circles) * 4 / float(nbr_samples_in_total) 37 | print("Estimated pi", pi_estimate) 38 | print("Delta:", time.time() - t1) 39 | 40 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_lists_parallel/pi_lists_series.py: -------------------------------------------------------------------------------- 1 | """Estimate Pi using 1 large array""" 2 | import time 3 | import numpy as np 4 | import pi_lists_parallel 5 | 6 | nbr_samples_in_total = int(1e8) 7 | 8 | t1 = time.time() 9 | nbr_in_circle = pi_lists_parallel.estimate_nbr_points_in_quarter_circle(nbr_samples_in_total) 10 | print("Took {0:2f}s".format(time.time() - t1)) 11 | pi_estimate = float(nbr_in_circle) / nbr_samples_in_total * 4 12 | print("Estimated pi", pi_estimate) 13 | print("Pi", np.pi) 14 | 15 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_monte_carlo_diagram/pi_plot_monte_carlo_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | if __name__ == "__main__": 6 | np.random.seed(0) # force repeatable plt 7 | nbr_items = int(1E4) 8 | xs = np.random.uniform(0, 1, nbr_items) 9 | ys = np.random.uniform(0, 1, nbr_items) 10 | estimate_inside_quarter_unit_circle = (xs * xs + ys * ys) <= 1 11 | nbr_trials_in_quarter_unit_circle = np.sum(estimate_inside_quarter_unit_circle) 12 | pi = (nbr_trials_in_quarter_unit_circle * 4) / nbr_items # estimate for the full circle 13 | 14 | plt.figure(1, figsize=(8, 8)) 15 | plt.clf() 16 | plt.plot(xs[estimate_inside_quarter_unit_circle], ys[estimate_inside_quarter_unit_circle], 'bx') 17 | plt.plot(xs[estimate_inside_quarter_unit_circle == False], ys[estimate_inside_quarter_unit_circle == False], 'g.') 18 | 19 | unit_circle_xs = np.arange(0, 1, 0.001) 20 | unit_circle_ys = np.sin(np.arccos(unit_circle_xs)) 21 | plt.plot(unit_circle_xs, unit_circle_ys, linewidth=2, c="k") 22 | plt.xticks([0.0, 1.0]) 23 | plt.yticks([0.0, 1.0]) 24 | plt.title("Pi estimated as {} using \n{:,} Monte Carlo dart throws".format(pi, int(nbr_items))) 25 | #plt.show() 26 | plt.tight_layout() 27 | plt.savefig("09_pi_plot_monte_carlo_example.png") 28 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_processes_parallel/pi_graph_speed_tests.py: -------------------------------------------------------------------------------- 1 | """Graph execution time for serial, threaded and processes forms of Pi estimation with numpy""" 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # timings generated using 6 | # pi_numpy_serial_blocks.py 7 | # (serial.py - same as serial blocks but for 1 large 
array only) 8 | # pi_numpy_parallel_worker.py 9 | speeds = np.array([[2.46], 10 | [2.46, 2.19, 2.13, 2.05], 11 | [2.46, 1.61, 0.88, 0.85]]) 12 | 13 | nbr_cores = np.array([[1], 14 | [1, 2, 4, 8], 15 | [1, 2, 4, 8]]) 16 | 17 | labels = np.array(["Serial", "Threads", "Processes"]) 18 | 19 | plt.figure(1) 20 | plt.clf() 21 | markers = ['-.x', '--x', '-x'] 22 | for nc, sp, label, mk in zip(nbr_cores, speeds, labels, markers): 23 | plt.plot(nc, sp, mk, label=label, linewidth=2) 24 | plt.legend(loc="lower left", framealpha=0.8) 25 | plt.ylim(0, 3) 26 | plt.xlim(0.5, 8.5) 27 | plt.ylabel("Execution time (seconds) - smaller is better") 28 | plt.xlabel("Number of workers") 29 | plt.title("Time to estimate Pi using numpy with 100,000,000\ndart throws in series, threaded and with processes") 30 | #plt.grid() 31 | #plt.show() 32 | plt.tight_layout() 33 | plt.savefig("09_pi_numpy_graph_speed_tests_threaded_processes.png") 34 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_processes_parallel/pi_numpy_serial.py: -------------------------------------------------------------------------------- 1 | """Estimate Pi using 1 large array""" 2 | import time 3 | import numpy as np 4 | import pi_numpy_parallel_worker 5 | 6 | nbr_samples_in_total = int(1e8) 7 | 8 | t1 = time.time() 9 | nbr_in_circle = pi_numpy_parallel_worker.estimate_nbr_points_in_quarter_circle(nbr_samples_in_total) 10 | print("Took {}s".format(time.time() - t1)) 11 | pi_estimate = float(nbr_in_circle) / nbr_samples_in_total * 4 12 | print("Estimated pi", pi_estimate) 13 | print("Pi", np.pi) 14 | -------------------------------------------------------------------------------- /09_multiprocessing/pi_estimation/pi_processes_parallel/pi_numpy_serial_blocks.py: -------------------------------------------------------------------------------- 1 | """Estimate Pi using blocks of serial work on 1 CPU""" 2 | import time 3 | #from multiprocessing.dummy import Pool 4 | import numpy as np 5 | 6 | 7 | def estimate_nbr_points_in_circle(nbr_samples): 8 | # set random seed for numpy in each new process 9 | # else the fork will mean they all share the same state 10 | np.random.seed() 11 | xs = np.random.uniform(0, 1, nbr_samples) 12 | ys = np.random.uniform(0, 1, nbr_samples) 13 | estimate_inside_quarter_unit_circle = (xs * xs + ys * ys) <= 1 14 | nbr_trials_in_quarter_unit_circle = np.sum(estimate_inside_quarter_unit_circle) 15 | return nbr_trials_in_quarter_unit_circle 16 | 17 | 18 | if __name__ == "__main__": 19 | nbr_samples_in_total = (1e8) 20 | 21 | nbr_parallel_blocks = 4 22 | nbr_samples_per_worker = int(nbr_samples_in_total / nbr_parallel_blocks) 23 | print("Making {} samples per worker".format(nbr_samples_per_worker)) 24 | 25 | t1 = time.time() 26 | nbr_in_circle = 0 27 | for npb in range(nbr_parallel_blocks): 28 | nbr_in_circle += estimate_nbr_points_in_circle(nbr_samples_per_worker) 29 | print("Took {}s".format(time.time() - t1)) 30 | pi_estimate = float(nbr_in_circle) / nbr_samples_in_total * 4 31 | print("Estimated pi", pi_estimate) 32 | print("Pi", np.pi) 33 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_generation/plot_serial_vs_queue_times.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | 3 | 4 | plt.figure() 5 | 6 | # primes_queue.py on job C 7 | xs = [1, 2, 4, 8] 8 | ys = [97, 97, 109, 111] 9 | #plt.scatter(xs, ys, marker='x') 10 | 
plt.plot(xs, ys, '--x', label="Using Queues") 11 | plt.annotate("1 child process via Queues", (xs[0], ys[0]-5)) 12 | 13 | # primes_queue_less_work - not sure there's any point showing this? 14 | #xs = [1, 2, 4, 8] 15 | #ys = [57, 36, 48, 49] 16 | #plt.scatter(xs, ys, marker='v') 17 | 18 | xs = [1] 19 | ys = [24] 20 | #plt.scatter(xs, ys, marker='o') 21 | plt.plot(xs, ys, '-o', label="No queue") 22 | plt.annotate("No queue", (xs[0], ys[0])) 23 | plt.xlim(0.5, 8.5) 24 | plt.ylim(0, 120) 25 | 26 | plt.title("The overhead of Queues on lightweight tasks") 27 | plt.ylabel("Seconds (smaller is better)") 28 | plt.xlabel("Number of processes") 29 | plt.legend(loc="center right") 30 | 31 | plt.draw() 32 | plt.tight_layout() 33 | plt.savefig("multiprocessing_serial_vs_queue_times.png") 34 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_generation/primes.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | 5 | def check_prime(n): 6 | if n % 2 == 0: 7 | return False 8 | for i in range(3, int(math.sqrt(n)) + 1, 2): 9 | if n % i == 0: 10 | return False 11 | return True 12 | 13 | 14 | if __name__ == "__main__": 15 | primes = [] 16 | t1 = time.time() 17 | #number_range = xrange(100000000, 100010000) # A 18 | #number_range = xrange(100000000, 100100000) # B 19 | number_range = range(100000000, 101000000) # C 20 | #number_range = xrange(1000000000, 1000100000) # D 21 | #number_range = xrange(100000000000, 100000100000) # E 22 | 23 | for possible_prime in number_range: 24 | if check_prime(possible_prime): 25 | primes.append(possible_prime) 26 | 27 | print("Took:", time.time() - t1) 28 | print(len(primes), primes[:10], primes[-10:]) 29 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_generation/primes_pool.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | import multiprocessing 4 | #import numpy as np 5 | import itertools 6 | 7 | 8 | def check_prime(n): 9 | if n % 2 == 0: 10 | return False 11 | for i in xrange(3, int(math.sqrt(n)) + 1, 2): 12 | if n % i == 0: 13 | return False 14 | return True 15 | 16 | 17 | if __name__ == "__main__": 18 | primes = [] 19 | NBR_PROCESSES = 4 20 | pool = multiprocessing.Pool(processes=NBR_PROCESSES) 21 | 22 | t1 = time.time() 23 | #number_range = xrange(100000000, 100010000) # A 24 | #number_range = xrange(100000000, 100100000) # B 25 | number_range = xrange(100000000, 101000000) # C 26 | #number_range = xrange(1000000000, 1000100000) # D 27 | #number_range = xrange(100000000000, 100000100000) # E 28 | 29 | #are_primes = pool.map(check_prime, number_range) # original 30 | #primes = np.array(number_range)[np.array(are_primes)] # original 31 | # 32 | # note using pool.map is fastest, but uses ram 33 | # using pool.imap is slower but uses less ram 34 | # pool.imap_unordered is even slower 35 | are_primes = pool.map(check_prime, number_range) 36 | primes = [p for p in itertools.compress(number_range, are_primes)] 37 | 38 | print "Took:", time.time() - t1 39 | print len(primes), primes[:10], primes[-10:] 40 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes.py: -------------------------------------------------------------------------------- 1 | import math 2 | import timeit 3 | 4 | 5 | def check_prime(n): 6 | if n % 2 == 0: 7 | return False 8 | 
from_i = 3 9 | to_i = math.sqrt(n) + 1 10 | for i in range(from_i, int(to_i), 2): 11 | if n % i == 0: 12 | return False 13 | return True 14 | 15 | 16 | if __name__ == "__main__": 17 | for label, nbr in [("trivial non-prime", 112272535095295), 18 | ("expensive non-prime18_1", 100109100129100369), 19 | ("expensive non-prime18_2", 100109100129101027), 20 | #("prime", 112272535095293)]: # 15 21 | #("prime17", 10000000002065383)] 22 | ("prime18_1", 100109100129100151), 23 | ("prime18_2", 100109100129162907)]: 24 | #("prime23", 22360679774997896964091)]: 25 | 26 | time_costs = timeit.repeat(stmt="check_prime({})".format(nbr), repeat=20, number=1, 27 | setup="from __main__ import check_prime") 28 | print("{:24} ({}) {: 3.6f}s".format(label, nbr, min(time_costs))) 29 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes_factor_test.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | 5 | def check_prime(n): 6 | if n % 2 == 0: 7 | return False, 2 8 | for i in xrange(3, int(math.sqrt(n)) + 1): 9 | if n % i == 0: 10 | return False, i 11 | return True, None 12 | 13 | 14 | if __name__ == "__main__": 15 | primes = [] 16 | t1 = time.time() 17 | 18 | # 100109100129100151 big prime 19 | # http://primes.utm.edu/curios/page.php/100109100129100151.html 20 | #number_range = xrange(100109100129100153, 100109100129101238, 2) 21 | number_range = xrange(100109100129101237, 100109100129201238, 2) 22 | 23 | # new expensive near-primes 24 | # [(95362951, (100109100129100369, 7.254560947418213)) 25 | # (171656941, (100109100129101027, 13.052711009979248)) 26 | # (121344023, (100109100129101291, 8.994053840637207) 27 | # note these two lines of timings look really wrong, they're about 4sec 28 | # each really 29 | # [(265687139, (100109100129102047, 19.642582178115845)), (219609683, (100109100129102277, 16.178056001663208)), (121344023, (100109100129101291, 8.994053840637207))] 30 | # [(316096873, (100109100129126653, 23.480671882629395)), (313994287, (100109100129111617, 23.262380123138428)), (307151363, (100109100129140177, 22.80288815498352))] 31 | # primes 32 | # 100109100129162907 33 | # 100109100129162947 34 | 35 | highest_factors = {} 36 | for possible_prime in number_range: 37 | t2 = time.time() 38 | is_prime, factor = check_prime(possible_prime) 39 | if is_prime: 40 | primes.append(possible_prime) 41 | print "GOT NEW PRIME", possible_prime 42 | else: 43 | highest_factors[factor] = (possible_prime, time.time() - t2) 44 | hf = highest_factors.items() 45 | hf.sort(reverse=True) 46 | print hf[:3] 47 | print "Took:", time.time() - t1 48 | print len(primes), primes[:10], primes[-10:] 49 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes_pool_per_number1.py: -------------------------------------------------------------------------------- 1 | """Check primality by splitting the list of factors""" 2 | import math 3 | import timeit 4 | from multiprocessing import Pool 5 | import create_range 6 | 7 | 8 | def check_prime_in_range(n_from_i_to_i): 9 | (n, (from_i, to_i)) = n_from_i_to_i 10 | if n % 2 == 0: 11 | return False 12 | assert from_i % 2 != 0 13 | for i in range(from_i, int(to_i), 2): 14 | if n % i == 0: 15 | return False 16 | return True 17 | 18 | 19 | def check_prime(n, pool, nbr_processes): 20 | from_i = 3 21 | to_i = int(math.sqrt(n)) + 1 22 | ranges_to_check = create_range.create(from_i, to_i, 
nbr_processes) 23 | ranges_to_check = list(zip(len(ranges_to_check) * [n], ranges_to_check)) 24 | assert len(ranges_to_check) == nbr_processes 25 | results = pool.map(check_prime_in_range, ranges_to_check) 26 | if False in results: 27 | return False 28 | return True 29 | 30 | 31 | if __name__ == "__main__": 32 | NBR_PROCESSES = 4 33 | pool = Pool(processes=NBR_PROCESSES) 34 | #import pdb; pdb.set_trace() 35 | print("Testing with {} processes".format(NBR_PROCESSES)) 36 | for label, nbr in [("trivial non-prime", 112272535095295), 37 | ("expensive non-prime18_1", 100109100129100369), 38 | ("expensive non-prime18_2", 100109100129101027), 39 | #("prime", 112272535095293)]: # 15 40 | #("prime17", 10000000002065383)] 41 | ("prime18_1", 100109100129100151), 42 | ("prime18_2", 100109100129162907)]: 43 | #("prime23", 22360679774997896964091)]: 44 | time_costs = timeit.repeat(stmt="check_prime({}, pool, {})".format(nbr, NBR_PROCESSES), repeat=20, number=1, 45 | setup="from __main__ import pool, check_prime") 46 | print("{:19} ({}) {: 3.6f}s".format(label, nbr, min(time_costs))) 47 | -------------------------------------------------------------------------------- /09_multiprocessing/prime_validation/primes_pool_per_number2.py: -------------------------------------------------------------------------------- 1 | """Check primality by splitting the list of factors with early prime check""" 2 | import math 3 | import timeit 4 | from multiprocessing import Pool 5 | import create_range 6 | 7 | 8 | def check_prime_in_range(n_from_i_to_i): 9 | (n, (from_i, to_i)) = n_from_i_to_i 10 | if n % 2 == 0: 11 | return False 12 | assert from_i % 2 != 0 13 | for i in range(from_i, int(to_i), 2): 14 | if n % i == 0: 15 | return False 16 | return True 17 | 18 | 19 | def check_prime(n, pool, nbr_processes): 20 | # cheaply check high probability set of possible factors 21 | from_i = 3 22 | to_i = 21 23 | if not check_prime_in_range((n, (from_i, to_i))): 24 | return False 25 | 26 | from_i = to_i 27 | to_i = int(math.sqrt(n)) + 1 28 | ranges_to_check = create_range.create(from_i, to_i, nbr_processes) 29 | ranges_to_check = list(zip(len(ranges_to_check) * [n], ranges_to_check)) 30 | assert len(ranges_to_check) == nbr_processes 31 | results = pool.map(check_prime_in_range, ranges_to_check) 32 | if False in results: 33 | return False 34 | return True 35 | 36 | 37 | if __name__ == "__main__": 38 | NBR_PROCESSES = 4 39 | pool = Pool(processes=NBR_PROCESSES) 40 | print("Testing with {} processes".format(NBR_PROCESSES)) 41 | for label, nbr in [("trivial non-prime", 112272535095295), 42 | ("expensive non-prime18_1", 100109100129100369), 43 | ("expensive non-prime18_2", 100109100129101027), 44 | #("prime", 112272535095293)]: # 15 45 | #("prime17", 10000000002065383)] 46 | ("prime18_1", 100109100129100151), 47 | ("prime18_2", 100109100129162907)]: 48 | #("prime23", 22360679774997896964091)]: 49 | 50 | time_costs = timeit.repeat(stmt="check_prime({}, pool, {})".format(nbr, NBR_PROCESSES), repeat=20, number=1, 51 | setup="from __main__ import pool, check_prime") 52 | print("{:24} ({}) {: 3.6f}s".format(label, nbr, min(time_costs))) 53 | -------------------------------------------------------------------------------- /10_clusters/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/10_clusters/.notempty -------------------------------------------------------------------------------- 
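Note on the two pool-per-number scripts above: they delegate the splitting of the candidate-factor range to create_range.create(from_i, to_i, nbr_processes), which lives in 09_multiprocessing/prime_validation/create_range.py and is not reproduced in this listing. The sketch below is a hypothetical stand-in (not the repository's implementation) showing the contract the callers rely on: it returns exactly nbr_processes (start, stop) pairs and every start is odd, so the assert in check_prime_in_range holds.

import math

def create(from_i, to_i, nbr_processes):
    """Split [from_i, to_i) into nbr_processes (start, stop) pairs with odd starts."""
    chunk = int(math.ceil((to_i - from_i) / nbr_processes))
    ranges = []
    start = from_i  # callers pass an odd start (3 or 21)
    for _ in range(nbr_processes):
        stop = min(start + chunk, to_i)
        ranges.append((start, stop))
        # keep the next start odd because check_prime_in_range steps by 2
        start = stop if stop % 2 != 0 else stop + 1
    return ranges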
/10_clusters/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | WORKDIR /usr/src/app 4 | COPY requirements.txt ./ 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | 7 | COPY . . 8 | CMD python ./diffusion_numpy_memory2.py 9 | -------------------------------------------------------------------------------- /10_clusters/docker/Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | docker build -t ${USER}/diffusion2d:numpy-memory2 -t ${USER}/diffusion2d:latest . 3 | 4 | run: build 5 | docker run ${USER}/diffusion2d:latest 6 | -------------------------------------------------------------------------------- /10_clusters/docker/diffusion_numpy_memory2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import timeit 5 | 6 | from numpy import add, copyto, multiply, zeros 7 | 8 | try: 9 | profile 10 | except NameError: 11 | profile = lambda x: x 12 | 13 | grid_shape = (256, 256) 14 | 15 | 16 | def roll_add(rollee, shift, axis, out): 17 | if shift == 1 and axis == 0: 18 | out[1:, :] += rollee[:-1, :] 19 | out[0, :] += rollee[-1, :] 20 | elif shift == -1 and axis == 0: 21 | out[:-1, :] += rollee[1:, :] 22 | out[-1, :] += rollee[0, :] 23 | elif shift == 1 and axis == 1: 24 | out[:, 1:] += rollee[:, :-1] 25 | out[:, 0] += rollee[:, -1] 26 | elif shift == -1 and axis == 1: 27 | out[:, :-1] += rollee[:, 1:] 28 | out[:, -1] += rollee[:, 0] 29 | 30 | 31 | def laplacian(grid, out): 32 | copyto(out, grid) 33 | multiply(out, -4.0, out) 34 | roll_add(grid, +1, 0, out) 35 | roll_add(grid, -1, 0, out) 36 | roll_add(grid, +1, 1, out) 37 | roll_add(grid, -1, 1, out) 38 | 39 | 40 | @profile 41 | def evolve(grid, dt, out, D=1): 42 | laplacian(grid, out) 43 | multiply(out, D * dt, out) 44 | add(out, grid, out) 45 | 46 | 47 | def run_experiment(num_iterations): 48 | scratch = zeros(grid_shape) 49 | grid = zeros(grid_shape) 50 | 51 | block_low = int(grid_shape[0] * 0.4) 52 | block_high = int(grid_shape[0] * 0.5) 53 | grid[block_low:block_high, block_low:block_high] = 0.005 54 | 55 | start = time.time() 56 | for i in range(num_iterations): 57 | evolve(grid, 0.1, scratch) 58 | grid, scratch = scratch, grid 59 | return time.time() - start 60 | 61 | 62 | if __name__ == "__main__": 63 | n_runs = 100 64 | runtime = timeit.timeit(f"run_experiment({n_runs})", number=25, globals=globals()) 65 | print(f"Runtime for {n_runs} with grid {grid_shape}: {runtime:0.4f}s") 66 | -------------------------------------------------------------------------------- /10_clusters/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18.0 2 | -------------------------------------------------------------------------------- /10_clusters/ipython_parallel/pi_ipython_cluster.py: -------------------------------------------------------------------------------- 1 | import time 2 | import ipyparallel as ipp 3 | from ipyparallel import require 4 | 5 | 6 | @require('os', 'random') 7 | def estimate_nbr_points_in_quarter_circle(nbr_estimates): 8 | """Monte carlo estimate of the number of points in a 9 | quarter circle using pure Python""" 10 | print(f"Executing estimate_nbr_points_in_quarter_circle with {nbr_estimates:,} on pid {os.getpid()}") 11 | nbr_trials_in_quarter_unit_circle = 0 12 | for step in range(int(nbr_estimates)): 13 | x = random.uniform(0, 1) 14 | y = random.uniform(0, 1)
15 | is_in_unit_circle = x * x + y * y <= 1.0 16 | nbr_trials_in_quarter_unit_circle += is_in_unit_circle 17 | return nbr_trials_in_quarter_unit_circle 18 | 19 | 20 | if __name__ == "__main__": 21 | c = ipp.Client() 22 | nbr_engines = len(c.ids) 23 | print("We're using {} engines".format(nbr_engines)) 24 | nbr_samples_in_total = 1e8 25 | nbr_parallel_blocks = 4 26 | 27 | dview = c[:] 28 | 29 | nbr_samples_per_worker = nbr_samples_in_total / nbr_parallel_blocks 30 | t1 = time.time() 31 | nbr_in_quarter_unit_circles = dview.apply_sync(estimate_nbr_points_in_quarter_circle, \ 32 | nbr_samples_per_worker) 33 | print("Estimates made:", nbr_in_quarter_unit_circles) 34 | 35 | nbr_jobs = len(nbr_in_quarter_unit_circles) 36 | pi_estimate = sum(nbr_in_quarter_unit_circles) * 4 / nbr_samples_in_total 37 | print("Estimated pi", pi_estimate) 38 | print("Delta:", time.time() - t1) 39 | -------------------------------------------------------------------------------- /10_clusters/nsq/nsq_worker.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import partial 3 | from math import sqrt 4 | 5 | import nsq 6 | 7 | 8 | def is_prime(number): 9 | if number % 2 == 0: 10 | return False 11 | for i in range(3, int(sqrt(number)) + 1, 2): 12 | if number % i == 0: 13 | return False 14 | return True 15 | 16 | 17 | def write_message(topic, data, writer): 18 | response = writer.pub(topic, data) 19 | if isinstance(response, nsq.Error): 20 | print("Error with Message: {}: {}".format(data, response)) 21 | return write_message(topic, data, writer) 22 | else: 23 | print("Published Message: ", data) 24 | 25 | 26 | def calculate_prime(message, writer): 27 | data = json.loads(message.body) 28 | 29 | prime = is_prime(data["number"]) 30 | data["prime"] = prime 31 | if prime: 32 | topic = "prime" 33 | else: 34 | topic = "non_prime" 35 | 36 | output_message = json.dumps(data).encode("utf8") 37 | write_message(topic, output_message, writer) 38 | message.finish() # <1> 39 | 40 | 41 | if __name__ == "__main__": 42 | writer = nsq.Writer(["127.0.0.1:4150"]) 43 | handler = partial(calculate_prime, writer=writer) 44 | reader = nsq.Reader( 45 | message_handler=handler, 46 | nsqd_tcp_addresses=["127.0.0.1:4150"], 47 | topic="numbers", 48 | channel="worker_group_a", 49 | ) 50 | nsq.run() 51 | -------------------------------------------------------------------------------- /11_less_ram/.notempty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/.notempty -------------------------------------------------------------------------------- /11_less_ram/compressing_text/plot_example.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | labels = ['list_bisect', 'set', 'Marisa Trie', 'DAWG'] 5 | ram_used = [835, 1260, 37, 73] 6 | build_time = [20, 24, 35, 31] 7 | lookup_time = [0.01, 0.002, 0.01, 0.005] 8 | 9 | #labels = ['list_bisect', 'set', 'Marisa Trie', 'DAWG', 'HAT Trie'] 10 | #ram_used = [920, 1112, 293, 958, 244] 11 | #build_time = [47, 31, 55, 63, 44] 12 | #lookup_time = [0.02, 0.003, 0.01, 0.004, 0.005] 13 | 14 | # make the build-time circles much larger 15 | build_time = [bt * 5 for bt in build_time] 16 | 17 | plt.figure(1) 18 | plt.clf() 19 | plt.scatter(ram_used, lookup_time, s=build_time) 20 | 21 | for ram, lookup, label in zip(ram_used,
lookup_time, labels): 22 | plt.annotate(label, (ram+15, lookup+0.0005)) 23 | 24 | plt.xlabel('RAM used (MB - lower is better)') 25 | plt.ylabel("Look-up time (seconds - lower is better)") 26 | plt.title("Container behavior for 11 million tokens\nsize represents build time (smaller is better)") 27 | plt.xlim(xmin=0) 28 | plt.ylim((-0.0005, 0.012)) 29 | plt.tight_layout() 30 | plt.savefig("less_ram_tries_dawg_text_11m_tokens.png") 31 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | 3 | # "Moby Words lists by Grady Ward" 4 | # http://www.gutenberg.org/ebooks/3201 5 | #SUMMARISED_FILE = "all_unique_words.txt" # 500k approx 6 | #CODEC = 'Windows-1252' 7 | 8 | CODEC = 'utf-8' 9 | SUMMARISED_FILE = "all_unique_words_wikipedia_via_gensim.txt" 10 | 11 | 12 | def read_words(filename): 13 | # return words from filename using a generator 14 | try: 15 | with codecs.open(filename, 'r', CODEC) as f: 16 | for line_nbr, line in enumerate(f): 17 | items = line.strip().split() 18 | for item in items: 19 | yield item 20 | except UnicodeDecodeError: 21 | print("UnicodeDecodeError for {} near line {} and word {}".format(filename, line_nbr, line)) 22 | 23 | readers = read_words(SUMMARISED_FILE) 24 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_clean_list.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import codecs 3 | import glob 4 | 5 | # Clean the raw wordlists into a unique 500,000(ish) word single file 6 | 7 | # "Moby Words lists by Grady Ward" 8 | # http://www.gutenberg.org/ebooks/3201 9 | LONG_FILES = glob.glob('mword10/*') 10 | SUMMARISED_FILE = "all_unique_words.txt" 11 | 12 | 13 | def read_words(filename): 14 | # return words from filename using a generator 15 | try: 16 | with codecs.open(filename, 'r', 'Windows-1252') as f: 17 | for line_nbr, line in enumerate(f): 18 | items = line.strip().split() 19 | for item in items: 20 | yield item 21 | except UnicodeDecodeError: 22 | print("UnicodeDecodeError for {} near line {} and word {}".format(filename, line_nbr, line)) 23 | 24 | readers = itertools.chain(*(read_words(lf) for lf in LONG_FILES)) 25 | 26 | if __name__ == "__main__": 27 | words_set = set(readers) 28 | print("Summarising input files into one output set of {} words".format(len(words_set))) 29 | with codecs.open(SUMMARISED_FILE, 'w', 'Windows-1252') as f: 30 | for word in words_set: 31 | f.write(word + "\n") 32 | 33 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_clean_list_wikipedia_gensim.py: -------------------------------------------------------------------------------- 1 | import gensim 2 | FILENAME = "/home/ian/workspace/personal_projects/high_performance_python_book_2e/high-performance-python-2e/examples_ian/ian/12_lessram/compressing_text/wikipedia_dump/enwiki_11M/_wordids.txt.bz2" 3 | 4 | id2word = gensim.corpora.Dictionary.load_from_text(FILENAME) 5 | #print(len([w for w in iter(id2word.values())])) 6 | print(len(id2word)) 7 | 8 | SUMMARISED_FILE = "all_unique_words_wikipedia_via_gensim.txt" 9 | 10 | print("Summarising input files into one output set of {} words".format(len(id2word))) 11 | with open(SUMMARISED_FILE, 'w') as f: 12 | for word in id2word.values(): 13 | f.write(word + "\n") 14 | 
15 | 16 | 17 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_dawg.py: -------------------------------------------------------------------------------- 1 | #pip instal DAWG failed 2 | #https://github.com/pytries/DAWG/issues/31 3 | #$ python text_example_dawg.py 4 | #pip install DAWG-Python 5 | #installs ok, but it is a read-only version of a wrapper to DAWG 6 | #https://pypi.org/project/DAWG-Python/ 7 | 8 | import time 9 | import timeit 10 | import text_example 11 | import memory_profiler 12 | import dawg # 13 | 14 | if __name__ == "__main__": 15 | print(("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0]))) 16 | # avoid building a temporary list of words in Python, store directly in the 17 | # DAWG 18 | t1 = time.time() 19 | words_dawg = dawg.DAWG(text_example.readers) 20 | t2 = time.time() 21 | print(("RAM after creating dawg {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1))) 22 | 23 | assert 'Zwiebel' in words_dawg 24 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_dawg", 25 | setup="from __main__ import words_dawg", 26 | number=1, 27 | repeat=10000)) 28 | print(("Summed time to lookup word {:0.4f}s".format(time_cost))) 29 | 30 | t1 = time.time() 31 | words_dawg.save('words_dawg.saved') 32 | t2 = time.time() 33 | d = dawg.DAWG() 34 | with open('words_dawg.saved', 'rb') as f: 35 | words_dawg2 = d.read(f) 36 | t3 = time.time() 37 | print(t2 - t1, t3-t2) 38 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_dawg_load_only.py: -------------------------------------------------------------------------------- 1 | #pip instal DAWG failed 2 | #https://github.com/pytries/DAWG/issues/31 3 | #$ python text_example_dawg.py 4 | #pip install DAWG-Python 5 | #installs ok, but it is a read-only version of a wrapper to DAWG 6 | #https://pypi.org/project/DAWG-Python/ 7 | 8 | import time 9 | import timeit 10 | import text_example 11 | import memory_profiler 12 | import dawg # 13 | 14 | if __name__ == "__main__": 15 | print(("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0]))) 16 | t2 = time.time() 17 | words_dawg = dawg.DAWG() 18 | with open('words_dawg.saved', 'rb') as f: 19 | words_dawg.read(f) 20 | t3 = time.time() 21 | print(t3-t2) 22 | print(("RAM after load {:0.1f}MiB".format(memory_profiler.memory_usage()[0]))) 23 | 24 | assert 'Zwiebel' in words_dawg 25 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_dawg", 26 | setup="from __main__ import words_dawg", 27 | number=1, 28 | repeat=10000)) 29 | print(("Summed time to lookup word {:0.4f}s".format(time_cost))) 30 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_list.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | 6 | if __name__ == "__main__": 7 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 8 | t1 = time.time() 9 | words = [w for w in text_example.readers] 10 | print("Loading {} words".format(len(words))) 11 | t2 = time.time() 12 | print("RAM after creating list {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 13 | 14 | assert 'Zwiebel' in words 15 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words", 16 | setup="from __main__ import words", 17 | number=1, 18 
| repeat=100)) 19 | time_cost *= 100 # to make it equivalent to 10_000 calls 20 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 21 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_list_bisect.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | import bisect 6 | 7 | 8 | def index(a, x): 9 | 'Locate the leftmost value exactly equal to x' 10 | i = bisect.bisect_left(a, x) 11 | if i != len(a) and a[i] == x: 12 | return i 13 | raise ValueError 14 | 15 | 16 | if __name__ == "__main__": 17 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 18 | t1 = time.time() 19 | words = [w for w in text_example.readers] 20 | print("Loading {} words".format(len(words))) 21 | t2 = time.time() 22 | print("RAM after creating list {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 23 | print("The list contains {} words".format(len(words))) 24 | words.sort() 25 | t3 = time.time() 26 | print("Sorting list took {:0.1f}s".format(t3 - t2)) 27 | 28 | assert 'Zwiebel' in words 29 | time_cost = sum(timeit.repeat(stmt="index(words, u'Zwiebel')", 30 | setup="from __main__ import words, index", 31 | number=1, 32 | repeat=10000)) 33 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 34 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_set.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | 6 | if __name__ == "__main__": 7 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 8 | # load the words directly into the set 9 | t1 = time.time() 10 | words_set = set(text_example.readers) 11 | t2 = time.time() 12 | print("RAM after creating set {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 13 | print("The set contains {} words".format(len(words_set))) 14 | 15 | assert 'Zwiebel' in words_set 16 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_set", 17 | setup="from __main__ import words_set", 18 | number=1, 19 | repeat=10000)) 20 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 21 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_trie.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | import marisa_trie 6 | 7 | if __name__ == "__main__": 8 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 9 | # avoid building a temporary list of words in Python, store directly in the 10 | # Trie 11 | t1 = time.time() 12 | words_trie = marisa_trie.Trie(text_example.readers) 13 | t2 = time.time() 14 | print("RAM after creating trie {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t2 - t1)) 15 | print("The trie contains {} words".format(len(words_trie))) 16 | 17 | assert 'Zwiebel' in words_trie 18 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_trie", 19 | setup="from __main__ import words_trie", 20 | number=1, 21 | repeat=10000)) 22 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 23 | 24 | t1 = time.time() 25 | 
words_trie.save('words_trie.saved') 26 | del words_trie 27 | print("RAM before loading from disk {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 28 | t2 = time.time() 29 | d = marisa_trie.Trie() 30 | with open('words_trie.saved', 'rb') as f: 31 | words_trie2 = d.read(f) 32 | t3 = time.time() 33 | print("RAM after loading trie from disk {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t3 - t2)) 34 | print("The trie contains {} words".format(len(words_trie2))) 35 | print(f"time to save {t2 - t1:f}s, time to load {t3-t2:f}s") 36 | assert 'Zwiebel' in words_trie2 37 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_trie2", 38 | setup="from __main__ import words_trie2", 39 | number=1, 40 | repeat=10000)) 41 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 42 | 43 | -------------------------------------------------------------------------------- /11_less_ram/compressing_text/text_example_trie_load_only.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | import text_example 4 | import memory_profiler 5 | import marisa_trie 6 | 7 | if __name__ == "__main__": 8 | 9 | print("RAM at start {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 10 | print("RAM before loading from disk {:0.1f}MiB".format(memory_profiler.memory_usage()[0])) 11 | t2 = time.time() 12 | d = marisa_trie.Trie() 13 | with open('words_trie.saved', 'rb') as f: 14 | words_trie = d.read(f) 15 | t3 = time.time() 16 | print("RAM after loading trie from disk {:0.1f}MiB, took {:0.1f}s".format(memory_profiler.memory_usage()[0], t3 - t2)) 17 | print("The trie contains {} words".format(len(words_trie))) 18 | print(f"time to load {t3-t2:f}s") 19 | assert 'Zwiebel' in words_trie 20 | time_cost = sum(timeit.repeat(stmt="u'Zwiebel' in words_trie", 21 | setup="from __main__ import words_trie", 22 | number=1, 23 | repeat=10000)) 24 | print("Summed time to lookup word {:0.4f}s".format(time_cost)) 25 | 26 | -------------------------------------------------------------------------------- /11_less_ram/morris_counter_example/morris_counter.py: -------------------------------------------------------------------------------- 1 | """Approximate Morris Counter supporting many counters""" 2 | import math 3 | import random 4 | import array 5 | 6 | SMALLEST_UNSIGNED_INTEGER = 'B' # unsigned char, typically 1 byte 7 | 8 | 9 | class MorrisCounter(object): 10 | """Approximate counter, stores exponent and counts approximately 2^exponent 11 | 12 | https://en.wikipedia.org/wiki/Approximate_counting_algorithm""" 13 | def __init__(self, type_code=SMALLEST_UNSIGNED_INTEGER, nbr_counters=1): 14 | self.exponents = array.array(type_code, [0] * nbr_counters) 15 | 16 | def __len__(self): 17 | return len(self.exponents) 18 | 19 | def add_counter(self): 20 | """Add a new zeroed counter""" 21 | self.exponents.append(0) 22 | 23 | def get(self, counter=0): 24 | """Calculate approximate value represented by counter""" 25 | return math.pow(2, self.exponents[counter]) 26 | 27 | def add(self, counter=0): 28 | """Probabilistically add 1 to counter""" 29 | value = self.get(counter) 30 | probability = 1.0 / value 31 | if random.uniform(0, 1) < probability: 32 | self.exponents[counter] += 1 33 | 34 | if __name__ == "__main__": 35 | mc = MorrisCounter() 36 | print("MorrisCounter has {} counters".format(len(mc))) 37 | for n in range(10): 38 | print("Iteration %d, MorrisCounter has: %d" % (n, mc.get())) 39 | mc.add() 40 | 41 | for n in range(990): 42 | mc.add() 43
| print("Iteration 1000, MorrisCounter has: %d" % (mc.get())) 44 | -------------------------------------------------------------------------------- /11_less_ram/numexpr_pandas/make_cross_entropy_picture.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | import pandas as pd 4 | import numpy as np 5 | #nbr_items = 200_000_000 6 | 7 | nbr_items = 99 8 | yp = np.linspace(0.01, 0.99, nbr_items) 9 | yt = np.ones(nbr_items) 10 | answer = -(yt * np.log(yp) + ((1-yt) * (np.log(1-yp)))) 11 | 12 | yt0 = np.zeros(nbr_items) 13 | answer0 = -(yt0 * np.log(yp) + ((1-yt0) * (np.log(1-yp)))) 14 | 15 | df = pd.DataFrame({'yp': yp, 'yt': yt, 'cross_entropy': answer, 'cross_entropy0': answer0}) 16 | 17 | fig, axs = plt.subplots(ncols=2) 18 | ax = axs[0] 19 | df.plot(x='yp', y='cross_entropy', ax=ax, label='Error for yt==1') 20 | df.plot(x='yp', y='cross_entropy0', ax=ax, label='Error for yt==0', linestyle='--') 21 | ax.set_ylabel('Cross Entropy or Error (smaller is better)') 22 | ax.set_xlabel('Predicted Probability (yp)') 23 | ax.set_title('Cross Entropy error for targets yt 0 and 1') 24 | 25 | ax = axs[1] 26 | logs = np.log(yp) 27 | df_log = pd.DataFrame({'yp': yp, 'log_yp': logs}) 28 | df_log.plot(x='yp', y='log_yp', ax=ax) 29 | ax.set_title('Natural Log for x==[0, 1]') 30 | ax.set_xlabel('Predicted Probability (yp)') 31 | 32 | plt.tight_layout() 33 | plt.savefig('cross_entropy.png') 34 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/probabilistic_datastructures/__init__.py -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/_benchmark.clean.pkl: -------------------------------------------------------------------------------- 1 | (lp1 2 | (dp2 3 | S'estimate' 4 | p3 5 | I1073741824 6 | sS'size' 7 | p4 8 | I416 9 | sS'name' 10 | p5 11 | S'Morris Counter' 12 | p6 13 | sS'time' 14 | p7 15 | F750.91865468025208 16 | sa(dp8 17 | g3 18 | I1048576 19 | sg4 20 | I416 21 | sg5 22 | S'Log Log Register' 23 | p9 24 | sg7 25 | F1690.4389050006866 26 | sa(dp10 27 | g3 28 | I4522232 29 | sg4 30 | I23121520 31 | sg5 32 | S'LogLog' 33 | p11 34 | sg7 35 | F2112.2002909183502 36 | sa(dp12 37 | g3 38 | I1628946 39 | sg4 40 | I23121520 41 | sg5 42 | S'SuperLogLog' 43 | p13 44 | sg7 45 | F2416.4608347415924 46 | sa(dp14 47 | g3 48 | I4983171 49 | sg4 50 | I577880 51 | sg5 52 | S'HyperLogLog' 53 | p15 54 | sg7 55 | F2906.5975527763367 56 | sa(dp16 57 | g3 58 | I4912818 59 | sg4 60 | L18704L 61 | sg5 62 | S'KMinValues' 63 | p17 64 | sg7 65 | F3502.6565506458282 66 | sa(dp18 67 | g3 68 | I4949358 69 | sg4 70 | I6936 71 | sg5 72 | S'ScalingBloom' 73 | p19 74 | sg7 75 | F10392.013652801514 76 | sa(dp20 77 | S'estimate' 78 | p21 79 | I4956262 80 | sS'size' 81 | p22 82 | I1148708949 83 | sS'name' 84 | p23 85 | S'Baseline' 86 | p24 87 | sS'time' 88 | p25 89 | F4036.6814231899998 90 | sa. 
-------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/_benchmark.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/probabilistic_datastructures/_benchmark.pkl -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/_benchmark.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import pickle 3 | import time 4 | from contextlib import contextmanager 5 | from pprint import pprint 6 | 7 | from tqdm import tqdm 8 | 9 | from countmemaybe import HyperLogLog, KMinValues 10 | from ll import LL 11 | from llregister import LLRegister 12 | from morriscounter import MorrisCounter 13 | from scalingbloomfilter import ScalingBloomFilter 14 | from superll import SuperLL 15 | 16 | methods = [ 17 | {"name": "LogLog", "obj": LL(16)}, 18 | {"name": "SuperLogLog", "obj": SuperLL(16)}, 19 | {"name": "Morris Counter", "obj": MorrisCounter()}, 20 | {"name": "Log Log Register", "obj": LLRegister()}, 21 | {"name": "HyperLogLog", "obj": HyperLogLog(b=16)}, 22 | {"name": "KMinValues", "obj": KMinValues(k=1 << 16)}, 23 | {"name": "ScalingBloom", "obj": ScalingBloomFilter(1048576)}, 24 | ] 25 | 26 | 27 | @contextmanager 28 | def TimerBlock(name): 29 | start = time.time() 30 | t = ctypes.c_double() 31 | try: 32 | yield t 33 | finally: 34 | t.value = time.time() - start 35 | print(f"[{name}] took {t.value} seconds") 36 | 37 | 38 | def wikireader(filename, buffering=1 << 10): 39 | total = 1148708949 40 | with open(filename, "r", buffering=buffering) as fd: 41 | for line in tqdm(fd, desc="Reading Wiki Data", total=total): 42 | yield line.strip() 43 | 44 | 45 | if __name__ == "__main__": 46 | filename = "/data/datasets/internet/wikipedia/enwiki-20140404-pages-articles.tokens" 47 | print("baseline reading measurement") 48 | with TimerBlock("Iterate File") as baseline: 49 | tmp = 0 50 | for line in wikireader(filename): 51 | tmp += len(line) 52 | 53 | for method in methods: 54 | print((method["name"])) 55 | obj = method["obj"] 56 | with TimerBlock("Iterate File") as bench: 57 | for line in wikireader(filename): 58 | obj.add(line) 59 | method["time"] = bench.value - baseline.value 60 | method["estimate"] = obj.__len__() 61 | 62 | pprint(methods) 63 | pickle.dump(methods, open("_benchmark.pkl", "wb+")) 64 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/bloomfilter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import bitarray 4 | import mmh3 5 | 6 | 7 | class BloomFilter: 8 | def __init__(self, capacity, error=0.005): 9 | """ 10 | Initialize a Bloom filter with given capacity and false positive rate 11 | """ 12 | self.capacity = capacity 13 | self.error = error 14 | self.num_bits = int((-capacity * math.log(error)) // math.log(2) ** 2 + 1) 15 | self.num_hashes = int((self.num_bits * math.log(2)) // capacity + 1) 16 | self.data = bitarray.bitarray(self.num_bits) 17 | 18 | def _indexes(self, key): 19 | h1, h2 = mmh3.hash64(key) 20 | for i in range(self.num_hashes): 21 | yield (h1 + i * h2) % self.num_bits 22 | 23 | def add(self, key): 24 | for index in self._indexes(key): 25 | self.data[index] = True 26 | 27 | def __contains__(self, key): 28 | 
return all(self.data[index] for index in self._indexes(key)) 29 | 30 | def __len__(self): 31 | bit_off_num = self.data.count(True) 32 | bit_off_percent = 1.0 - bit_off_num / self.num_bits 33 | length = -1.0 * self.num_bits * math.log(bit_off_percent) / self.num_hashes 34 | return int(length) 35 | 36 | @staticmethod 37 | def union(bloom_a, bloom_b): 38 | assert bloom_a.capacity == bloom_b.capacity, "Capacities must be equal" 39 | assert bloom_a.error == bloom_b.error, "Error rates must be equal" 40 | 41 | bloom_union = BloomFilter(bloom_a.capacity, bloom_a.error) 42 | bloom_union.data = bloom_a.data | bloom_b.data 43 | return bloom_union 44 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/hyperloglog.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from ll import LL 4 | 5 | 6 | class HyperLogLog(LL): 7 | def __len__(self): 8 | indicator = sum(2 ** -m.counter for m in self.registers) 9 | E = self.alpha * (self.num_registers ** 2) / indicator 10 | 11 | if E <= 5.0 / 2.0 * self.num_registers: 12 | V = sum(1 for m in self.registers if m.counter == 0) 13 | if V != 0: 14 | Estar = self.num_registers * math.log(self.num_registers / (1.0 * V), 2) 15 | else: 16 | Estar = E 17 | else: 18 | if E <= 2 ** 32 / 30.0: 19 | Estar = E 20 | else: 21 | Estar = -2 ** 32 * math.log(1 - E / 2 ** 32, 2) 22 | return int(Estar) 23 | 24 | 25 | if __name__ == "__main__": 26 | import mmh3 27 | 28 | hll = HyperLogLog(8) 29 | for i in range(100000): 30 | hll.add(mmh3.hash(str(i))) 31 | print(len(hll)) 32 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/kminvalues.py: -------------------------------------------------------------------------------- 1 | import mmh3 2 | from blist import sortedset 3 | 4 | 5 | class KMinValues: 6 | def __init__(self, num_hashes): 7 | self.num_hashes = num_hashes 8 | self.data = sortedset() 9 | 10 | def add(self, item): 11 | item_hash = mmh3.hash(item) 12 | self.data.add(item_hash) 13 | if len(self.data) > self.num_hashes: 14 | self.data.pop() 15 | 16 | def __len__(self): 17 | if len(self.data) <= 2: 18 | return 0 19 | length = (self.num_hashes - 1) * (2 ** 32 - 1) / (self.data[-2] + 2 ** 31 - 1) 20 | return int(length) 21 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/ll.py: -------------------------------------------------------------------------------- 1 | import mmh3 2 | from llregister import LLRegister 3 | 4 | 5 | class LL: 6 | def __init__(self, p): 7 | self.p = p 8 | self.num_registers = 2 ** p 9 | self.registers = [LLRegister() for i in range(int(2 ** p))] 10 | self.alpha = 0.7213 / (1.0 + 1.079 / self.num_registers) 11 | 12 | def add(self, item): 13 | item_hash = mmh3.hash(str(item)) 14 | register_index = item_hash & (self.num_registers - 1) 15 | register_hash = item_hash >> self.p 16 | self.registers[register_index]._add(register_hash) 17 | 18 | def __len__(self): 19 | register_sum = sum(h.counter for h in self.registers) 20 | length = ( 21 | self.num_registers * self.alpha * 2 ** (register_sum / self.num_registers) 22 | ) 23 | return int(length) 24 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/llregister.py: -------------------------------------------------------------------------------- 1 | import mmh3 2 | from utils 
import trailing_zeros 3 | 4 | 5 | class LLRegister: 6 | counter = 0 7 | 8 | def add(self, item): 9 | item_hash = mmh3.hash(str(item)) 10 | return self._add(item_hash) 11 | 12 | def _add(self, item_hash): 13 | bit_index = trailing_zeros(item_hash) 14 | if bit_index > self.counter: 15 | self.counter = bit_index 16 | 17 | def __len__(self): 18 | return int(2 ** self.counter) 19 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/morriscounter.py: -------------------------------------------------------------------------------- 1 | from random import random 2 | 3 | 4 | class MorrisCounter: 5 | counter = 0 6 | 7 | def add(self, *args): 8 | if random() < 1.0 / (2 ** self.counter): 9 | self.counter += 1 10 | 11 | def __len__(self): 12 | return int(2 ** self.counter) 13 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/requirements.txt: -------------------------------------------------------------------------------- 1 | bitarray 2 | mmh3 3 | blist 4 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/results/unique.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mynameisfiber/high_performance_python_2e/0dbb66c5bb0f1b2e9b65c469b5223403a4b1037c/11_less_ram/probabilistic_datastructures/results/unique.pkl -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/scalingbloomfilter.py: -------------------------------------------------------------------------------- 1 | from bloomfilter import BloomFilter 2 | 3 | 4 | class ScalingBloomFilter: 5 | def __init__(self, capacity, error=0.005, max_fill=0.8, error_tightening_ratio=0.5): 6 | self.capacity = capacity 7 | self.base_error = error 8 | self.max_fill = max_fill 9 | self.items_until_scale = int(capacity * max_fill) 10 | self.error_tightening_ratio = error_tightening_ratio 11 | self.bloom_filters = [] 12 | self.current_bloom = None 13 | self._add_bloom() 14 | 15 | def _add_bloom(self): 16 | new_error = self.base_error * self.error_tightening_ratio ** len( 17 | self.bloom_filters 18 | ) 19 | new_bloom = BloomFilter(self.capacity, new_error) 20 | self.bloom_filters.append(new_bloom) 21 | self.current_bloom = new_bloom 22 | return new_bloom 23 | 24 | def add(self, key): 25 | if key in self: 26 | return True 27 | self.current_bloom.add(key) 28 | self.items_until_scale -= 1 29 | if self.items_until_scale == 0: 30 | bloom_size = len(self.current_bloom) 31 | bloom_max_capacity = int(self.current_bloom.capacity * self.max_fill) 32 | 33 | # We may have been adding many duplicate values into the Bloom, so 34 | # we need to check if we actually need to scale or if we still have 35 | # space 36 | if bloom_size >= bloom_max_capacity: 37 | self._add_bloom() 38 | self.items_until_scale = bloom_max_capacity 39 | else: 40 | self.items_until_scale = int(bloom_max_capacity - bloom_size) 41 | return False 42 | 43 | def __contains__(self, key): 44 | return any(key in bloom for bloom in self.bloom_filters) 45 | 46 | def __len__(self): 47 | return int(sum(len(bloom) for bloom in self.bloom_filters)) 48 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/superll.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | 3 | from ll import LL 4 | 5 | 6 | class SuperLL(LL): 7 | NMAX = 1000000000 8 | 9 | def __len__(self): 10 | # truncation 11 | register_num1 = int(self.num_registers * 0.7) 12 | registers_truncated1 = sorted(h.counter for h in self.registers)[:register_num1] 13 | 14 | # restriction 15 | B = math.ceil(math.log(self.NMAX / self.num_registers) / math.log(2.0) + 3) 16 | registers_truncated2 = [v for v in registers_truncated1 if v <= B] 17 | register_num2 = len(registers_truncated2) 18 | register_sum = sum(registers_truncated2) 19 | 20 | alpha = 0.7213 / (1.0 + 1.079 / register_num2) 21 | length = 2 ** (register_sum / register_num2) * register_num2 * alpha 22 | return int(length) 23 | -------------------------------------------------------------------------------- /11_less_ram/probabilistic_datastructures/utils.py: -------------------------------------------------------------------------------- 1 | def trailing_zeros(number): 2 | """ 3 | Returns the 1-based index of the first bit set to 1 from the right side of 4 | a 32bit integer 5 | >>> trailing_zeros(0) 6 | 32 7 | >>> trailing_zeros(0b1000) 8 | 4 9 | >>> trailing_zeros(0b10000000) 10 | 8 11 | """ 12 | if not number: 13 | return 32 14 | index = 0 15 | while (number >> index) & 1 == 0: 16 | index += 1 17 | return index + 1 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | "These code examples are excerpted from High Performance Python 2ed by Micha 2 | Gorelick and Ian Ozsvald published by O’Reilly Media, Inc. (ISBN 1492055026) 3 | which you can purchase here: http://shop.oreilly.com/product/0636920268505.do. 4 | You may use these code examples in your programs and documentation. You do not 5 | need to contact O'Reilly for permission unless you’re reproducing a significant 6 | portion of the code. For example, writing a program that uses several chunks of 7 | this example code does not require permission. Selling or distributing a set of 8 | examples from O'Reilly books does require permission. Answering a question by 9 | citing the book and quoting example code does not require permission. 10 | Incorporating a significant amount of this example code into your product’s 11 | documentation does require permission. We appreciate, but do not require, 12 | attribution. An attribution usually includes the title, author, publisher, and 13 | ISBN. For example: “High Performance Python 2ed by Micha Gorelick and Ian Ozsvald 14 | (O’Reilly). Copyright 2014 Micha Gorelick and Ian Ozsvald. 978-1-4920-5502-0” 15 | 16 | If you feel your use of these code examples falls outside fair use or the 17 | permission given here, feel free to contact O'Reilly at 18 | permissions@oreilly.com." 
19 | -------------------------------------------------------------------------------- /figures/bandwidth.csv: -------------------------------------------------------------------------------- 1 | Speed (Gbit/s),Name 2 | 0.326,LTE 3 | 1.3,Wireless 802.11ac 4 | 5,USB 3.0 5 | 7,Wireless 802.11ad 6 | 10,10 Gigabit Ethernet 7 | 10,USB 3.1 8 | 16,SATAe 9 | 40,Thunderbolt3 10 | 40,USB 4 11 | 100,100 Gigabit Ethernet 12 | 204.8,DDR4 SDRAM 13 | 256,PCIe 3.0 14 | 409.6,DDR5 SDRAM 15 | -------------------------------------------------------------------------------- /figures/bandwidth.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from collections import defaultdict 3 | 4 | import numpy as np 5 | import pylab as py 6 | 7 | 8 | def autolabel(rects): 9 | """Attach a text label above each bar in *rects*, displaying its height.""" 10 | for rect in rects: 11 | height = rect.get_height() 12 | ax.annotate( 13 | "{}".format(height), 14 | xy=(rect.get_x() + rect.get_width() / 2, height), 15 | xytext=(0, 3), # 3 points vertical offset 16 | textcoords="offset points", 17 | ha="center", 18 | va="bottom", 19 | ) 20 | 21 | 22 | if __name__ == "__main__": 23 | data = list(csv.DictReader(open("bandwidth.csv"))) 24 | 25 | N = len(data) 26 | ind = np.arange(N) 27 | width = 0.35 28 | 29 | ax = py.gca() 30 | bar = ax.bar(ind, [float(d["Speed (Gbit/s)"]) for d in data], width, color="r") 31 | autolabel(bar) 32 | py.ylim(ymin=0) 33 | ax.set_ylabel("Speed (Gbit/s)") 34 | ax.set_xticks(ind + width) 35 | ax.set_xticklabels( 36 | [x["Name"].replace(" ", "\n") for x in data], rotation=45, ha="right" 37 | ) 38 | 39 | py.title("Bandwidth for Common Interfaces") 40 | 41 | py.savefig("../bandwidth.png") 42 | -------------------------------------------------------------------------------- /figures/diffusion_1d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import numpy as np 4 | import pylab as py 5 | 6 | grid_size = (512,) 7 | 8 | 9 | def laplacian(grid): 10 | return np.roll(grid, +1) + np.roll(grid, -1) - 2 * grid 11 | 12 | 13 | def evolve(grid, dt, D=1): 14 | return grid + dt * D * laplacian(grid) 15 | 16 | 17 | if __name__ == "__main__": 18 | grid = np.zeros(grid_size) 19 | max_val = 1.0 20 | 21 | block_low = int(grid_size[0] * 0.4) 22 | block_high = int(grid_size[0] * 0.6) 23 | grid[block_low:block_high] = max_val 24 | 25 | t = 0 26 | grids = [(t, grid.copy())] 27 | for i in range(3): 28 | for i in range(5000 * (4 ** i) + 1): 29 | grid = evolve(grid, 0.1) 30 | t += i * 0.1 31 | grids.append((t, grid.copy())) 32 | 33 | py.figure() 34 | for i, (t, grid) in enumerate(grids): 35 | py.subplot(len(grids), 1, i + 1) 36 | py.plot(grid) 37 | py.ylabel("t = %0.0f" % t) 38 | py.ylim(ymin=0, ymax=max_val * 1.1) 39 | py.xlim(xmin=0, xmax=grid_size[0]) 40 | 41 | py.xlabel("Position") 42 | 43 | py.subplot(len(grids), 1, 1) 44 | py.title("1D Diffusion of a square function") 45 | 46 | py.tight_layout() 47 | py.savefig("../diffusion_1d.png") 48 | -------------------------------------------------------------------------------- /figures/hll_single_reg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import countmemaybe 4 | import numpy as np 5 | import pylab as py 6 | 7 | 8 | def leading_set_bit(number): 9 | number_binary = bin(number) 10 | return len(number_binary) - number_binary.rfind("1") 11 | 12 | 13 | class HLL(object): 14 | max_index = 0 15 | 
16 | def add(self, number): 17 | index = leading_set_bit(number) 18 | self.max_index = max(self.max_index, index) 19 | 20 | def __len__(self): 21 | return 2 ** self.max_index 22 | 23 | 24 | if __name__ == "__main__": 25 | data_list = [] 26 | h1 = HLL() 27 | h = countmemaybe.HyperLogLog() 28 | for i in range(100000): 29 | item = "seee%seeeed234rsdaf" % i 30 | x = h._hash(item) 31 | h1.add(x) 32 | h.add(x) 33 | data_list.append((i + 1, len(h1), len(h))) 34 | 35 | data_numpy = np.asarray(data_list) 36 | py.plot(data_numpy[:, 0], data_numpy[:, 1], ":", label="Single HLL Register") 37 | py.plot(data_numpy[:, 0], data_numpy[:, 2], "--", label="HLL with 16 registers") 38 | py.plot(data_numpy[:, 0], data_numpy[:, 0], label="Actual Size") 39 | py.legend(loc="upper left") 40 | 41 | py.title("Performance of a single HLL Register") 42 | py.xlabel("Size of the set") 43 | py.ylabel("Predicted size of the set") 44 | 45 | # py.show() 46 | py.savefig("../hll_single_reg.png") 47 | -------------------------------------------------------------------------------- /figures/kmv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | 6 | import pylab as py 7 | from kminvalues import KMinValues 8 | 9 | sys.path.append(os.path.abspath("../../examples/probabilistic_datastructures/")) 10 | 11 | 12 | 13 | def plot(kmv): 14 | py.scatter( 15 | [d / float(2 ** 32 - 1) for d in kmv.data[:-1]], 16 | [0] * (len(kmv.data) - 1), 17 | alpha=0.25, 18 | ) 19 | py.axvline(x=(kmv.data[-2] / float(2 ** 32 - 1)), c="r") 20 | py.gca().get_yaxis().set_visible(False) 21 | py.gca().get_xaxis().set_ticklabels([]) 22 | py.gca().get_xaxis().set_ticks([x / 10.0 for x in range(11)]) 23 | 24 | 25 | if __name__ == "__main__": 26 | k = 20 27 | num_panels = 20 28 | kmv = KMinValues(k) 29 | for i in range(k * num_panels + 1): 30 | if i % k == 0 and i != 0: 31 | py.subplot(num_panels, 1, i // k) 32 | if i == k: 33 | py.title("Hash space density for KMV with k=%d" % k) 34 | plot(kmv) 35 | py.xlim((0, 1)) 36 | kmv.add(str(i)) 37 | print("added") 38 | 39 | py.gca().get_xaxis().set_ticks([x / 10.0 for x in range(11)]) 40 | py.gca().get_xaxis().set_ticklabels([x / 10.0 for x in range(11)]) 41 | 42 | py.tight_layout() 43 | py.savefig("../kmv.png") 44 | -------------------------------------------------------------------------------- /figures/list_overallocation.py: -------------------------------------------------------------------------------- 1 | from itertools import islice 2 | 3 | import pylab as py 4 | 5 | 6 | # coding: utf-8 7 | def overalloc_dict(): 8 | o = list_overalloc() 9 | i = 1 10 | s, e, _ = next(o) 11 | while True: 12 | if i > e: 13 | s, e, _ = next(o) 14 | yield e - i 15 | i += 1 16 | 17 | 18 | def list_overalloc(): 19 | s = 1 20 | while True: 21 | e = alloc = s + overalloc(s) 22 | yield s, e, alloc 23 | s = e + 1 24 | 25 | 26 | overalloc = lambda N: (N >> 3) + (3 if N < 9 else 6) 27 | 28 | py.scatter(list(range(1, 10000)), list(islice(overalloc_dict(), 10000 - 1))) 29 | py.ylim(0, 10000 - 1) 30 | py.xlim(0, 10000 - 1) 31 | py.ylim(0, 2000) 32 | py.ylim(0, 1500) 33 | py.ylim(0, 1400) 34 | py.ylim(0, 1300) 35 | py.xlabel("Size of the list") 36 | py.ylabel("Number of elements overallocated") 37 | py.title("Overallocation in lists") 38 | py.savefig("../list_overallocation.png") 39 | -------------------------------------------------------------------------------- /figures/matrix_method_speed.csv: 
-------------------------------------------------------------------------------- 1 | method,256,512,1024,2048,4096 2 | python,0.00,0.00,0.00,0.00,0.00 3 | python+memory,1.06,1.07,1.07,1.07,1.07 4 | numpy,170.59,116.16,60.49,44.80,45.80 5 | numpy+memory,185.97,140.10,69.67,44.43,45.36 6 | numpy+memory+laplace,203.66,208.15,86.41,90.91,90.53 7 | numpy+memory+laplace+numexpr,97.41,167.49,102.38,105.69,105.25 8 | numpy+memory+scipy,52.27,42.00,36.44,24.70,7.43 9 | -------------------------------------------------------------------------------- /figures/matrix_method_speed.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import csv 4 | from itertools import cycle 5 | 6 | import numpy as np 7 | import pylab as py 8 | 9 | markers = cycle("h*o>Dxsp8") 10 | linestyles = cycle(["-", ":", "--", "-."]) 11 | 12 | if __name__ == "__main__": 13 | data_raw = csv.DictReader(open("matrix_method_speed.csv")) 14 | data = [] 15 | max_speedup = 0 16 | for item in data_raw: 17 | name = item.pop("method") 18 | if name != "python": 19 | values = np.asarray(sorted((int(k), float(v)) for k, v in item.items())) 20 | data.append((name, values)) 21 | max_speedup = max(max_speedup, values[:, 1].max()) 22 | 23 | py.figure() 24 | for name, values in data: 25 | py.plot( 26 | values[:, 0], 27 | values[:, 1], 28 | linestyle=next(linestyles), 29 | marker=next(markers), 30 | label=name, 31 | linewidth=4, 32 | ) 33 | 34 | py.ylim(ymin=0, ymax=max_speedup * 1.1) 35 | py.legend( 36 | loc="upper center", 37 | ncol=3, 38 | mode="expand", 39 | borderaxespad=0.0, 40 | labelspacing=0.2, 41 | fontsize=12, 42 | handlelength=5, 43 | ) 44 | 45 | ax = py.gca() 46 | ticks = data[0][1][:, 0] 47 | ax.set_xticks(ticks) 48 | ax.set_xticklabels(["%dx%d" % (x, x) for x in ticks], rotation=25, ha="right") 49 | py.xlim(xmin=ticks.min(), xmax=ticks.max()) 50 | 51 | py.title("Summary of code performance") 52 | py.ylabel("Speedup from pure python (larger is better)") 53 | py.xlabel("Grid Size") 54 | py.tight_layout() 55 | 56 | py.savefig("../matrix_method_speed.png") 57 | -------------------------------------------------------------------------------- /figures/memory_types_data.csv: -------------------------------------------------------------------------------- 1 | type,min_size (byte),max_size (byte),min_read (Mb/s),max_read (Mb/s),min_write (Mb/s),max_write (Mb/s),min_read_latency (ms),max_read_latency (ms) 2 | spinning hard drive,1e12,1.6e13,50,255,50,255,6.7,23 3 | solid state drive,1.28e+11,8e+12,96,3500,40,2500,0.03,.54 4 | RAM,1e+9,6.4e+10,6400,25600,6400,25600,6.5e-5,.0004 5 | L1/L2 Cache,16384,6.4e+7,26214.4,4000000,26214.4,4000000,1.3e-6,.0001 6 | -------------------------------------------------------------------------------- /figures/norm_squared.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import os 4 | import sys 5 | from itertools import cycle 6 | 7 | import matplotlib 8 | import norm_array 9 | import norm_numpy 10 | import norm_numpy_dot 11 | import norm_python 12 | import norm_python_comprehension 13 | import numpy as np 14 | import pylab as py 15 | 16 | sys.path.append(os.path.abspath("../../examples/matrix/norm/")) 17 | 18 | 19 | 20 | 21 | methods = {k: v for k, v in globals().items() if k.startswith("norm")} 22 | 23 | markers = cycle("h*o>Dxsp8") 24 | linestyles = cycle(["-", ":", "--", "-."]) 25 | 26 | if __name__ == "__main__": 27 | timings = {k: [] for k in methods} 28 | for 
exponent in range(12, 35): 29 | N = int(1.5 ** exponent) 30 | print("exponent:", exponent) 31 | print("N:", N) 32 | for name, method in methods.items(): 33 | t = method.run_experiment(N, num_iter=5) * 1000.0 34 | timings[name].append((N, t)) 35 | print("%s: %f" % (name, t)) 36 | 37 | for name, data in timings.items(): 38 | d = np.asarray(data) 39 | py.plot( 40 | d[:, 0], 41 | d[:, 1], 42 | label=name, 43 | marker=next(markers), 44 | linestyle=next(linestyles), 45 | linewidth=4, 46 | ) 47 | 48 | py.title("Runtime for various norm squared routines") 49 | py.xlabel("Vector length") 50 | py.ylabel("Runtime (milliseconds) -- less is better") 51 | py.yscale("log") 52 | py.xscale("log") 53 | ax = py.gca() 54 | ax.get_xaxis().set_major_formatter(matplotlib.ticker.FormatStrFormatter("%d")) 55 | 56 | ax.xaxis.grid(True, which="minor", alpha=0.4) 57 | ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter()) 58 | ax.yaxis.grid(True, which="minor", alpha=0.4) 59 | py.legend(loc="upper left", handlelength=5) 60 | 61 | py.tight_layout() 62 | py.savefig("../norm_squared.png") 63 | -------------------------------------------------------------------------------- /figures/processor_clock.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | import dateutil.parser 4 | import matplotlib.dates as mdates 5 | import pylab as py 6 | 7 | if __name__ == "__main__": 8 | data = list(csv.DictReader(open("processor.csv"))) 9 | dates = [dateutil.parser.parse(x["date"]) for x in data if x["date"]] 10 | clock = [x["clock"] for x in data if x["date"]] 11 | 12 | ax = py.gca() 13 | fig = py.gcf() 14 | 15 | ax.scatter(mdates.date2num(dates), clock, alpha=0.5) 16 | ax.set_xticklabels( 17 | [d.strftime("%Y") for d in mdates.num2date(ax.get_xticks())], 18 | rotation=15, 19 | ha="right", 20 | ) 21 | ax.set_yscale("log") 22 | 23 | ax.set_ylabel("Clock speed (MHz)") 24 | ax.set_xlabel("Date of CPU Release") 25 | ax.set_title("Historical growth of CPU clock speed") 26 | 27 | py.savefig("../processor_clock.png") 28 | -------------------------------------------------------------------------------- /fix_cpu_modes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | NO_TURBO=1 4 | MODE=performance 5 | 6 | if [[ "$1" == 'disable' ]]; then 7 | NO_TURBO=0 8 | MODE=powersave 9 | fi 10 | 11 | echo "Setting mode to: $MODE" 12 | echo "Setting no_turbo to: $NO_TURBO" 13 | 14 | for CPUFREQ in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; 15 | do 16 | [ -f $CPUFREQ ] || continue; 17 | echo -n $MODE > $CPUFREQ; 18 | done 19 | 20 | echo ${NO_TURBO} > /sys/devices/system/cpu/intel_pstate/no_turbo 21 | --------------------------------------------------------------------------------
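fix_cpu_modes.sh above pins every core to the performance governor and disables Intel turbo so that the timing scripts in this repository give repeatable numbers; it writes to sysfs, so it needs root, and passing "disable" restores powersave and turbo. The snippet below is a small, hypothetical companion check (not part of the repository): it only reads the same sysfs files the shell script writes, so it assumes Linux with the intel_pstate driver.

from glob import glob
from pathlib import Path


def cpu_is_pinned():
    """True when every core uses the 'performance' governor and turbo is off."""
    governors = {Path(p).read_text().strip()
                 for p in glob("/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor")}
    no_turbo = Path("/sys/devices/system/cpu/intel_pstate/no_turbo").read_text().strip()
    return governors == {"performance"} and no_turbo == "1"


if __name__ == "__main__":
    print("CPU pinned for benchmarking:", cpu_is_pinned())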