├── .gitignore ├── .gitmodules ├── ParallelPython.ipynb ├── ParallelPython.slides.html ├── README.md ├── dask ├── download.sh ├── taxi_cab.py └── taxi_cab.sh ├── data ├── charing_cross.jpg ├── euston.jpg ├── fenchurch.jpg ├── kings_cross.jpg ├── liverpool_street.jpg ├── london_bridge.jpg ├── paddington.jpg ├── st_pancras.jpg ├── victoria.jpg └── waterloo.jpg ├── docs ├── index.html ├── parallel_python_qr.pdf └── survey.png ├── environment.yml ├── gpu ├── cpu_test.py └── gpu_test.py ├── jobfile.txt ├── make_docs ├── mpi ├── mpi4py.pdf ├── mpi_message.py ├── mpi_reduce.py ├── mpi_scatter.py ├── mpi_send.py └── mpi_simple.py ├── parallel_python.png └── survey.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # PDF outputs 107 | *.pdf 108 | 109 | .DS_Store 110 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "reveal.js"] 2 | path = reveal.js 3 | url = https://github.com/hakimel/reveal.js.git 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parallel Programming with Python Tutorial 2 | 3 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/ycrc/parallel_python/master) 4 | 5 | This is a tutorial introducing parallel programming concepts and their implementation in Python. 6 | 7 | ## Outline and Overview 8 | 9 | - Introduction to parallel concepts 10 | - Classes of parallel problems 11 | - Python implementations of parallel processesing 12 | - Tools for further exploration 13 | 14 | ## Modules and tools 15 | 16 | - Language: Python 3.8 17 | - Modules: `pandas`, `numpy`, `multiprocessing`, `PIL` (for imamge processing), `mpi4py`, `matplotlib`, `cupy` (for GPU parallelism) 18 | - Jupyter notebook 19 | 20 | 21 | -------------------------------------------------------------------------------- /dask/download.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | for i in $(seq -f "%02g" 1 12); 4 | 5 | do 6 | wget https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2017-$i.csv; 7 | done; 8 | 9 | -------------------------------------------------------------------------------- /dask/taxi_cab.py: -------------------------------------------------------------------------------- 1 | # Load in Dask utilities 2 | from dask.distributed import Client 3 | from dask_jobqueue import SLURMCluster 4 | import dask.dataframe as dd 5 | import dask.array as da 6 | 7 | # Load other modules 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import glob, time 11 | 12 | # Define single unit of the Dask Distributed "Cluster" 13 | cluster = SLURMCluster(queue='admintest', cores=1, memory="10GB") 14 | # Scale up the cluster to have 10 members 15 | cluster.scale(10) 16 | # Initialize the "client" so that the script is connected to the Cluster 17 | client = Client(cluster) 18 | 19 | print(client) 20 | 21 | # Get the list of NYC taxi cab data 22 | # https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page 23 | file_list = glob.glob('/home/tl397/ycrc/workshops/taxi/*2017*csv') 24 | print(f"{len(file_list)} files selected") 25 | 26 | # Prep Dask to load the data 27 | data = dd.read_csv(file_list) 28 | 29 | # Use the DaskArray Histogram function to visualize the % tip 30 | h, bins = da.histogram(np.divide(data['tip_amount'], data['fare_amount']), bins=200, range=[0, 2]) 31 | # Activate the lazy-computing to calculate the results. 32 | h.compute() 33 | # Plot the results and save the file as a PDF 34 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6,4)) 35 | plt.step(bins[0:-1], h, where='post') 36 | plt.xlabel('Tip (%)') 37 | plt.ylabel('Counts') 38 | plt.yscale('log') 39 | plt.savefig('tip_percentage.pdf') 40 | 41 | # Repeat the above steps looking at trip distance (miles) 42 | h, bins = da.histogram(data['trip_distance'], bins= np.logspace(-1,2,50)) 43 | h.compute() 44 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6,4)) 45 | plt.step(bins[0:-1], h, where='post') 46 | plt.xlabel('Distance (mi)') 47 | plt.ylabel('Counts') 48 | plt.yscale('log') 49 | plt.xscale('log') 50 | plt.savefig('trip_distance.pdf') 51 | -------------------------------------------------------------------------------- /dask/taxi_cab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH -J dask 4 | #SBATCH -c1 -p admintest 5 | 6 | module load dask 7 | 8 | python taxi_cab.py 9 | 10 | -------------------------------------------------------------------------------- /data/charing_cross.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/charing_cross.jpg -------------------------------------------------------------------------------- /data/euston.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/euston.jpg -------------------------------------------------------------------------------- /data/fenchurch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/fenchurch.jpg -------------------------------------------------------------------------------- /data/kings_cross.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/kings_cross.jpg -------------------------------------------------------------------------------- /data/liverpool_street.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/liverpool_street.jpg -------------------------------------------------------------------------------- /data/london_bridge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/london_bridge.jpg -------------------------------------------------------------------------------- /data/paddington.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/paddington.jpg -------------------------------------------------------------------------------- /data/st_pancras.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/st_pancras.jpg -------------------------------------------------------------------------------- /data/victoria.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/victoria.jpg -------------------------------------------------------------------------------- /data/waterloo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/waterloo.jpg -------------------------------------------------------------------------------- /docs/parallel_python_qr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/docs/parallel_python_qr.pdf -------------------------------------------------------------------------------- /docs/survey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/docs/survey.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: parallel 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python>3.8 6 | - numpy 7 | - pillow 8 | - matplotlib 9 | - dask 10 | - distributed 11 | - joblib 12 | - cupy 13 | - jupyter 14 | - jupyterlab 15 | - pip: 16 | - jupyterlab-rise 17 | -------------------------------------------------------------------------------- /gpu/cpu_test.py: -------------------------------------------------------------------------------- 1 | import cupy as cp 2 | import numpy as np 3 | 4 | # Create 2D numpy arrays 5 | a = np.random.random(100000000) 6 | a = a.reshape(10000,10000) 7 | 8 | b = np.random.random(100000000) 9 | b = b.reshape(10000,10000) 10 | 11 | # Matrix Mult 12 | out = np.matmul(a,b) 13 | 14 | -------------------------------------------------------------------------------- /gpu/gpu_test.py: -------------------------------------------------------------------------------- 1 | import cupy as cp 2 | import numpy as np 3 | 4 | # Create 2D numpy arrays 5 | a = np.random.random(100000000) 6 | a = a.reshape(10000,10000) 7 | 8 | b = np.random.random(100000000) 9 | b = b.reshape(10000,10000) 10 | 11 | # Move to GPU 12 | g = cp.asarray(a) 13 | h = cp.asarray(b) 14 | 15 | # Matrix Mult 16 | out = cp.matmul(g,h) 17 | 18 | -------------------------------------------------------------------------------- /jobfile.txt: -------------------------------------------------------------------------------- 1 | python image_flipper.py ./data/waterloo.jpg 2 | python image_flipper.py ./data/victoria.jpg 3 | python image_flipper.py ./data/paddington.jpg 4 | python image_flipper.py ./data/charing_cross.jpg 5 | python image_flipper.py ./data/euston.jpg 6 | python image_flipper.py ./data/kings_cross.jpg 7 | python image_flipper.py ./data/fenchurch.jpg 8 | python image_flipper.py ./data/liverpool_street.jpg 9 | python image_flipper.py ./data/st_pancras.jpg 10 | python image_flipper.py ./data/london_bridge.jpg 11 | -------------------------------------------------------------------------------- /make_docs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Convert the notebook to slides 4 | jupyter nbconvert --to slides ParallelPython.ipynb --output-dir ./docs/ --reveal-prefix "https://cdn.jsdelivr.net/npm/reveal.js@5.0.2 " 5 | 6 | # rename to index.html 7 | mv ./docs/ParallelPython.slides.html ./docs/index.html 8 | 9 | -------------------------------------------------------------------------------- /mpi/mpi4py.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/mpi/mpi4py.pdf -------------------------------------------------------------------------------- /mpi/mpi_message.py: -------------------------------------------------------------------------------- 1 | 2 | from mpi4py import MPI 3 | 4 | comm = MPI.COMM_WORLD 5 | size = comm.Get_size() 6 | rank = comm.Get_rank() 7 | 8 | if rank == 0: 9 | data = {'key1' : [7, 2.72, 2+3j], 10 | 'key2' : ( 'abc', 'xyz')} 11 | else: 12 | data = None 13 | data = comm.bcast(data, root=0) 14 | 15 | if data != None: 16 | data['key1'].append(rank) 17 | 18 | print(f"Rank: {rank}, data: {data}") 19 | -------------------------------------------------------------------------------- /mpi/mpi_reduce.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | import numpy as np 3 | 4 | comm = MPI.COMM_WORLD 5 | rank = comm.Get_rank() 6 | 7 | # Create some np arrays on each process: 8 | # For this demo, the arrays have only one 9 | # entry that is assigned to be the rank of the processor 10 | #value = np.array(rank,'d') 11 | value = np.random.normal(loc=rank, scale=.5, size=4) 12 | 13 | print(' Rank: ',rank, ' value = ', value) 14 | 15 | # initialize the np arrays that will store the results: 16 | #value_sum = np.array(0.0,'d') 17 | #value_max = np.array(0.0,'d') 18 | 19 | value_sum = np.zeros(4,'d') 20 | value_max = np.zeros(4,'d') 21 | 22 | # perform the reductions: 23 | comm.Reduce(value, value_sum, op=MPI.SUM, root=0) 24 | comm.Reduce(value, value_max, op=MPI.MAX, root=0) 25 | 26 | if rank == 0: 27 | print(' Rank 0: value_sum = ',value_sum) 28 | print(' Rank 0: value_max = ',value_max) 29 | 30 | -------------------------------------------------------------------------------- /mpi/mpi_scatter.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | import numpy as np 3 | 4 | comm = MPI.COMM_WORLD 5 | size = comm.Get_size() # new: gives number of ranks in comm 6 | rank = comm.Get_rank() 7 | 8 | numData = 100000000 9 | data = None 10 | if rank == 0: 11 | data = np.random.normal(loc=10, scale=5, size=numData) 12 | 13 | partial = np.empty(int(numData/size), dtype='d') 14 | comm.Scatter(data, partial, root=0) 15 | 16 | reduced = None 17 | if rank == 0: 18 | reduced = np.empty(size, dtype='d') 19 | 20 | comm.Gather(np.average(partial), reduced, root=0) 21 | 22 | if rank == 0: 23 | print('Full Average:',np.average(reduced)) 24 | 25 | -------------------------------------------------------------------------------- /mpi/mpi_send.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | comm = MPI.COMM_WORLD 3 | size = comm.Get_size() 4 | rank = comm.Get_rank() 5 | 6 | if rank == 0: 7 | msg = 'Hello, world' 8 | comm.send(msg, dest=1) 9 | elif rank == 1: 10 | s = comm.recv() 11 | print(f"rank {rank}: {s}") 12 | 13 | -------------------------------------------------------------------------------- /mpi/mpi_simple.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | import os 3 | 4 | # instantize the communication world 5 | comm = MPI.COMM_WORLD 6 | 7 | # get the size of the communication world 8 | size = comm.Get_size() 9 | 10 | # get this particular processes' `rank` ID 11 | rank = comm.Get_rank() 12 | 13 | PID = os.getpid() 14 | 15 | print(f'rank: {rank} has PID: {PID}') 16 | 17 | 18 | -------------------------------------------------------------------------------- /parallel_python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/parallel_python.png -------------------------------------------------------------------------------- /survey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/survey.png --------------------------------------------------------------------------------