├── .gitignore
├── .gitmodules
├── ParallelPython.ipynb
├── ParallelPython.slides.html
├── README.md
├── dask
    ├── download.sh
    ├── taxi_cab.py
    └── taxi_cab.sh
├── data
    ├── charing_cross.jpg
    ├── euston.jpg
    ├── fenchurch.jpg
    ├── kings_cross.jpg
    ├── liverpool_street.jpg
    ├── london_bridge.jpg
    ├── paddington.jpg
    ├── st_pancras.jpg
    ├── victoria.jpg
    └── waterloo.jpg
├── docs
    ├── index.html
    ├── parallel_python_qr.pdf
    └── survey.png
├── environment.yml
├── gpu
    ├── cpu_test.py
    └── gpu_test.py
├── jobfile.txt
├── make_docs
├── mpi
    ├── mpi4py.pdf
    ├── mpi_message.py
    ├── mpi_reduce.py
    ├── mpi_scatter.py
    ├── mpi_send.py
    └── mpi_simple.py
├── parallel_python.png
└── survey.png


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # PDF outputs
107 | *.pdf
108 | 
109 | .DS_Store
110 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "reveal.js"]
2 | 	path = reveal.js
3 | 	url = https://github.com/hakimel/reveal.js.git
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Parallel Programming with Python Tutorial
 2 | 
 3 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/ycrc/parallel_python/master)
 4 | 
 5 | This is a tutorial introducing parallel programming concepts and their implementation in Python. 
 6 | 
 7 | ## Outline and Overview
 8 | 
 9 | - Introduction to parallel concepts
10 | - Classes of parallel problems
- Python implementations of parallel processing
12 | - Tools for further exploration
13 | 
14 | ## Modules and tools
15 | 
16 | - Language: Python 3.8
- Modules: `pandas`, `numpy`, `multiprocessing`, `PIL` (for image processing), `mpi4py`, `matplotlib`, `cupy` (for GPU parallelism)
18 | - Jupyter notebook
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/dask/download.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Fetch the twelve monthly 2017 yellow-cab trip files (one wget per month).
# `seq -w` zero-pads the month numbers so they run 01, 02, ..., 12.
for month in $(seq -w 1 12)
do
    wget "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2017-${month}.csv"
done
8 | 
9 | 


--------------------------------------------------------------------------------
/dask/taxi_cab.py:
--------------------------------------------------------------------------------
 1 | # Load in Dask utilities
 2 | from dask.distributed import Client
 3 | from dask_jobqueue import SLURMCluster
 4 | import dask.dataframe as dd
 5 | import dask.array as da
 6 | 
 7 | # Load other modules
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | import glob, time
11 | 
# Define single unit of the Dask Distributed "Cluster"
cluster = SLURMCluster(queue='admintest', cores=1, memory="10GB")
# Scale up the cluster to have 10 members
cluster.scale(10)
# Initialize the "client" so that the script is connected to the Cluster
client = Client(cluster)

print(client)

# Get the list of NYC taxi cab data
# https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
file_list = glob.glob('/home/tl397/ycrc/workshops/taxi/*2017*csv')
print(f"{len(file_list)} files selected")

# Prep Dask to load the data (lazy: nothing is read until a result is requested)
data = dd.read_csv(file_list)

# Use the DaskArray Histogram function to visualize the tip as a fraction of
# the fare (200 bins over the range 0 to 2)
h, bins = da.histogram(np.divide(data['tip_amount'], data['fare_amount']), bins=200, range=[0, 2])
# Activate the lazy-computing to calculate the results.
# compute() returns the result instead of updating `h` in place, so keep the
# returned counts; otherwise the still-lazy array is handed to matplotlib and
# the whole histogram is evaluated again while plotting.
h = h.compute()
# Plot the results and save the file as a PDF
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6,4))
plt.step(bins[0:-1], h, where='post')
plt.xlabel('Tip (%)')
plt.ylabel('Counts')
plt.yscale('log')
plt.savefig('tip_percentage.pdf')

# Repeat the above steps looking at trip distance (miles),
# using 50 logarithmically spaced bin edges between 0.1 and 100
h, bins = da.histogram(data['trip_distance'], bins= np.logspace(-1,2,50))
# Same as above: bind the computed counts rather than discarding them
h = h.compute()
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6,4))
plt.step(bins[0:-1], h, where='post')
plt.xlabel('Distance (mi)')
plt.ylabel('Counts')
plt.yscale('log')
plt.xscale('log')
plt.savefig('trip_distance.pdf')
51 | 


--------------------------------------------------------------------------------
/dask/taxi_cab.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #SBATCH -J dask
 4 | #SBATCH -c1 -p admintest
 5 | 
 6 | module load dask
 7 | 
 8 | python taxi_cab.py  
 9 | 
10 | 


--------------------------------------------------------------------------------
/data/charing_cross.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/charing_cross.jpg


--------------------------------------------------------------------------------
/data/euston.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/euston.jpg


--------------------------------------------------------------------------------
/data/fenchurch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/fenchurch.jpg


--------------------------------------------------------------------------------
/data/kings_cross.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/kings_cross.jpg


--------------------------------------------------------------------------------
/data/liverpool_street.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/liverpool_street.jpg


--------------------------------------------------------------------------------
/data/london_bridge.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/london_bridge.jpg


--------------------------------------------------------------------------------
/data/paddington.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/paddington.jpg


--------------------------------------------------------------------------------
/data/st_pancras.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/st_pancras.jpg


--------------------------------------------------------------------------------
/data/victoria.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/victoria.jpg


--------------------------------------------------------------------------------
/data/waterloo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/data/waterloo.jpg


--------------------------------------------------------------------------------
/docs/parallel_python_qr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/docs/parallel_python_qr.pdf


--------------------------------------------------------------------------------
/docs/survey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/docs/survey.png


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: parallel
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - python>3.8
 6 |   - numpy
 7 |   - pillow
 8 |   - matplotlib
 9 |   - dask
10 |   - distributed
11 |   - joblib
12 |   - cupy
13 |   - jupyter
14 |   - jupyterlab
15 |   - pip:
16 |     - jupyterlab-rise
17 | 


--------------------------------------------------------------------------------
/gpu/cpu_test.py:
--------------------------------------------------------------------------------
 1 | import cupy as cp
 2 | import numpy as np
 3 | 
 4 | # Create 2D numpy arrays
 5 | a = np.random.random(100000000)
 6 | a = a.reshape(10000,10000)
 7 | 
 8 | b = np.random.random(100000000)
 9 | b = b.reshape(10000,10000)
10 | 
11 | # Matrix Mult
12 | out = np.matmul(a,b)
13 | 
14 | 


--------------------------------------------------------------------------------
/gpu/gpu_test.py:
--------------------------------------------------------------------------------
 1 | import cupy as cp
 2 | import numpy as np
 3 | 
 4 | # Create 2D numpy arrays
 5 | a = np.random.random(100000000)
 6 | a = a.reshape(10000,10000)
 7 | 
 8 | b = np.random.random(100000000)
 9 | b = b.reshape(10000,10000)
10 | 
11 | # Move to GPU
12 | g = cp.asarray(a)
13 | h = cp.asarray(b)
14 | 
15 | # Matrix Mult
16 | out = cp.matmul(g,h)
17 | 
18 | 


--------------------------------------------------------------------------------
/jobfile.txt:
--------------------------------------------------------------------------------
 1 | python image_flipper.py ./data/waterloo.jpg
 2 | python image_flipper.py ./data/victoria.jpg
 3 | python image_flipper.py ./data/paddington.jpg
 4 | python image_flipper.py ./data/charing_cross.jpg
 5 | python image_flipper.py ./data/euston.jpg
 6 | python image_flipper.py ./data/kings_cross.jpg
 7 | python image_flipper.py ./data/fenchurch.jpg
 8 | python image_flipper.py ./data/liverpool_street.jpg
 9 | python image_flipper.py ./data/st_pancras.jpg
10 | python image_flipper.py ./data/london_bridge.jpg
11 | 


--------------------------------------------------------------------------------
/make_docs:
--------------------------------------------------------------------------------
#!/bin/bash

# Build the slide deck in ./docs/ from the tutorial notebook.

# Stop at the first failing command so the rename below is not attempted
# after a failed conversion.
set -e

# Convert the notebook to slides.
# The reveal.js prefix must not carry trailing whitespace: it is used as the
# base of the asset URLs in the generated HTML.
jupyter nbconvert --to slides ParallelPython.ipynb --output-dir ./docs/ --reveal-prefix "https://cdn.jsdelivr.net/npm/reveal.js@5.0.2"

# rename to index.html
mv ./docs/ParallelPython.slides.html ./docs/index.html
8 | 
9 | 


--------------------------------------------------------------------------------
/mpi/mpi4py.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/mpi/mpi4py.pdf


--------------------------------------------------------------------------------
/mpi/mpi_message.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from mpi4py import MPI
 3 | 
 4 | comm = MPI.COMM_WORLD
 5 | size = comm.Get_size()
 6 | rank = comm.Get_rank()
 7 | 
 8 | if rank == 0:
 9 |     data = {'key1' : [7, 2.72, 2+3j],
10 |             'key2' : ( 'abc', 'xyz')}
11 | else:
12 |     data = None
13 | data = comm.bcast(data, root=0)
14 | 
15 | if data != None:
16 |     data['key1'].append(rank)
17 | 
18 | print(f"Rank: {rank}, data: {data}")
19 | 


--------------------------------------------------------------------------------
/mpi/mpi_reduce.py:
--------------------------------------------------------------------------------
 1 | from mpi4py import MPI
 2 | import numpy as np
 3 | 
 4 | comm = MPI.COMM_WORLD
 5 | rank = comm.Get_rank()
 6 | 
 7 | # Create some np arrays on each process:
 8 | # For this demo, the arrays have only one
 9 | # entry that is assigned to be the rank of the processor
10 | #value = np.array(rank,'d')
11 | value = np.random.normal(loc=rank, scale=.5, size=4)
12 | 
13 | print(' Rank: ',rank, ' value = ', value)
14 | 
15 | # initialize the np arrays that will store the results:
16 | #value_sum      = np.array(0.0,'d')
17 | #value_max      = np.array(0.0,'d')
18 | 
19 | value_sum      = np.zeros(4,'d')
20 | value_max      = np.zeros(4,'d')
21 | 
22 | # perform the reductions:
23 | comm.Reduce(value, value_sum, op=MPI.SUM, root=0)
24 | comm.Reduce(value, value_max, op=MPI.MAX, root=0)
25 | 
26 | if rank == 0:
27 |     print(' Rank 0: value_sum =    ',value_sum)
28 |     print(' Rank 0: value_max =    ',value_max)
29 | 
30 | 


--------------------------------------------------------------------------------
/mpi/mpi_scatter.py:
--------------------------------------------------------------------------------
 1 | from mpi4py import MPI
 2 | import numpy as np
 3 | 
 4 | comm = MPI.COMM_WORLD
 5 | size = comm.Get_size() # new: gives number of ranks in comm
 6 | rank = comm.Get_rank()
 7 | 
 8 | numData = 100000000
 9 | data = None
10 | if rank == 0:
11 |     data = np.random.normal(loc=10, scale=5, size=numData)
12 | 
13 | partial = np.empty(int(numData/size), dtype='d')
14 | comm.Scatter(data, partial, root=0)
15 | 
16 | reduced = None
17 | if rank == 0:
18 |     reduced = np.empty(size, dtype='d')
19 | 
20 | comm.Gather(np.average(partial), reduced, root=0)
21 | 
22 | if rank == 0:
23 |    print('Full Average:',np.average(reduced))
24 | 
25 | 


--------------------------------------------------------------------------------
/mpi/mpi_send.py:
--------------------------------------------------------------------------------
 1 | from mpi4py import MPI
 2 | comm = MPI.COMM_WORLD
 3 | size = comm.Get_size()
 4 | rank = comm.Get_rank()
 5 | 
 6 | if rank == 0:
 7 |     msg = 'Hello, world'
 8 |     comm.send(msg, dest=1)
 9 | elif rank == 1:
10 |     s = comm.recv()
11 |     print(f"rank {rank}: {s}")
12 | 
13 | 


--------------------------------------------------------------------------------
/mpi/mpi_simple.py:
--------------------------------------------------------------------------------
 1 | from mpi4py import MPI
 2 | import os
 3 | 
 4 | # instantize the communication world
 5 | comm = MPI.COMM_WORLD
 6 | 
 7 | # get the size of the communication world 
 8 | size = comm.Get_size()
 9 | 
10 | # get this particular processes' `rank` ID
11 | rank = comm.Get_rank()
12 | 
13 | PID = os.getpid()
14 | 
15 | print(f'rank: {rank} has PID: {PID}')
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/parallel_python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/parallel_python.png


--------------------------------------------------------------------------------
/survey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycrc/parallel_python/ef2857731d13168f2afeedcb358f237cf643f0d6/survey.png


--------------------------------------------------------------------------------