├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── Makefile
├── README.md
├── c++
│   ├── CMakeLists.txt
│   ├── mcpi-mpi.cpp
│   ├── mcpi-par.cpp
│   └── mcpi-seq.cpp
├── cuda
│   ├── CMakeLists.txt
│   └── mcpi-cuda.cu
├── jupyter
│   └── mcpi.ipynb
├── python
│   └── mcpi.py
└── scripts
    ├── run-mcpi-cuda.sh
    ├── run-mcpi-mpi.sh
    ├── run-mcpi-par.sh
    └── run-mcpi-seq.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | build*
2 | bin
3 | *.out
4 | .vscode
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.18...3.20 FATAL_ERROR)
2 |
3 | project(vscode-hpc CXX)
4 |
5 | add_subdirectory(c++)
6 |
7 | include(CheckLanguage)
8 | check_language(CUDA)
9 | if (CMAKE_CUDA_COMPILER)
10 | add_subdirectory(cuda)
11 | endif()
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Armin Sobhani
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | CXX=g++
2 | MPICXX=mpicxx
3 | CXX_FLAGS = -std=c++14 -O3 -Wall -DNDEBUG
4 | BIN=bin
5 | SRC=c++
6 |
7 | dir_guard=@mkdir -p $(BIN)
8 |
9 | all: mcpi-seq mcpi-par mcpi-mpi
10 |
11 | mcpi-seq: $(SRC)/mcpi-seq.cpp
12 | 	$(dir_guard)
13 | 	@$(CXX) -o $(BIN)/$@ $^ $(CXX_FLAGS)
14 |
15 | mcpi-par: $(SRC)/mcpi-par.cpp
16 | 	$(dir_guard)
17 | 	@$(CXX) -fopenmp -o $(BIN)/$@ $^ $(CXX_FLAGS)
18 |
19 | mcpi-mpi: $(SRC)/mcpi-mpi.cpp
20 | 	$(dir_guard)
21 | 	@$(MPICXX) -o $(BIN)/$@ $^ $(CXX_FLAGS)
22 |
23 | .PHONY: clean
24 |
25 | clean:
26 | 	rm -f $(BIN)/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # vscode-hpc
2 | This repository contains sample code accompanying the SHARCNET General Interest Seminars entitled *Remote Development on HPC Clusters with VSCode*. You can find recorded versions of the talks on the [SHARCNET YouTube channel](https://youtube.sharcnet.ca):
3 |
4 | * [Part I](https://www.youtube.com/watch?v=u9k6HikDyqk)
5 | * [Part II](https://www.youtube.com/watch?v=CsgBGpampvc)
6 |
7 | ## The setup
8 |
9 | * [Visual Studio Code (VSCode)](https://code.visualstudio.com/) on the system that you work on, along with the following extensions:
10 |   - [C/C++ Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode.cpptools-extension-pack)
11 |   - [Python](https://marketplace.visualstudio.com/items?itemName=ms-python.python)
12 |     * [Jupyter](https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter)
13 |     * [Pylance](https://marketplace.visualstudio.com/items?itemName=ms-python.vscode-pylance) (*optional dependency*)
14 |     * [Visual Studio IntelliCode](https://marketplace.visualstudio.com/items?itemName=VisualStudioExptTeam.vscodeintellicode) (*optional*)
15 |   - [Nsight Visual Studio Code Edition](https://marketplace.visualstudio.com/items?itemName=NVIDIA.nsight-vscode-edition)
16 |   - [Makefile Tools](https://marketplace.visualstudio.com/items?itemName=ms-vscode.makefile-tools) (*optional*)
17 |   - [GitHub Pull Requests and Issues](https://marketplace.visualstudio.com/items?itemName=GitHub.vscode-pull-request-github) (*optional*)
18 |   - [GitLens](https://marketplace.visualstudio.com/items?itemName=eamodio.gitlens) (*optional*)
19 |
20 | And the following on the platform(s) where you want to do either *local* or *remote* development:
21 | ### The Alliance Clusters
22 | For remote development on the
23 | [Digital Research Alliance of Canada (the Alliance)](https://alliancecan.ca)
24 | clusters, the following ```module``` command will do the trick, but you often have to add it at the end of your ```~/.bashrc``` file:
25 |
26 | ```
27 | module load cmake cuda scipy-stack/2022a ipykernel
28 | ```
29 | ### Linux
30 | * C++ compiler supporting the ```C++14``` standard (e.g. ```gcc``` 9.3)
31 | * [Python 3](https://www.python.org/downloads/)
32 | * [Git](https://git-scm.com/download/linux) for *Linux*
33 | * [CMake](https://cmake.org/) 3.18 or higher for *Linux*
34 | * An MPI implementation (e.g. ```OpenMPI``` or ```MPICH```)
35 | * [CUDA toolkit](https://developer.nvidia.com/cuda-downloads?target_os=Linux) for *Linux*
36 |
37 | ### Windows
38 | * [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/community/) with ```C++``` and ```Python``` support
39 | * [Git](https://git-scm.com/download/win) for *Windows*
40 | * [CMake](https://cmake.org/download/) 3.18 or higher for *Windows*
41 | * [Windows Terminal](https://aka.ms/terminal) or [Windows Terminal Preview](https://aka.ms/terminal-preview)
42 | * [MS-MPI](https://www.microsoft.com/en-us/download/details.aspx?id=100593) (both ```msmpisetup.exe``` and
43 | ```msmpisdk.msi```)
44 | * [CUDA toolkit](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64) for *Windows*
45 |
46 | On Windows systems, if you want to do both local development on *Windows* and remote development on [WSL2](https://docs.microsoft.com/en-us/windows/wsl/), you have to first install the [NVIDIA drivers for WSL with CUDA and DirectML support](https://developer.nvidia.com/cuda/wsl/download) on *Windows* and then follow these [instructions](https://docs.nvidia.com/cuda/wsl-user-guide/index.html#setting-up-linux-dev-env) in order to install ```CUDA toolkit``` on *WSL2*.
47 |
48 | ### macOS
49 | * C++ compiler supporting the ```C++14``` standard (e.g. ```clang``` 3.4)
50 | * [Python 3](https://www.python.org/downloads/)
51 | * [Git](https://git-scm.com/download/mac) for *macOS*
52 | * [CMake](https://cmake.org/download/) 3.18 or higher for *macOS*
53 | * An MPI implementation (e.g. ```OpenMPI``` or ```MPICH```)
54 | * [CUDA toolkit](https://developer.nvidia.com/nvidia-cuda-toolkit-developer-tools-mac-hosts) for *macOS*
55 |
56 | ## Get started
57 | Just run *VSCode* on the system that you work on, then select ```Clone Git Repository...``` from the ```Get Started``` page or type ```git: clone``` in the *command palette* (Ctrl+Shift+P or F1). Then paste ```https://github.com/sharcnet/vscode-hpc.git``` and hit ```Enter```.
58 |
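59 | Alternatively, you can do the same from a terminal. The following is a minimal sketch, assuming ```git``` and ```CMake``` 3.18 or higher are already on your ```PATH``` (the ```build``` directory name and the ```Release``` build type are just examples):
60 |
61 | ```
62 | git clone https://github.com/sharcnet/vscode-hpc.git
63 | cd vscode-hpc
64 | cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
65 | cmake --build build
66 | ```
67 |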
--------------------------------------------------------------------------------
/c++/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(mcpi-seq mcpi-seq.cpp)
2 | target_compile_features(mcpi-seq PRIVATE cxx_std_14)
3 | install(TARGETS mcpi-seq RUNTIME DESTINATION bin COMPONENT mcpi-seq)
4 |
5 | find_package(OpenMP)
6 | if (OpenMP_FOUND)
7 | add_executable(mcpi-par mcpi-par.cpp)
8 | target_compile_features(mcpi-par PRIVATE cxx_std_14)
9 | target_link_libraries(mcpi-par PUBLIC OpenMP::OpenMP_CXX)
10 | install(TARGETS mcpi-par RUNTIME DESTINATION bin COMPONENT mcpi-par)
11 | endif()
12 |
13 | find_package(MPI)
14 | if (MPI_FOUND)
15 | add_executable(mcpi-mpi mcpi-mpi.cpp)
16 | target_compile_features(mcpi-mpi PRIVATE cxx_std_14)
17 | target_link_libraries(mcpi-mpi PUBLIC MPI::MPI_CXX)
18 | install(TARGETS mcpi-mpi RUNTIME DESTINATION bin COMPONENT mcpi-mpi)
19 | endif()
20 |
--------------------------------------------------------------------------------
/c++/mcpi-mpi.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <iomanip>
3 | #include <chrono>
4 | #include <random>
5 | #include <cstdlib>
6 |
7 | #include <mpi.h>
8 |
9 | using namespace std;
10 |
11 | typedef chrono::high_resolution_clock timer;
12 |
13 | // get input from the command line for total number of tosses
14 | size_t process_cmdline(int argc, char* argv[])
15 | {
16 | if (argc > 2)
17 | {
18 | cout << "Usage: "
19 | << argv[0]
20 | << " [number of tosses]"
21 | << endl;
22 | return 0;
23 | }
24 | else if (1 == argc)
25 | return 10'000'000;
26 | else
27 | return atoll(argv[1]);
28 | }
29 |
30 | // perform Monte Carlo version of tossing darts at a board
31 | size_t toss(size_t n, int size, int rank)
32 | {
33 | size_t in{};
34 | std::random_device rx, ry;
35 | std::uniform_real_distribution<float> u(0, 1);
36 |
37 |
38 | for (size_t i{size_t(rank)}; i < n; i += size)
39 | {
40 | float x{u(rx)}, y{u(ry)}; // choose random x- and y-coordinates
41 | if (x * x + y * y <= 1.0) // is point in circle?
42 | ++in; // increase counter
43 | }
44 | return in;
45 | }
46 |
47 | int main(int argc, char* argv[])
48 | {
49 | // initialize MPI environment
50 | int size, rank;
51 | MPI_Init(&argc, &argv);
52 | MPI_Comm_size(MPI_COMM_WORLD, &size); // get total number of processes
53 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); // get rank of current process
54 |
55 | // read total number of tosses from the command line
56 | size_t n_tosses = process_cmdline(argc, argv);
57 | if (0 == n_tosses)
58 | return -1;
59 |
60 | if (0 == rank)
61 | cout << "Monte-Carlo Pi Estimator\n"
62 | << "Method: MPI -- "
63 | << size << " process(es)\n"
64 | << "Number of tosses: " << n_tosses << endl;
65 |
66 | // run the simulation and time it...
67 |
68 | timer::time_point start = timer::now();
69 | size_t n_partial = toss(n_tosses, size, rank);
70 | // calculate the sum of all the local counts and store the result
71 | // in 'n_in_circle' on process 0
72 | size_t n_in_circle;
73 | MPI_Reduce(&n_partial, &n_in_circle, 1,
74 | MPI_UNSIGNED_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
75 |
76 | // output the results
77 | if (0 == rank)
78 | {
79 | timer::duration elapsed = timer::now() - start;
80 | const long double pi = 3.141592653589793238462643L; // 25-digit Pi
81 | long double pi_estimate = 4.0L * n_in_circle / n_tosses;
82 | cout << "Estimated Pi: " << fixed << setw(17) << setprecision(15)
83 | << pi_estimate << endl;
84 | cout << "Percent error: " << setprecision(3)
85 | << abs(pi_estimate - pi) / pi * 100.0 << '%' << endl;
86 | cout << "Elapsed time: "
87 | << chrono::duration_cast<chrono::milliseconds>(elapsed).count()
88 | << " ms" << endl;
89 | }
90 |
91 | MPI_Finalize(); // quit MPI
92 | }
93 |
--------------------------------------------------------------------------------
/c++/mcpi-par.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <iomanip>
3 | #include <chrono>
4 | #include <random>
5 | #include <cstdlib>
6 |
7 | #include <omp.h>
8 |
9 | using namespace std;
10 |
11 | typedef chrono::high_resolution_clock timer;
12 |
13 | // get input from the command line for total number of tosses
14 | size_t process_cmdline(int argc, char* argv[])
15 | {
16 | if (argc > 3)
17 | {
18 | cout << "Usage: "
19 | << argv[0]
20 | << " [number of tosses] [number of threads]"
21 | << endl;
22 | return 0;
23 | }
24 | else if (1 == argc)
25 | return 10'000'000;
26 | else
27 | return atoll(argv[1]);
28 | }
29 |
30 | // perform Monte Carlo version of tossing darts at a board
31 | size_t toss(size_t n)
32 | {
33 | size_t in{};
34 | std::random_device rx, ry;
35 | std::uniform_real_distribution<float> u(0, 1);
36 |
37 | // distribute workload over all threads and make a global reduction
38 | #pragma omp parallel for reduction(+ : in)
39 | for (int i = 0; i < int(n); ++i)
40 | {
41 | const float x{u(rx)}, y{u(ry)}; // choose random x- and y-coords
42 | if (x * x + y * y <= 1.0) // is point in circle?
43 | ++in; // increase counter
44 | }
45 |
46 | return in;
47 | }
48 |
49 | int main(int argc, char* argv[])
50 | {
51 | // set the number of threads
52 | auto n_threads = omp_get_max_threads();
53 | if (3 == argc)
54 | {
55 | int n = atoi(argv[2]);
56 | if (0 == n)
57 | {
58 | cout << "Usage: "
59 | << argv[0]
60 | << " [number of tosses] [number of threads]"
61 | << endl;
62 | return -1;
63 | }
64 | if (n < n_threads)
65 | n_threads = n;
66 | }
67 |
68 | // read total number of tosses from the command line
69 | size_t n_tosses = process_cmdline(argc, argv);
70 | if (0 == n_tosses)
71 | return -1;
72 | cout << "Monte-Carlo Pi Estimator\n"
73 | << "Method: Parallel (OpenMP) -- "
74 | << n_threads << " thread(s)\n"
75 | << "Number of tosses: " << n_tosses << endl;
76 |
77 | // run the simulation and time it...
78 | omp_set_num_threads(n_threads);
79 | timer::time_point start = timer::now();
80 | size_t n_in_circle = toss(n_tosses);
81 | timer::duration elapsed = timer::now() - start;
82 |
83 | // output the results
84 | const long double pi = 3.141592653589793238462643L; // 25-digit Pi
85 | long double pi_estimate = 4.0L * n_in_circle / n_tosses;
86 | cout << "Estimated Pi: " << fixed << setw(17) << setprecision(15)
87 | << pi_estimate << endl
88 | << "Percent error: " << setprecision(3)
89 | << abs(pi_estimate - pi) / pi * 100.0 << '%' << endl
90 | << "Elapsed time: "
91 | << chrono::duration_cast<chrono::milliseconds>(elapsed).count()
92 | << " ms" << endl;
93 | }
94 |
--------------------------------------------------------------------------------
/c++/mcpi-seq.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <iomanip>
3 | #include <chrono>
4 | #include <random>
5 | #include <cstdlib>
6 |
7 | using namespace std;
8 |
9 | typedef chrono::high_resolution_clock timer;
10 |
11 | // get input from the command line for total number of tosses
12 | size_t process_cmdline(int argc, char* argv[])
13 | {
14 | if (argc > 2)
15 | {
16 | cout << "Usage: "
17 | << argv[0]
18 | << " [number of tosses]"
19 | << endl;
20 | return 0;
21 | }
22 | else if (1 == argc)
23 | return 10'000'000;
24 | else
25 | return atoll(argv[1]);
26 | }
27 |
28 | // perform Monte Carlo version of tossing darts at a board
29 | size_t toss(size_t n)
30 | {
31 | size_t in{};
32 | std::random_device rx, ry;
33 | std::uniform_real_distribution<float> u(0, 1);
34 | for (size_t i{}; i < n; ++i)
35 | {
36 | float x{u(rx)}, y{u(ry)}; // choose random x- and y-coordinates
37 | if (x * x + y * y <= 1.0) // is point in circle?
38 | ++in; // increase counter
39 | }
40 | return in;
41 | }
42 |
43 | int main(int argc, char* argv[])
44 | {
45 | // read total number of tosses from the command line
46 | size_t n_tosses = process_cmdline(argc, argv);
47 | if (0 == n_tosses)
48 | return -1;
49 | cout << "Monte-Carlo Pi Estimator\n"
50 | << "Method: Sequential\n"
51 | << "Number of tosses: " << n_tosses << endl;
52 |
53 | // run the simulation and time it...
54 | timer::time_point start = timer::now();
55 | size_t n_in_circle = toss(n_tosses);
56 | timer::duration elapsed = timer::now() - start;
57 |
58 | // output the results
59 | const long double pi = 3.141592653589793238462643L; // 25-digit Pi
60 | long double pi_estimate = 4.0L * n_in_circle / n_tosses;
61 | cout << "Estimated Pi: " << fixed << setw(17) << setprecision(15)
62 | << pi_estimate << endl
63 | << "Percent error: " << setprecision(3)
64 | << abs(pi_estimate - pi) / pi * 100.0 << '%' << endl
65 | << "Elapsed time: "
66 | << chrono::duration_cast<chrono::milliseconds>(elapsed).count()
67 | << " ms" << endl;
68 | }
69 |
--------------------------------------------------------------------------------
/cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | enable_language(CUDA)
2 | find_package(CUDAToolkit REQUIRED)
3 | add_executable(mcpi-cuda mcpi-cuda.cu)
4 | target_compile_options(mcpi-cuda PRIVATE
5 | "$<$,$>:-G;-src-in-ptx>")
6 | target_link_libraries(mcpi-cuda PUBLIC CUDA::curand)
7 |
--------------------------------------------------------------------------------
/cuda/mcpi-cuda.cu:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <iomanip>
3 | #include <chrono>
4 | #include <vector>
5 | #include <cstdlib>
6 |
7 | #include <cuda_runtime.h>
8 | #include <curand_kernel.h>
9 |
10 | using namespace std;
11 |
12 | typedef chrono::high_resolution_clock timer;
13 |
14 | // check if there are any errors launching the kernel
15 | #define cuda_error_check() { cuda_assert(__FILE__, __LINE__); }
16 | inline void cuda_assert(const char *file, int line, bool abort = true)
17 | {
18 | auto error = cudaGetLastError();
19 | if (error != cudaSuccess)
20 | {
21 | cerr << "CUDA error: "
22 | << cudaGetErrorString(error)
23 | << " (" << error << ") -- "
24 | << file << " -- line: "
25 | << line << endl;
26 | if (abort) exit(error);
27 | }
28 | }
29 |
30 | // get input from the command line for total number of tosses
31 | size_t process_cmdline(int argc, char* argv[])
32 | {
33 | if (argc > 3)
34 | {
35 | cout << "Usage: "
36 | << argv[0]
37 | << " [number of tosses] [number of threads]"
38 | << endl;
39 | return 0;
40 | }
41 | else if (1 == argc)
42 | return 10'000'000;
43 | else
44 | return atoll(argv[1]);
45 | }
46 |
47 | // device kernel to perform Monte Carlo version of tossing darts at a board
48 | __global__ void cuda_toss(size_t n, size_t* in)
49 | {
50 | size_t rank = threadIdx.x;
51 | size_t size = blockDim.x;
52 |
53 | // Initialize RNG
54 | curandState_t rng;
55 | curand_init(clock64(), threadIdx.x + blockIdx.x * blockDim.x, 0, &rng);
56 |
57 | in[rank] = 0; // local number of points in circle
58 | for (size_t i = 0; i < n / size; ++i)
59 | {
60 | float x = curand_uniform(&rng); // Random x position in [0,1]
61 | float y = curand_uniform(&rng); // Random y position in [0,1]
62 | // if (x * x + y * y <= 1) // is point in circle?
63 | // ++in[rank]; // increase thread-local counter
64 | in[rank] += 1 - int(x * x + y * y); // no conditional version (faster)
65 | }
66 | }
67 |
68 | int main(int argc, char* argv[])
69 | {
70 | // querying device properties
71 | cudaDeviceProp prop;
72 | cudaGetDeviceProperties(&prop, 0);
73 | cuda_error_check();
74 |
75 | // set the number of threads
76 | size_t n_threads = 128;
77 | if (3 == argc)
78 | {
79 | int n = atoi(argv[2]);
80 | if (0 == n)
81 | {
82 | cout << "Usage: "
83 | << argv[0]
84 | << " [number of tosses] [number of threads]"
85 | << endl;
86 | return -1;
87 | }
88 | if (n <= prop.maxThreadsPerBlock)
89 | n_threads = n;
90 | }
91 |
92 | // read total number of tosses from the command line
93 | size_t n_tosses = process_cmdline(argc, argv);
94 | if (0 == n_tosses)
95 | return -1;
96 |
97 | cout << "Monte-Carlo Pi Estimator\n"
98 | << "Method: CUDA (GPU) -- "
99 | << n_threads << " thread(s)\n"
100 | << "Device name: " << prop.name
101 | << "\nNumber of tosses: " << n_tosses << endl;
102 |
103 | // run the simulation and time it...
104 | //------> start timer
105 | timer::time_point start = timer::now();
106 |
107 | // memory for thread local results
108 | size_t* in_device;
109 | cudaMalloc(&in_device, n_threads * sizeof(size_t));
110 | cuda_error_check();
111 | // start parallel Monte Carlo
112 | cuda_toss<<<1, n_threads>>>(n_tosses, in_device);
113 | cuda_error_check();
114 |
115 | // reducing...
116 | vector<size_t> in(n_threads);
117 | cudaMemcpy(
118 | in.data()
119 | , in_device
120 | , n_threads * sizeof(size_t)
121 | , cudaMemcpyDeviceToHost);
122 | cuda_error_check();
123 | cudaFree(in_device);
124 | size_t n_in_circle{0};
125 | for (size_t i{0}; i < n_threads; ++i)
126 | n_in_circle += in[i];
127 |
128 | timer::duration elapsed = timer::now() - start;
129 | //------> end timer
130 |
131 | // output the results
132 | const long double pi = 3.141592653589793238462643L; // 25-digit Pi
133 | long double pi_estimate = 4.0L * n_in_circle / n_tosses;
134 | cout << "Estimated Pi: " << fixed << setw(17) << setprecision(15)
135 | << pi_estimate << endl
136 | << "Percent error: " << setprecision(3)
137 | << abs(pi_estimate - pi) / pi * 100.0 << '%' << endl
138 | << "Elapsed time: "
139 | << chrono::duration_cast<chrono::milliseconds>(elapsed).count()
140 | << " ms" << endl;
141 | }
142 |
--------------------------------------------------------------------------------
/python/mcpi.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import math
6 |
7 | from time import perf_counter
8 |
9 | n_tosses = 10000
10 |
11 | #<---- start timing
12 | start = perf_counter()
13 |
14 | x = np.random.rand(n_tosses)
15 | y = np.random.rand(n_tosses)
16 |
17 | n_in_circle = 0
18 | for i in range(n_tosses):
19 |     if (x[i]**2 + y[i]**2 <= 1):
20 |         n_in_circle += 1
21 |
22 | end = perf_counter()
23 | #<---- end timing
24 |
25 | pi_estimate = 4 * (n_in_circle / n_tosses)
26 | print("Monte-Carlo Pi Estimator")
27 | print("Estimated π: ", pi_estimate)
28 | print("Percent error: ", 100 * math.fabs(math.pi - pi_estimate) / math.pi, "%")
29 | print("Elapsed time: ", end - start, "seconds")
30 |
31 | circle_x = x[np.sqrt(x**2 + y**2) <= 1]
32 | circle_y = y[np.sqrt(x**2 + y**2) <= 1]
33 |
34 | fig = plt.figure()
35 | plot = fig.add_subplot(111)
36 | plot.scatter(x, y, marker='.', color='blue')
37 | plot.scatter(circle_x, circle_y, marker='.', color='red')
38 |
39 | x = np.linspace(0, 1, 100)
40 | y = np.sqrt(1 - x**2)
41 | plot.plot(x, y, color='black')
42 |
43 | plot.set_aspect(1.0)
44 |
45 | plt.show()
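46 | # note: on a headless cluster node you could replace plt.show() with fig.savefig("mcpi.png") to write the plot to a file instead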
--------------------------------------------------------------------------------
/scripts/run-mcpi-cuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=def-someuser
3 | #SBATCH --time=5
4 | #SBATCH --mem=4G
5 | #SBATCH --gres=gpu:1
6 | ./mcpi-cuda 100000000
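7 | # note: ./mcpi-cuda is resolved relative to the directory you run sbatch from, so submit this script from the directory that contains the binary (and replace def-someuser with your own account)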
--------------------------------------------------------------------------------
/scripts/run-mcpi-mpi.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=def-someuser
3 | #SBATCH --time=5
4 | #SBATCH --ntasks=64 # number of MPI processes
5 | #SBATCH --mem-per-cpu=1024M # memory; default unit is megabytes
6 | srun ./mcpi-mpi 100000000
--------------------------------------------------------------------------------
/scripts/run-mcpi-par.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=def-someuser
3 | #SBATCH --time=5
4 | #SBATCH --mem=4G
5 | #SBATCH --cpus-per-task=16
6 | export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
7 | ./mcpi-par 100000000 $SLURM_CPUS_PER_TASK
--------------------------------------------------------------------------------
/scripts/run-mcpi-seq.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=def-someuser
3 | #SBATCH --time=5
4 | #SBATCH --mem=4G
5 | ./mcpi-seq 100000000
--------------------------------------------------------------------------------