├── up_out_call
    ├── result.png
    └── code
    │   ├── kernel.h
    │   ├── kernel.cu
    │   ├── dev_array.h
    │   └── main.cpp
├── vanilla_call
    ├── result.png
    └── code
    │   ├── kernel.h
    │   ├── kernel.cu
    │   ├── dev_array.h
    │   └── main.cpp
├── one-asset ELS
    ├── result.png
    └── code
    │   ├── kernel.h
    │   ├── dev_array.h
    │   ├── kernel.cu
    │   └── main.cpp
├── three-asset ELS
    ├── result.png
    └── code
    │   ├── kernel.h
    │   ├── chol.h
    │   ├── dev_array.h
    │   ├── dev_matrix.h
    │   ├── kernel.cu
    │   └── main.cpp
├── two-asset ELS
    ├── result.png
    └── code
    │   ├── chol.h
    │   ├── kernel.h
    │   ├── dev_array.h
    │   ├── dev_matrix.h
    │   ├── kernel.cu
    │   └── main.cpp
├── LICENSE.md
└── README.md


/up_out_call/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ymh1989/CUDA_MC/HEAD/up_out_call/result.png


--------------------------------------------------------------------------------
/vanilla_call/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ymh1989/CUDA_MC/HEAD/vanilla_call/result.png


--------------------------------------------------------------------------------
/one-asset ELS/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ymh1989/CUDA_MC/HEAD/one-asset ELS/result.png


--------------------------------------------------------------------------------
/three-asset ELS/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ymh1989/CUDA_MC/HEAD/three-asset ELS/result.png


--------------------------------------------------------------------------------
/two-asset ELS/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ymh1989/CUDA_MC/HEAD/two-asset ELS/result.png


--------------------------------------------------------------------------------
/two-asset ELS/code/chol.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CHOL_H_
 2 | #define _CHOL_H_
 3 | 
 4 | // cholesky decomposition
 5 | void makeChol2(double dest[], const double src)
 6 | {
 7 | 	//chol2(dest, tmp_corr);
 8 | 	dest[0] = 1.0; dest[1] = 0.0;
 9 | 	dest[2] = src; dest[3] = sqrt(1.0 - src*src);
10 | }
11 | 
12 | #endif


--------------------------------------------------------------------------------
/vanilla_call/code/kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KERNEL_CUH_
 2 | #define _KERNEL_CUH_
 3 | #define CEIL(a, b) (((a)+(b)-1) / (b))
 4 | 
 5 | typedef struct optionData
 6 | {
 7 | 	double S0;
 8 | 	double r;
 9 | 	double T;
10 | 	double sig;
11 | 	double dt;
12 | 	double sqrdt;
13 | 
14 | 	double K;
15 | 
16 | 	// constructor
17 | 	optionData(double _S0,
18 | 		double _r,
19 | 		double _T,
20 | 		double _sig,
21 | 		double _dt,
22 | 		double _sqrdt,
23 | 		double _K = 0)
24 | 	{
25 | 		S0 = _S0; r = _r; T = _T;
26 | 		sig = _sig; dt = _dt;
27 | 		sqrdt = _sqrdt; K = _K;
28 | 	}
29 | 
30 | }optionData;
31 | 
32 | void Vanilla_Call_single(optionData option, double * d_s, double * d_normals, unsigned N_SIMULS, unsigned N_SIMULS);
33 | 
34 | #endif


--------------------------------------------------------------------------------
/up_out_call/code/kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KERNEL_CUH_
 2 | #define _KERNEL_CUH_
 3 | #define CEIL(a, b) (((a)+(b)-1) / (b))
 4 | 
 5 | typedef struct optionData
 6 | {
 7 | 	double S0;
 8 | 	double r;
 9 | 	double T;
10 | 	double sig;
11 | 	double dt;
12 | 	double sqrdt;
13 | 
14 | 	double K;
15 | 	double B;
16 | 
17 | 	// constructor
18 | 	optionData(double _S0,
19 | 		double _r,
20 | 		double _T,
21 | 		double _sig,
22 | 		double _dt,
23 | 		double _sqrdt,
24 | 		double _K = 0,
25 | 		double _B = 0)
26 | 	{
27 | 		S0 = _S0; r = _r; T = _T;
28 | 		sig = _sig; dt = _dt;
29 | 		sqrdt = _sqrdt; K = _K;
30 | 		B = _B;
31 | 	}
32 | 
33 | }optionData;
34 | 
35 | void up_out_barrier_single(optionData option, double * d_s, double * d_normals, unsigned N_SIMULS, unsigned N_SIMULS);
36 | 
37 | #endif


--------------------------------------------------------------------------------
/one-asset ELS/code/kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KERNEL_CUH_
 2 | #define _KERNEL_CUH_
 3 | #define length 6 // # of observation
 4 | #define CEIL(a, b) (((a)+(b)-1) / (b))
 5 | 
 6 | typedef struct optionData
 7 | {
 8 | 	double S0;
 9 | 	double S0_ref;
10 | 	double r;
11 | 	double discr;
12 | 	double T;
13 | 	double sigma;
14 | 	double dt;
15 | 	double sqrdt;
16 | 
17 | 	double B;
18 | 	double dummy;
19 | 
20 | 
21 | 	optionData(double _S0,
22 | 		double _S0_ref,
23 | 		double _r,
24 | 		double _discr,
25 | 		double _T,
26 | 		double _sigma,
27 | 		double _dt,
28 | 		double _sqrdt,
29 | 		double _B,
30 | 		double _dummy)
31 | 	{
32 | 		S0 = _S0; S0_ref = _S0_ref; r = _r;
33 | 		discr = _discr; T = _T;
34 | 		sigma = _sigma; dt = _dt;
35 | 		sqrdt = _sqrdt;
36 | 		B = _B; dummy = _dummy;
37 | 	}
38 | 
39 | }optionData;
40 | 
// Entry point of the one-asset ELS pricer; only declared here, the definition
// is outside this header.
// NOTE(review): by analogy with the vanilla/barrier launchers, d_s and d_normals
// are presumably device buffers (results and normal draws), and stk / payment /
// date presumably hold `length` entries each - confirm against kernel.cu.
void KiELS1(optionData option, double * d_s, double * stk, double * payment, double * date, double * d_normals, unsigned N_STEPS, unsigned N_SIMULS);

// Declared here, defined elsewhere.
// NOTE(review): presumably fills the rows_A x cols_A buffer A with random numbers - confirm.
void fillRand(double *A, size_t rows_A, size_t cols_A);
44 | #endif


--------------------------------------------------------------------------------
/two-asset ELS/code/kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KERNEL_CUH_
 2 | #define _KERNEL_CUH_
 3 | #define length 6 // # of observation
 4 | #define CEIL(a, b) (((a)+(b)-1) / (b))
 5 | 
 6 | typedef struct optionData
 7 | {
 8 | 	double S0;
 9 | 	double S0_ref;
10 | 	double r;
11 | 	double discr;
12 | 	double T;
13 | 	double sigma;
14 | 	double dt;
15 | 	double sqrdt;
16 | 
17 | 	double B;
18 | 	double dummy;
19 | 
20 | 
21 | 	optionData(double _S0,
22 | 		double _S0_ref,
23 | 		double _r,
24 | 		double _discr,
25 | 		double _T,
26 | 		double _sigma,
27 | 		double _dt,
28 | 		double _sqrdt,
29 | 		double _B,
30 | 		double _dummy)
31 | 	{
32 | 		S0 = _S0; S0_ref = _S0_ref; r = _r; 
33 | 		discr = _discr; T = _T;
34 | 		sigma = _sigma; dt = _dt;
35 | 		sqrdt = _sqrdt;
36 | 		B = _B; dummy = _dummy;
37 | 	}
38 | 
39 | }optionData;
40 | 
// Entry point of the two-asset ELS pricer, taking one optionData per
// underlying; only declared here, the definition is outside this header.
// NOTE(review): by analogy with the vanilla/barrier launchers, d_s and d_normals
// are presumably device buffers (results and normal draws), and stk / payment /
// date presumably hold `length` entries each - confirm against kernel.cu.
void ELS2(optionData option1, optionData option2, double * d_s, double * stk, double * payment, double * date, double * d_normals, unsigned N_STEPS, unsigned N_SIMULS);

// Declared here, defined elsewhere.
// NOTE(review): presumably fills the rows_A x cols_A device buffer A with random numbers - confirm.
void dev_fillRand(double *A, size_t rows_A, size_t cols_A);

// Declared here, defined elsewhere.
// NOTE(review): presumably a device matrix product C = A * B with A of size m x k
// and B of size k x n; storage order is not visible from this header - confirm.
void dev_mmul(const double *A, const double *B, double *C, const int m, const int k, const int n);
46 | #endif


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Minhyun Yoo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/three-asset ELS/code/kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KERNEL_CUH_
 2 | #define _KERNEL_CUH_
 3 | #define length 6 // # of observation
 4 |  
 5 | #define MIN_USERDEFINE(X, Y) (((X) < (Y)) ? (X) : (Y))
 6 | #define CEIL(a, b) (((a)+(b)-1) / (b))
 7 | typedef struct optionData
 8 | {
 9 | 	double S0;
10 | 	double S0_ref;
11 | 	double r;
12 | 	double discr;
13 | 	double T;
14 | 	double sigma;
15 | 	double dt;
16 | 	double sqrdt;
17 | 
18 | 	double B;
19 | 	double dummy;
20 | 
21 | 
22 | 	optionData(double _S0,
23 | 		double _S0_ref,
24 | 		double _r,
25 | 		double _discr,
26 | 		double _T,
27 | 		double _sigma,
28 | 		double _dt,
29 | 		double _sqrdt,
30 | 		double _B,
31 | 		double _dummy)
32 | 	{
33 | 		S0 = _S0; S0_ref = _S0_ref; r = _r;
34 | 		discr = _discr; T = _T;
35 | 		sigma = _sigma; dt = _dt;
36 | 		sqrdt = _sqrdt;
37 | 		B = _B; dummy = _dummy;
38 | 	}
39 | 
40 | }optionData;
41 | 
// Entry point of the three-asset ELS pricer, taking one optionData per
// underlying; only declared here, the definition is outside this header.
// NOTE(review): by analogy with the vanilla/barrier launchers, d_s and d_normals
// are presumably device buffers (results and normal draws), and stk / payment /
// date presumably hold `length` entries each - confirm against kernel.cu.
void ELS3(optionData option1, optionData option2, optionData option3, double * d_s, double * stk, double * payment, double * date, double * d_normals, unsigned N_STEPS, unsigned N_SIMULS);

// Declared here, defined elsewhere.
// NOTE(review): presumably fills the rows_A x cols_A device buffer A with random numbers - confirm.
void dev_fillRand(double *A, size_t rows_A, size_t cols_A);

// Declared here, defined elsewhere.
// NOTE(review): presumably a device matrix product C = A * B with A of size m x k
// and B of size k x n; storage order is not visible from this header - confirm.
void dev_mmul(const double *A, const double *B, double *C, const int m, const int k, const int n);
47 | #endif


--------------------------------------------------------------------------------
/three-asset ELS/code/chol.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CHOL_H_
 2 | #define _CHOL_H_
 3 | 
 4 | // cholesky decomposition
 5 | void chol3(double dest[], double src[][3])
 6 | {
 7 | 	int i = 0, j = 0, k = 0, n = 3;
 8 | 	double tmparr[3][3] = { 0 };
 9 | 
10 | 	double sum1 = 0.0;
11 | 	double sum2 = 0.0;
12 | 	double sum3 = 0.0;
13 | 
14 | 	tmparr[0][0] = sqrt(src[0][0]);
15 | 	for (j = 1; j <= n - 1; j++)
16 | 		tmparr[j][0] = src[j][0] / tmparr[0][0];
17 | 	for (i = 1; i <= (n - 2); i++)
18 | 	{
19 | 		for (k = 0; k <= (i - 1); k++)
20 | 			sum1 += pow(tmparr[i][k], 2);
21 | 		tmparr[i][i] = sqrt(src[i][i] - sum1);
22 | 		for (j = (i + 1); j <= (n - 1); j++)
23 | 		{
24 | 			for (k = 0; k <= (i - 1); k++)
25 | 				sum2 += tmparr[j][k] * tmparr[i][k];
26 | 			tmparr[j][i] = (src[j][i] - sum2) / tmparr[i][i];
27 | 		}
28 | 	}
29 | 	for (k = 0; k <= (n - 2); k++)
30 | 		sum3 += pow(tmparr[n - 1][k], 2);
31 | 	tmparr[n - 1][n - 1] = sqrt(src[n - 1][n - 1] - sum3);
32 | 
33 | 	for (i = 0; i < 3; i++) {
34 | 		for (j = 0; j < 3; j++) {
35 | 			dest[i * 3 + j] = tmparr[i][j];
36 | 		}
37 | 	}
38 | }
39 | 
40 | void makeChol3(double dest[], const double * src)
41 | {
42 | 	int i, j;
43 | 	unsigned cnt = 0;
44 | 	double tmp_corr[3][3] = { 0 };
45 | 	for (i = 0; i < 3; i++){
46 | 		for (j = 0; j <= i; j++) {
47 | 			if (i == j) {
48 | 				tmp_corr[j][i] = 1.0;
49 | 			}
50 | 			else {
51 | 				tmp_corr[j][i] = src[cnt++];
52 | 				tmp_corr[i][j] = tmp_corr[j][i];
53 | 			}
54 | 		}
55 | 	}
56 | 
57 | 	chol3(dest, tmp_corr);
58 | }
59 | 
60 | #endif


--------------------------------------------------------------------------------
/vanilla_call/code/kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "kernel.h"
 2 | 
 3 | __global__ void Kernel_Vanilla_Call_single(
 4 | 	optionData data,
 5 | 	double * d_s,
 6 | 	double * d_normals,
 7 | 	unsigned N_STEPS,
 8 | 	unsigned N_SIMULS)
 9 | {
10 | 	int s_idx = threadIdx.x + blockIdx.x * blockDim.x; // thread index
11 | 	int n_idx = (s_idx)* N_STEPS; // for random number indexing
12 | 
13 | 	// check thread # < # of simuls
14 | 	if (s_idx < N_SIMULS) {
15 | 		int n = 0;
16 | 
17 | 		// Initialize
18 | 		double s_curr = data.S0;
19 | 		double T = data.T;
20 | 		double sig = data.sig;
21 | 		double r = data.r;
22 | 		double dt = data.dt;
23 | 		double sqrdt = data.sqrdt;
24 | 		double K = data.K;
25 | 
26 | 		double payoff = 0.0;
27 | 
28 | 		do {
29 | 			s_curr = s_curr * exp((r - (sig*sig)*0.5)*dt + sig*sqrdt*d_normals[n_idx]);
30 | 			
31 | 			n_idx++; // random number index
32 | 			n++; // time stepping
33 | 		} while (n < N_STEPS);
34 | 
35 | 		// payoff using ternary operator
36 | 		payoff = (s_curr > K)  ? (s_curr - K) : 0;
37 | 
38 | 		// to save results, sycronize threads
39 | 		__syncthreads();
40 | 
41 | 		// save payoff
42 | 		d_s[s_idx] = payoff;
43 | 	}
44 | }
45 | 
// Launches Kernel_Vanilla_Call_single with one thread per simulated path.
//
//   option    - contract parameters, copied to the kernel by value
//   d_s       - device buffer of N_SIMULS doubles (payoff per path)
//   d_normals - device buffer of N_SIMULS * N_STEPS normal draws
//   N_STEPS   - time steps per path
//   N_SIMULS  - number of paths; 0 is a no-op
//
// The launch is asynchronous and its status is not checked here.
void Vanilla_Call_single(
	optionData option,
	double * d_s,
	double * d_normals,
	unsigned N_STEPS,
	unsigned N_SIMULS) {
	// a grid with zero blocks is an invalid launch configuration
	if (N_SIMULS == 0) return;

	const unsigned BLOCK_SIZE = 1024; // # of threads in a block (1-dimension threads & block)
	const unsigned GRID_SIZE = CEIL(N_SIMULS, BLOCK_SIZE); // # of block in a grid
	Kernel_Vanilla_Call_single<<<GRID_SIZE, BLOCK_SIZE>>>
		(option, d_s, d_normals, N_STEPS, N_SIMULS);
}


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ##  Monte Carlo simulation to option pricing in CUDA
 2 | 
 3 | ### Introduction
 4 | This repo contains an implementation of pricing financial derivatives using Monte Carlo simulation with CUDA (Compute Unified Device Architecture).
 5 | 
 6 | ### Environment
 7 | - GPU : NVIDIA GeForce GTX 650 @ 1.072GHZ GDDR5 1GB
 8 | - [CUDA toolkit 7.5](https://developer.nvidia.com/cuda-toolkit)
 9 | - CPU : Intel(R) Core(TM) i5-6400 @ 2.7GHZ 
10 | - RAM : DDR3L 16GB PC3-12800
11 | - Microsoft Visual Studio Community 2013
12 | 
13 | ### Result
14 | - In this repo, I compare the performance between CPU and GPU. The parameters can be modified freely.
15 | 
16 | | | European call | UP&out call | ELS 1 asset<p>(price&greeks)| ELS 2 asset<p>(price&greeks) | ELS 3 asset<p>(price&greeks)
17 | ------------ | ------------ | ------------- | ------------- | ------------- | -------------
18 | GPU | 88ms <p>(10<sup>7</sup> simuls)</p> | 251ms <p>(10<sup>5</sup> simuls)</p>| 129ms <p>(10<sup>4</sup> simuls)</p> | 223ms <p>(10<sup>4</sup> simuls) | 833ms <p>(10<sup>4</sup> simuls)
19 | CPU | 275ms <p>(10<sup>7</sup> simuls)</p> | 484ms <p>(10<sup>5</sup> simuls)</p>| N/A | N/A | N/A
20 | 
21 | ** As listed under `Environment`, the GPU I tested on is an older model (late 2012), whereas the CPU is a recent one (early 2016). Please keep this in mind: it is why the results show no marked difference in computational cost.
22 | 
23 | 
24 | ### Note
25 | - You need to add `curand.lib` files as linker input in the development environment.
26 | - Also, the platform you are targeting in the VS configuration manager should be `x64`, since `curand.lib` is an `x64` library.
27 | - If you're interested in my works, please [email](mailto:yoomh1989@gmail.com) me.
28 | 


--------------------------------------------------------------------------------
/up_out_call/code/kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "kernel.h"
 2 | 
 3 | // up-and-out call
 4 | __global__ void Kernel_up_out_barrier_single(
 5 | 	optionData data,
 6 | 	double * d_s,
 7 | 	double * d_normals,
 8 | 	unsigned N_SIMULS,
 9 | 	unsigned N_SIMULS)
10 | {
11 | 	int s_idx = threadIdx.x + blockIdx.x * blockDim.x; // thread index
12 | 	int n_idx = (s_idx)* N_SIMULS; // for random number indexing
13 | 
14 | 	// check thread # < # of simuls
15 | 	if (s_idx < N_SIMULS) {
16 | 		int n = 0;
17 | 
18 | 		double s_curr = data.S0;
19 | 		double T = data.T;
20 | 		double sig = data.sig;
21 | 		double r = data.r;
22 | 		double dt = data.dt;
23 | 		double sqrdt = data.sqrdt;
24 | 		double K = data.K;
25 | 		double B = data.B;
26 | 
27 | 		double payoff = 0.0;
28 | 		bool tag = 0; // tag for path-dependent property
29 | 		do {
30 | 			s_curr = s_curr * exp((r - (sig*sig)*0.5)*dt + sig*sqrdt*d_normals[n_idx]);
31 | 			
32 | 			tag = (s_curr > B) ? 1 : tag; // check knock-out (if s > B, tag = 1, otherwise tag is retained.)
33 | 
34 | 			n_idx++; // random number index
35 | 			n++; // time stepping
36 | 		} while (n < N_SIMULS);
37 | 
38 | 		// payoff using ternary operator
39 | 		payoff = tag ? 0 : ((s_curr > K) ? (s_curr - K) : 0);
40 | 
41 | 		// to save results, sycronize threads
42 | 		__syncthreads();
43 | 
44 | 		// save payoff
45 | 		d_s[s_idx] = payoff;
46 | 	}
47 | }
48 | 
// Launches Kernel_up_out_barrier_single with one thread per simulated path.
//
//   option    - contract parameters, copied to the kernel by value
//   d_s       - device buffer of N_SIMULS doubles (payoff per path)
//   d_normals - device buffer of N_SIMULS * N_STEPS normal draws
//   N_STEPS   - time steps per path
//   N_SIMULS  - number of paths; 0 is a no-op
//
// The launch is asynchronous and its status is not checked here.
void up_out_barrier_single(
	optionData option,
	double * d_s,
	double * d_normals,
	unsigned N_STEPS,
	unsigned N_SIMULS) {
	// a grid with zero blocks is an invalid launch configuration
	if (N_SIMULS == 0) return;

	const unsigned BLOCK_SIZE = 1024; // # of threads in a block (1-dimension threads & block)
	const unsigned GRID_SIZE = CEIL(N_SIMULS, BLOCK_SIZE); // # of block in a grid
	Kernel_up_out_barrier_single<<<GRID_SIZE, BLOCK_SIZE>>>
		(option, d_s, d_normals, N_STEPS, N_SIMULS);
}


--------------------------------------------------------------------------------
/up_out_call/code/dev_array.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DEV_ARRAY_H_
  2 | #define _DEV_ARRAY_H_
  3 | 
  4 | #include <stdexcept>
  5 | #include <algorithm>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | // This header is referred at follow link.
  9 | // https://www.quantstart.com/articles/Monte-Carlo-Simulations-In-CUDA-Barrier-Option-Pricing
 10 | // Easy form for CUDA syntax
 11 | 
 12 | template <typename T>
 13 | class dev_array
 14 | {
 15 | 	// public functions
 16 | public:
 17 | 	explicit dev_array()
 18 | 		: start_(0),
 19 | 		end_(0)
 20 | 	{}
 21 | 
 22 | 	// constructor
 23 | 	explicit dev_array(size_t size)
 24 | 	{
 25 | 		allocate(size);
 26 | 	}
 27 | 	// destructor
 28 | 	~dev_array()
 29 | 	{
 30 | 		free();
 31 | 	}
 32 | 
 33 | 	// resize the vector
 34 | 	void resize(size_t size)
 35 | 	{
 36 | 		free();
 37 | 		allocate(size);
 38 | 	}
 39 | 
 40 | 	// get the size of the array
 41 | 	size_t getSize() const
 42 | 	{
 43 | 		return end_ - start_;
 44 | 	}
 45 | 
 46 | 	// get data
 47 | 	const T* getData() const
 48 | 	{
 49 | 		return start_;
 50 | 	}
 51 | 
 52 | 	T* getData()
 53 | 	{
 54 | 		return start_;
 55 | 	}
 56 | 
 57 | 	// set
 58 | 	void set(const T* src, size_t size)
 59 | 	{
 60 | 		size_t min = std::min(size, getSize());
 61 | 		cudaError_t result = cudaMemcpy(start_, src, min * sizeof(T), cudaMemcpyHostToDevice);
 62 | 		if (result != cudaSuccess)
 63 | 		{
 64 | 			throw std::runtime_error("failed to copy to device memory");
 65 | 		}
 66 | 	}
 67 | 	// get
 68 | 	void get(T* dest, size_t size)
 69 | 	{
 70 | 		size_t min = std::min(size, getSize());
 71 | 		cudaError_t result = cudaMemcpy(dest, start_, min * sizeof(T), cudaMemcpyDeviceToHost);
 72 | 		if (result != cudaSuccess)
 73 | 		{
 74 | 			throw std::runtime_error("failed to copy to host memory");
 75 | 		}
 76 | 	}
 77 | 
 78 | 
 79 | 	// private functions
 80 | private:
 81 | 	// allocate memory on the device
 82 | 	void allocate(size_t size)
 83 | 	{
 84 | 		cudaError_t result = cudaMalloc((void**)&start_, size * sizeof(T));
 85 | 		if (result != cudaSuccess)
 86 | 		{
 87 | 			start_ = end_ = 0;
 88 | 			throw std::runtime_error("failed to allocate device memory");
 89 | 		}
 90 | 		end_ = start_ + size;
 91 | 	}
 92 | 
 93 | 	// free memory on the device
 94 | 	void free()
 95 | 	{
 96 | 		if (start_ != 0)
 97 | 		{
 98 | 			cudaFree(start_);
 99 | 			start_ = end_ = 0;
100 | 		}
101 | 	}
102 | 
103 | 	T* start_;
104 | 	T* end_;
105 | };
106 | 
107 | #endif


--------------------------------------------------------------------------------
/one-asset ELS/code/dev_array.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DEV_ARRAY_H_
  2 | #define _DEV_ARRAY_H_
  3 | 
  4 | #include <stdexcept>
  5 | #include <algorithm>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | // This header is referred at follow link.
  9 | // https://www.quantstart.com/articles/Monte-Carlo-Simulations-In-CUDA-Barrier-Option-Pricing
 10 | // Easy form for CUDA syntax
 11 | 
 12 | template <typename T>
 13 | class dev_array
 14 | {
 15 | 	// public functions
 16 | public:
 17 | 	explicit dev_array()
 18 | 		: start_(0),
 19 | 		end_(0)
 20 | 	{}
 21 | 
 22 | 	// constructor
 23 | 	explicit dev_array(size_t size)
 24 | 	{
 25 | 		allocate(size);
 26 | 	}
 27 | 	// destructor
 28 | 	~dev_array()
 29 | 	{
 30 | 		free();
 31 | 	}
 32 | 
 33 | 	// resize the vector
 34 | 	void resize(size_t size)
 35 | 	{
 36 | 		free();
 37 | 		allocate(size);
 38 | 	}
 39 | 
 40 | 	// get the size of the array
 41 | 	size_t getSize() const
 42 | 	{
 43 | 		return end_ - start_;
 44 | 	}
 45 | 
 46 | 	// get data
 47 | 	const T* getData() const
 48 | 	{
 49 | 		return start_;
 50 | 	}
 51 | 
 52 | 	T* getData()
 53 | 	{
 54 | 		return start_;
 55 | 	}
 56 | 
 57 | 	// set
 58 | 	void set(const T* src, size_t size)
 59 | 	{
 60 | 		size_t min = std::min(size, getSize());
 61 | 		cudaError_t result = cudaMemcpy(start_, src, min * sizeof(T), cudaMemcpyHostToDevice);
 62 | 		if (result != cudaSuccess)
 63 | 		{
 64 | 			throw std::runtime_error("failed to copy to device memory");
 65 | 		}
 66 | 	}
 67 | 	// get
 68 | 	void get(T* dest, size_t size)
 69 | 	{
 70 | 		size_t min = std::min(size, getSize());
 71 | 		cudaError_t result = cudaMemcpy(dest, start_, min * sizeof(T), cudaMemcpyDeviceToHost);
 72 | 		if (result != cudaSuccess)
 73 | 		{
 74 | 			throw std::runtime_error("failed to copy to host memory");
 75 | 		}
 76 | 	}
 77 | 
 78 | 
 79 | 	// private functions
 80 | private:
 81 | 	// allocate memory on the device
 82 | 	void allocate(size_t size)
 83 | 	{
 84 | 		cudaError_t result = cudaMalloc((void**)&start_, size * sizeof(T));
 85 | 		if (result != cudaSuccess)
 86 | 		{
 87 | 			start_ = end_ = 0;
 88 | 			throw std::runtime_error("failed to allocate device memory");
 89 | 		}
 90 | 		end_ = start_ + size;
 91 | 	}
 92 | 
 93 | 	// free memory on the device
 94 | 	void free()
 95 | 	{
 96 | 		if (start_ != 0)
 97 | 		{
 98 | 			cudaFree(start_);
 99 | 			start_ = end_ = 0;
100 | 		}
101 | 	}
102 | 
103 | 	T* start_;
104 | 	T* end_;
105 | };
106 | 
107 | #endif


--------------------------------------------------------------------------------
/three-asset ELS/code/dev_array.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DEV_ARRAY_H_
  2 | #define _DEV_ARRAY_H_
  3 | 
  4 | #include <stdexcept>
  5 | #include <algorithm>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | // This header is referred at follow link.
  9 | // https://www.quantstart.com/articles/Monte-Carlo-Simulations-In-CUDA-Barrier-Option-Pricing
 10 | // Easy form for CUDA syntax
 11 | 
 12 | template <typename T>
 13 | class dev_array
 14 | {
 15 | 	// public functions
 16 | public:
 17 | 	explicit dev_array()
 18 | 		: start_(0),
 19 | 		end_(0)
 20 | 	{}
 21 | 
 22 | 	// constructor
 23 | 	explicit dev_array(size_t size)
 24 | 	{
 25 | 		allocate(size);
 26 | 	}
 27 | 	// destructor
 28 | 	~dev_array()
 29 | 	{
 30 | 		free();
 31 | 	}
 32 | 
 33 | 	// resize the vector
 34 | 	void resize(size_t size)
 35 | 	{
 36 | 		free();
 37 | 		allocate(size);
 38 | 	}
 39 | 
 40 | 	// get the size of the array
 41 | 	size_t getSize() const
 42 | 	{
 43 | 		return end_ - start_;
 44 | 	}
 45 | 
 46 | 	// get data
 47 | 	const T* getData() const
 48 | 	{
 49 | 		return start_;
 50 | 	}
 51 | 
 52 | 	T* getData()
 53 | 	{
 54 | 		return start_;
 55 | 	}
 56 | 
 57 | 	// set
 58 | 	void set(const T* src, size_t size)
 59 | 	{
 60 | 		size_t min = std::min(size, getSize());
 61 | 		cudaError_t result = cudaMemcpy(start_, src, min * sizeof(T), cudaMemcpyHostToDevice);
 62 | 		if (result != cudaSuccess)
 63 | 		{
 64 | 			throw std::runtime_error("failed to copy to device memory");
 65 | 		}
 66 | 	}
 67 | 	// get
 68 | 	void get(T* dest, size_t size)
 69 | 	{
 70 | 		size_t min = std::min(size, getSize());
 71 | 		cudaError_t result = cudaMemcpy(dest, start_, min * sizeof(T), cudaMemcpyDeviceToHost);
 72 | 		if (result != cudaSuccess)
 73 | 		{
 74 | 			throw std::runtime_error("failed to copy to host memory");
 75 | 		}
 76 | 	}
 77 | 
 78 | 
 79 | 	// private functions
 80 | private:
 81 | 	// allocate memory on the device
 82 | 	void allocate(size_t size)
 83 | 	{
 84 | 		cudaError_t result = cudaMalloc((void**)&start_, size * sizeof(T));
 85 | 		if (result != cudaSuccess)
 86 | 		{
 87 | 			start_ = end_ = 0;
 88 | 			throw std::runtime_error("failed to allocate device memory");
 89 | 		}
 90 | 		end_ = start_ + size;
 91 | 	}
 92 | 
 93 | 	// free memory on the device
 94 | 	void free()
 95 | 	{
 96 | 		if (start_ != 0)
 97 | 		{
 98 | 			cudaFree(start_);
 99 | 			start_ = end_ = 0;
100 | 		}
101 | 	}
102 | 
103 | 	T* start_;
104 | 	T* end_;
105 | };
106 | 
107 | #endif


--------------------------------------------------------------------------------
/two-asset ELS/code/dev_array.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DEV_ARRAY_H_
  2 | #define _DEV_ARRAY_H_
  3 | 
  4 | #include <stdexcept>
  5 | #include <algorithm>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | // This header is referred at follow link.
  9 | // https://www.quantstart.com/articles/Monte-Carlo-Simulations-In-CUDA-Barrier-Option-Pricing
 10 | // Easy form for CUDA syntax
 11 | 
 12 | template <typename T>
 13 | class dev_array
 14 | {
 15 | 	// public functions
 16 | public:
 17 | 	explicit dev_array()
 18 | 		: start_(0),
 19 | 		end_(0)
 20 | 	{}
 21 | 
 22 | 	// constructor
 23 | 	explicit dev_array(size_t size)
 24 | 	{
 25 | 		allocate(size);
 26 | 	}
 27 | 	// destructor
 28 | 	~dev_array()
 29 | 	{
 30 | 		free();
 31 | 	}
 32 | 
 33 | 	// resize the vector
 34 | 	void resize(size_t size)
 35 | 	{
 36 | 		free();
 37 | 		allocate(size);
 38 | 	}
 39 | 
 40 | 	// get the size of the array
 41 | 	size_t getSize() const
 42 | 	{
 43 | 		return end_ - start_;
 44 | 	}
 45 | 
 46 | 	// get data
 47 | 	const T* getData() const
 48 | 	{
 49 | 		return start_;
 50 | 	}
 51 | 
 52 | 	T* getData()
 53 | 	{
 54 | 		return start_;
 55 | 	}
 56 | 
 57 | 	// set
 58 | 	void set(const T* src, size_t size)
 59 | 	{
 60 | 		size_t min = std::min(size, getSize());
 61 | 		cudaError_t result = cudaMemcpy(start_, src, min * sizeof(T), cudaMemcpyHostToDevice);
 62 | 		if (result != cudaSuccess)
 63 | 		{
 64 | 			throw std::runtime_error("failed to copy to device memory");
 65 | 		}
 66 | 	}
 67 | 	// get
 68 | 	void get(T* dest, size_t size)
 69 | 	{
 70 | 		size_t min = std::min(size, getSize());
 71 | 		cudaError_t result = cudaMemcpy(dest, start_, min * sizeof(T), cudaMemcpyDeviceToHost);
 72 | 		if (result != cudaSuccess)
 73 | 		{
 74 | 			throw std::runtime_error("failed to copy to host memory");
 75 | 		}
 76 | 	}
 77 | 
 78 | 
 79 | 	// private functions
 80 | private:
 81 | 	// allocate memory on the device
 82 | 	void allocate(size_t size)
 83 | 	{
 84 | 		cudaError_t result = cudaMalloc((void**)&start_, size * sizeof(T));
 85 | 		if (result != cudaSuccess)
 86 | 		{
 87 | 			start_ = end_ = 0;
 88 | 			throw std::runtime_error("failed to allocate device memory");
 89 | 		}
 90 | 		end_ = start_ + size;
 91 | 	}
 92 | 
 93 | 	// free memory on the device
 94 | 	void free()
 95 | 	{
 96 | 		if (start_ != 0)
 97 | 		{
 98 | 			cudaFree(start_);
 99 | 			start_ = end_ = 0;
100 | 		}
101 | 	}
102 | 
103 | 	T* start_;
104 | 	T* end_;
105 | };
106 | 
107 | #endif


--------------------------------------------------------------------------------
/vanilla_call/code/dev_array.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DEV_ARRAY_H_
  2 | #define _DEV_ARRAY_H_
  3 | 
  4 | #include <stdexcept>
  5 | #include <algorithm>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | // This header is referred at follow link.
  9 | // https://www.quantstart.com/articles/Monte-Carlo-Simulations-In-CUDA-Barrier-Option-Pricing
 10 | // Easy form for CUDA syntax
 11 | 
 12 | template <typename T>
 13 | class dev_array
 14 | {
 15 | 	// public functions
 16 | public:
 17 | 	explicit dev_array()
 18 | 		: start_(0),
 19 | 		end_(0)
 20 | 	{}
 21 | 
 22 | 	// constructor
 23 | 	explicit dev_array(size_t size)
 24 | 	{
 25 | 		allocate(size);
 26 | 	} 
 27 | 	// destructor
 28 | 	~dev_array()
 29 | 	{
 30 | 		free();
 31 | 	}
 32 | 
 33 | 	// resize the vector
 34 | 	void resize(size_t size)
 35 | 	{
 36 | 		free();
 37 | 		allocate(size);
 38 | 	}
 39 | 
 40 | 	// get the size of the array
 41 | 	size_t getSize() const
 42 | 	{
 43 | 		return end_ - start_;
 44 | 	}
 45 | 
 46 | 	// get data
 47 | 	const T* getData() const
 48 | 	{
 49 | 		return start_;
 50 | 	}
 51 | 
 52 | 	T* getData()
 53 | 	{
 54 | 		return start_;
 55 | 	}
 56 | 
 57 | 	// set
 58 | 	void set(const T* src, size_t size)
 59 | 	{
 60 | 		size_t min = std::min(size, getSize());
 61 | 		cudaError_t result = cudaMemcpy(start_, src, min * sizeof(T), cudaMemcpyHostToDevice);
 62 | 		if (result != cudaSuccess)
 63 | 		{
 64 | 			throw std::runtime_error("failed to copy to device memory");
 65 | 		}
 66 | 	}
 67 | 	// get
 68 | 	void get(T* dest, size_t size)
 69 | 	{
 70 | 		size_t min = std::min(size, getSize());
 71 | 		cudaError_t result = cudaMemcpy(dest, start_, min * sizeof(T), cudaMemcpyDeviceToHost);
 72 | 		if (result != cudaSuccess)
 73 | 		{
 74 | 			throw std::runtime_error("failed to copy to host memory");
 75 | 		}
 76 | 	}
 77 | 
 78 | 
 79 | 	// private functions
 80 | private:
 81 | 	// allocate memory on the device
 82 | 	void allocate(size_t size)
 83 | 	{
 84 | 		cudaError_t result = cudaMalloc((void**)&start_, size * sizeof(T));
 85 | 		if (result != cudaSuccess)
 86 | 		{
 87 | 			start_ = end_ = 0;
 88 | 			throw std::runtime_error("failed to allocate device memory");
 89 | 		}
 90 | 		end_ = start_ + size;
 91 | 	}
 92 | 
 93 | 	// free memory on the device
 94 | 	void free()
 95 | 	{
 96 | 		if (start_ != 0)
 97 | 		{
 98 | 			cudaFree(start_);
 99 | 			start_ = end_ = 0;
100 | 		}
101 | 	}
102 | 
103 | 	T* start_;
104 | 	T* end_;
105 | };
106 | 
107 | #endif


--------------------------------------------------------------------------------
/three-asset ELS/code/dev_matrix.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DEV_MATRIX_H_
  2 | #define _DEV_MATRIX_H_
  3 | 
  4 | #include <stdexcept>
  5 | #include <algorithm>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | // This header is referred at follow link.
  9 | // https://www.quantstart.com/articles/Monte-Carlo-Simulations-In-CUDA-Barrier-Option-Pricing
 10 | // modified by Minhyun Yoo
 11 | // Easy form for CUDA syntax
 12 | 
 13 | template <typename T>
 14 | class dev_matrix
 15 | {
 16 | 	// public functions
 17 | public:
 18 | 	explicit dev_matrix()
 19 | 		: start_(0),
 20 | 		end_(0)
 21 | 	{}
 22 | 
 23 | 	// constructor
 24 | 	explicit dev_matrix(size_t row, size_t col)
 25 | 	{
 26 | 		allocate(row*col);
 27 | 	}
 28 | 	// destructor
 29 | 	~dev_matrix()
 30 | 	{
 31 | 		free();
 32 | 	}
 33 | 
 34 | 	// resize the vector
 35 | 	void resize(size_t row, size_t col)
 36 | 	{
 37 | 		free();
 38 | 		allocate(row*col);
 39 | 	}
 40 | 
 41 | 	// get the size of the array
 42 | 	size_t getSize() const
 43 | 	{
 44 | 		return end_ - start_;
 45 | 	}
 46 | 
 47 | 	// get data
 48 | 	const T* getData() const
 49 | 	{
 50 | 		return start_;
 51 | 	}
 52 | 
 53 | 	T* getData()
 54 | 	{
 55 | 		return start_;
 56 | 	}
 57 | 
 58 | 	// set
 59 | 	void set(const T* src, size_t row, size_t col)
 60 | 	{
 61 | 		size_t min = std::min(row*col, getSize());
 62 | 		cudaError_t result = cudaMemcpy(start_, src, min * sizeof(T), cudaMemcpyHostToDevice);
 63 | 		if (result != cudaSuccess)
 64 | 		{
 65 | 			throw std::runtime_error("failed to copy to device memory");
 66 | 		}
 67 | 	}
 68 | 	// get
 69 | 	void get(T* dest, size_t row, size_t col)
 70 | 	{
 71 | 		size_t min = std::min(row*col, getSize());
 72 | 		cudaError_t result = cudaMemcpy(dest, start_, min * sizeof(T), cudaMemcpyDeviceToHost);
 73 | 		if (result != cudaSuccess)
 74 | 		{
 75 | 			throw std::runtime_error("failed to copy to host memory");
 76 | 		}
 77 | 	}
 78 | 
 79 | 
 80 | 	// private functions
 81 | private:
 82 | 	// allocate memory on the device
 83 | 	void allocate(size_t size)
 84 | 	{
 85 | 		cudaError_t result = cudaMalloc((void**)&start_, size * sizeof(T));
 86 | 		if (result != cudaSuccess)
 87 | 		{
 88 | 			start_ = end_ = 0;
 89 | 			throw std::runtime_error("failed to allocate device memory");
 90 | 		}
 91 | 		end_ = start_ + size;
 92 | 	}
 93 | 
 94 | 	// free memory on the device
 95 | 	void free()
 96 | 	{
 97 | 		if (start_ != 0)
 98 | 		{
 99 | 			cudaFree(start_);
100 | 			start_ = end_ = 0;
101 | 		}
102 | 	}
103 | 
104 | 	T* start_;
105 | 	T* end_;
106 | };
107 | 
108 | #endif


--------------------------------------------------------------------------------
/two-asset ELS/code/dev_matrix.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DEV_MATRIX_H_
  2 | #define _DEV_MATRIX_H_
  3 | 
  4 | #include <stdexcept>
  5 | #include <algorithm>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | // This header is adapted from the code at the following link:
  9 | // https://www.quantstart.com/articles/Monte-Carlo-Simulations-In-CUDA-Barrier-Option-Pricing
 10 | // modified by Minhyun Yoo
 11 | // Easy form for CUDA syntax
 12 | 
 13 | template <typename T>
 14 | class dev_matrix
 15 | {
 16 | 	// public functions
 17 | public:
 18 | 	explicit dev_matrix()
 19 | 		: start_(0),
 20 | 		end_(0)
 21 | 	{}
 22 | 
 23 | 	// constructor
 24 | 	explicit dev_matrix(size_t row, size_t col)
 25 | 	{
 26 | 		allocate(row*col);
 27 | 	}
 28 | 	// destructor
 29 | 	~dev_matrix()
 30 | 	{
 31 | 		free();
 32 | 	}
 33 | 
 34 | 	// resize the vector
 35 | 	void resize(size_t row, size_t col)
 36 | 	{
 37 | 		free();
 38 | 		allocate(row*col);
 39 | 	}
 40 | 
 41 | 	// get the size of the array
 42 | 	size_t getSize() const
 43 | 	{
 44 | 		return end_ - start_;
 45 | 	}
 46 | 
 47 | 	// get data
 48 | 	const T* getData() const
 49 | 	{
 50 | 		return start_;
 51 | 	}
 52 | 
 53 | 	T* getData()
 54 | 	{
 55 | 		return start_;
 56 | 	}
 57 | 
 58 | 	// set
 59 | 	void set(const T* src, size_t row, size_t col)
 60 | 	{
 61 | 		size_t min = std::min(row*col, getSize());
 62 | 		cudaError_t result = cudaMemcpy(start_, src, min * sizeof(T), cudaMemcpyHostToDevice);
 63 | 		if (result != cudaSuccess)
 64 | 		{
 65 | 			throw std::runtime_error("failed to copy to device memory");
 66 | 		}
 67 | 	}
 68 | 	// get
 69 | 	void get(T* dest, size_t row, size_t col)
 70 | 	{
 71 | 		size_t min = std::min(row*col, getSize());
 72 | 		cudaError_t result = cudaMemcpy(dest, start_, min * sizeof(T), cudaMemcpyDeviceToHost);
 73 | 		if (result != cudaSuccess)
 74 | 		{
 75 | 			throw std::runtime_error("failed to copy to host memory");
 76 | 		}
 77 | 	}
 78 | 
 79 | 
 80 | 	// private functions
 81 | private:
 82 | 	// allocate memory on the device
 83 | 	void allocate(size_t size)
 84 | 	{
 85 | 		cudaError_t result = cudaMalloc((void**)&start_, size * sizeof(T));
 86 | 		if (result != cudaSuccess)
 87 | 		{
 88 | 			start_ = end_ = 0;
 89 | 			throw std::runtime_error("failed to allocate device memory");
 90 | 		}
 91 | 		end_ = start_ + size;
 92 | 	}
 93 | 
 94 | 	// free memory on the device
 95 | 	void free()
 96 | 	{
 97 | 		if (start_ != 0)
 98 | 		{
 99 | 			cudaFree(start_);
100 | 			start_ = end_ = 0;
101 | 		}
102 | 	}
103 | 
104 | 	T* start_;
105 | 	T* end_;
106 | };
107 | 
108 | #endif


--------------------------------------------------------------------------------
/one-asset ELS/code/kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "kernel.h"
 2 | 
 3 | __global__ void KiELS1_kernel(
 4 | 	optionData data,
 5 | 	double * d_s,
 6 | 	double * stk, 
 7 | 	double * payment, 
 8 | 	double * date,
 9 | 	double * d_normals,
10 | 	unsigned N_STEPS,
11 | 	unsigned N_SIMULS
12 | 	)
13 | {
14 | 	int s_idx = threadIdx.x + blockIdx.x * blockDim.x; // thread index
15 | 	int n_idx = (s_idx)* N_STEPS; // for random number indexing
16 | 
17 | 	if (s_idx < N_SIMULS) {
18 | 		// Initialize
19 | 		double s_curr = data.S0;
20 | 		double s_ref = data.S0_ref;
21 | 		double sigma = data.sigma;
22 | 		double r = data.r;
23 | 		double dt = data.dt;
24 | 		double sqrdt = data.sqrdt;
25 | 		double B = data.B;
26 | 		double dummy = data.dummy;
27 | 		double s_curr_cal = -1.0;
28 | 
29 | 		double drift = (r - (sigma*sigma)*0.5)*dt;
30 | 		double sigsqdt = sigma*sqrdt;
31 | 
32 | 		double payoff = 0.0;
33 | 		unsigned int cnt1 = 0;
34 | 		unsigned int cnt2 = 0;
35 | 		double idx[length] = { 0 };
36 | 		int n = 0;
37 | 
38 | 		bool tag = 0;
39 | 		bool kievent = 0;
40 | 		s_curr_cal = s_curr / s_ref;
41 | 		do {
42 | 			// Geometric Brownian motion
43 | 			s_curr_cal = s_curr_cal * exp(drift + sigsqdt*d_normals[n_idx]);
44 | 
45 | 			// cheeck knock-in event
46 | 			kievent = (s_curr_cal < B) ? 1 : kievent;
47 | 
48 | 			// save underlying price at observation dates
49 | 			if ((n+1) == date[cnt1]) {
50 | 				idx[cnt1] = s_curr_cal;
51 | 				cnt1++;
52 | 			}
53 | 
54 | 			n_idx++; // random number index
55 | 			n++; // time stepping
56 | 		} while (n < N_STEPS);
57 | 
58 | 		// check observation dates (early redemption)
59 | 		for (int i = 0; i < length; i++) {
60 | 			if (idx[i] >= stk[i]) {
61 | 				payoff = payment[i];
62 | 				tag = 1;
63 | 				cnt2 = i;
64 | 				break;
65 | 			}
66 | 		}
67 | 		if (tag == 0) {
68 | 			// payoff using ternary operator
69 | 			payoff = 10000 * s_curr_cal;	
70 | 			payoff = (kievent == 0) ? ((s_curr_cal >= B) ? 10000 * (1 + dummy) : payoff) : payoff;
71 | 			cnt2 = length - 1;
72 | 		}
73 | 		// payoff using ternary operator (calendar convention : 360days)
74 | 		payoff = payoff * exp(-data.discr * date[cnt2] / 360.0);
75 | 
76 | 		// to save results, sycronize threads
77 | 		__syncthreads();
78 | 
79 | 		// save payoff
80 | 		d_s[s_idx] = payoff;
81 | 	}
82 | }
83 | 
// Host-side launcher for KiELS1_kernel: one thread per simulated path.
// All pointer arguments are forwarded to the kernel unchanged, so they must be
// device pointers. The launch is asynchronous; the caller synchronises before
// reading d_s. Does nothing when N_SIMULS is 0.
void KiELS1(
	optionData option,
	double * d_s,
	double * stk,
	double * payment,
	double * date,
	double * d_normals,
	unsigned N_STEPS,
	unsigned N_SIMULS
	) {
	// A grid with zero blocks is an invalid launch configuration.
	if (N_SIMULS == 0) {
		return;
	}

	// 256 threads per block leaves enough registers per thread for the launch to
	// succeed; a 1024-thread block fails when the kernel needs more than 64 registers.
	const unsigned BLOCK_SIZE = 256; // # of threads in a block (1-dimension threads & block)
	// CEIL comes from kernel.h and is expected to be a round-up integer division — TODO confirm
	const unsigned GRID_SIZE = CEIL(N_SIMULS, BLOCK_SIZE); // # of block in a grid
	KiELS1_kernel<<<GRID_SIZE, BLOCK_SIZE>>>(
		option, d_s, stk, payment, date, d_normals, N_STEPS, N_SIMULS);
}


--------------------------------------------------------------------------------
/vanilla_call/code/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <vector>
  3 | #include <time.h>
  4 | #include <math.h>
  5 | #include <iostream>
  6 | #include <time.h>
  7 | #include <cuda_runtime.h>
  8 | #include "kernel.h"
  9 | #include "dev_array.h"
 10 | #include <curand.h>
 11 | 
 12 | using namespace std;
 13 | 
 14 | int main() {
 15 | 	try {
 16 | 		// declare variables and constants		
 17 | 		const size_t N_SIMULS = 10000000;
 18 | 		const size_t N_STEPS = 1;
 19 | 		
 20 | 		const double S0_1 = 100;
 21 | 		const double T = 1.0;
 22 | 		const double K = 100;
 23 | 		const double sig1 = 0.3;
 24 | 		const double r = 0.03;		
 25 | 
 26 | 		// make variables
 27 | 		const size_t N_NORMALS = N_SIMULS*N_STEPS;
 28 | 		double dt = double(T) / double(N_STEPS);
 29 | 		double sqrdt = sqrt(dt);
 30 | 		///////////////////////////////////////////////	
 31 | 		
 32 | 		// generate blank arrays
 33 | 		vector<double> s(N_SIMULS);
 34 | 		dev_array<double> d_s(N_SIMULS);
 35 | 		dev_array<double> d_normals(N_NORMALS);
 36 | 
 37 | 		// For calculating many derivatives
 38 | 		optionData o1(S0_1, r, T, sig1, dt, sqrdt, K);
 39 | 		
 40 | 		// generate random numbers (host API)
 41 | 		curandGenerator_t curandGenerator;
 42 | 		curandCreateGenerator(&curandGenerator, CURAND_RNG_PSEUDO_MTGP32);
 43 | 		curandSetPseudoRandomGeneratorSeed(curandGenerator, time(NULL));
 44 | 		curandGenerateNormalDouble(curandGenerator, d_normals.getData(), N_NORMALS, 0.0, 1.0);
 45 | 
 46 | 		// GPU start 
 47 | 		double t1 = double(clock()) / CLOCKS_PER_SEC;	
 48 | 
 49 | 		// call the kernel
 50 | 		Vanilla_Call_single(o1, d_s.getData(), d_normals.getData(), N_STEPS, N_SIMULS);
 51 | 
 52 | 		cudaDeviceSynchronize();	
 53 | 
 54 | 		// copy results from device to host
 55 | 		d_s.get(&s[0], N_SIMULS);
 56 | 
 57 | 		// compute the payoff average
 58 | 		double gpu_sum = 0.0;
 59 | 		for (size_t i = 0; i<N_SIMULS; i++) {
 60 | 			gpu_sum += s[i];
 61 | 		}
 62 | 		gpu_sum /= N_SIMULS;
 63 | 		gpu_sum *= exp(-r*T);
 64 | 		double t2 = double(clock()) / CLOCKS_PER_SEC;
 65 | 
 66 | 		// CPU start
 67 | 		vector<double> normals(N_NORMALS);
 68 | 
 69 | 		// Get random number from device to host
 70 | 		d_normals.get(&normals[0], N_NORMALS);
 71 | 
 72 | 		double cpu_sum = 0.0;
 73 | 		double s_curr = 0.0;
 74 | 		double payoff = 0.0;
 75 | 		int n_idx = 0;
 76 | 		int n = 0;
 77 | 		for (size_t i = 0; i < N_SIMULS; i++) {
 78 | 			n_idx = i*N_STEPS;
 79 | 
 80 | 			s_curr = S0_1;
 81 | 
 82 | 			n = 0;
 83 | 
 84 | 			do {
 85 | 				s_curr = s_curr * exp((r - (sig1*sig1)*0.5)*dt + sig1*sqrdt*normals[n_idx]);
 86 | 				n_idx++;
 87 | 				n++;
 88 | 			} while (n < N_STEPS);
 89 | 
 90 | 			payoff = (s_curr > K ? s_curr - K : 0.0);
 91 | 			cpu_sum += exp(-r*T) * payoff;
 92 | 		}
 93 | 
 94 | 		cpu_sum /= N_SIMULS;
 95 | 
 96 | 		double t3 = double(clock()) / CLOCKS_PER_SEC;
 97 | 
 98 | 		cout << "****************** INFO ******************\n";
 99 | 		cout << "S0 : ";
100 | 		cout << S0_1 << endl;
101 | 		cout << "Strike : ";
102 | 		cout << K << endl;
103 | 		cout << "Maturity : ";
104 | 		cout << T << " year(s)" << endl;
105 | 		cout << "Volatility : ";
106 | 		cout << sig1 << endl;
107 | 		cout << "Risk-free Interest Rate : ";
108 | 		cout << r << endl;
109 | 		cout << "Number of Simulations: " << N_SIMULS << "\n";
110 | 		cout << "Number of Steps: " << N_STEPS << "\n";
111 | 
112 | 		cout << "****************** PRICE ******************\n";
113 | 		cout << "Option Price (GPU): " << gpu_sum << "\n";
114 | 		cout << "Option Price (CPU): " << cpu_sum << "\n";
115 | 		cout << "******************* TIME *****************\n";
116 | 		cout << "GPU Monte Carlo Computation: " << (t2 - t1)*1e3 << " ms\n";
117 | 		cout << "CPU Monte Carlo Computation: " << (t3 - t2)*1e3 << " ms\n";
118 | 		cout << "******************* END *****************\n";
119 | 
120 | 		// destroy random number generator
121 | 		curandDestroyGenerator(curandGenerator);
122 | 		}	
123 | 	catch (exception& e) {
124 | 		cout << "exception: " << e.what() << "\n";
125 | 	}
126 | }


--------------------------------------------------------------------------------
/up_out_call/code/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <vector>
  3 | #include <time.h>
  4 | #include <math.h>
  5 | #include <iostream>
  6 | #include <time.h>
  7 | #include <cuda_runtime.h>
  8 | #include "kernel.h"
  9 | #include "dev_array.h"
 10 | #include <curand.h>
 11 | 
 12 | using namespace std;
 13 | 
 14 | int main() {
 15 | 	try {
 16 | 		// declare variables and constants		
 17 | 		const size_t N_SIMULS = 100000;
 18 | 		const size_t N_SIMULS = 360;
 19 | 		
 20 | 		const double S0_1 = 100;
 21 | 		const double T = 1.0;
 22 | 		const double K = 100;
 23 | 		const double sig1 = 0.3;
 24 | 		const double r = 0.03;
 25 | 		const double B = 130;
 26 | 
 27 | 		// make variables
 28 | 		const size_t N_NORMALS = N_SIMULS*N_SIMULS;
 29 | 		double dt = double(T) / double(N_SIMULS);
 30 | 		double sqrdt = sqrt(dt);
 31 | 		///////////////////////////////////////////////	
 32 | 		
 33 | 		// generate blank arrays
 34 | 		vector<double> s(N_SIMULS);
 35 | 		dev_array<double> d_s(N_SIMULS);
 36 | 		dev_array<double> d_normals(N_NORMALS);
 37 | 
 38 | 		// For calculating many derivatives
 39 | 		optionData o1(S0_1, r, T, sig1, dt, sqrdt, K, B);
 40 | 		
 41 | 		// generate random numbers (host API)
 42 | 		curandGenerator_t curandGenerator;
 43 | 		curandCreateGenerator(&curandGenerator, CURAND_RNG_PSEUDO_MTGP32);
 44 | 		curandSetPseudoRandomGeneratorSeed(curandGenerator, time(NULL));
 45 | 		curandGenerateNormalDouble(curandGenerator, d_normals.getData(), N_NORMALS, 0.0, 1.0);
 46 | 
 47 | 		// GPU start 
 48 | 		double t1 = double(clock()) / CLOCKS_PER_SEC;	
 49 | 
 50 | 		// call the kernel
 51 | 		up_out_barrier_single(o1, d_s.getData(), d_normals.getData(), N_SIMULS, N_SIMULS);
 52 | 
 53 | 		cudaDeviceSynchronize();	
 54 | 
 55 | 		// copy results from device to host
 56 | 		d_s.get(&s[0], N_SIMULS);
 57 | 
 58 | 		// compute the payoff average
 59 | 		double gpu_sum = 0.0;
 60 | 		for (size_t i = 0; i<N_SIMULS; i++) {
 61 | 			gpu_sum += s[i];
 62 | 		}
 63 | 		gpu_sum /= N_SIMULS;
 64 | 		gpu_sum *= exp(-r*T);
 65 | 		double t2 = double(clock()) / CLOCKS_PER_SEC;
 66 | 
 67 | 		// CPU start
 68 | 		vector<double> normals(N_NORMALS);
 69 | 
 70 | 		// Get random number from device to host
 71 | 		d_normals.get(&normals[0], N_NORMALS);
 72 | 
 73 | 		double cpu_sum = 0.0;
 74 | 		double s_curr = 0.0;
 75 | 		double payoff = 0.0;
 76 | 		bool tag = 0;
 77 | 		int n_idx = 0;
 78 | 		int n = 0;
 79 | 		for (size_t i = 0; i < N_SIMULS; i++) {
 80 | 			n_idx = i*N_SIMULS;
 81 | 
 82 | 			s_curr = S0_1;
 83 | 			tag = 0;
 84 | 			n = 0;
 85 | 
 86 | 			do {
 87 | 				s_curr = s_curr * exp((r - (sig1*sig1)*0.5)*dt + sig1*sqrdt*normals[n_idx]);
 88 | 
 89 | 				tag = (s_curr > B) ? 1 : tag; // check knock-out
 90 | 
 91 | 				n_idx++;
 92 | 				n++;
 93 | 			} while (n < N_SIMULS);
 94 | 
 95 | 			payoff = tag ? 0 : ((s_curr > K) ? (s_curr - K) : 0);
 96 | 			cpu_sum += exp(-r*T) * payoff;
 97 | 		}
 98 | 
 99 | 		cpu_sum /= N_SIMULS;
100 | 
101 | 		double t3 = double(clock()) / CLOCKS_PER_SEC;
102 | 
103 | 		cout << "************ KNOCK OUT (UP & OUT) CALL INFO ************\n";
104 | 		cout << "S0 : ";
105 | 		cout << S0_1 << endl;
106 | 		cout << "Strike : ";
107 | 		cout << K << endl;
108 | 		cout << "Barrier : ";
109 | 		cout << B << endl;
110 | 		cout << "Maturity : ";
111 | 		cout << T << " year(s)" << endl;
112 | 		cout << "Volatility : ";
113 | 		cout << sig1 << endl;
114 | 		cout << "Risk-free Interest Rate : ";
115 | 		cout << r << endl;
116 | 		cout << "Number of Simulations: " << N_SIMULS << "\n";
117 | 		cout << "Number of Steps: " << N_SIMULS << "\n";
118 | 
119 | 		cout << "****************** PRICE ******************\n";
120 | 		cout << "Option Price (GPU): " << gpu_sum << "\n";
121 | 		cout << "Option Price (CPU): " << cpu_sum << "\n";
122 | 		cout << "******************* TIME *****************\n";
123 | 		cout << "GPU Monte Carlo Computation: " << (t2 - t1)*1e3 << " ms\n";
124 | 		cout << "CPU Monte Carlo Computation: " << (t3 - t2)*1e3 << " ms\n";
125 | 		cout << "******************* END *****************\n";
126 | 
127 | 		// destroy random number generator
128 | 		curandDestroyGenerator(curandGenerator);
129 | 		}	
130 | 	catch (exception& e) {
131 | 		cout << "exception: " << e.what() << "\n";
132 | 	}
133 | }


--------------------------------------------------------------------------------
/two-asset ELS/code/kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "kernel.h"
  2 | //#include <stdio.h>
  3 | #include <curand.h>
  4 | #include <time.h>
  5 | #include <cublas_v2.h>
  6 | 
  7 | __global__ void KiELS2_kernel(
  8 | 	optionData data1,
  9 | 	optionData data2,
 10 | 	double * d_s,
 11 | 	double * stk, 
 12 | 	double * payment, 
 13 | 	double * date,
 14 | 	double * d_normals,
 15 | 	unsigned N_STEPS,
 16 | 	unsigned N_SIMULS)
 17 | {
 18 | 	int s_idx = threadIdx.x + blockIdx.x * blockDim.x; // thread index
 19 | 	int n_idx = (s_idx) * N_STEPS; // for random number indexing	
 20 | 
 21 | 	if (s_idx < N_SIMULS) {
 22 | 		int n = 0;
 23 | 
 24 | 		double s_curr1 = data1.S0, sigma1 = data1.sigma, r1 = data1.r, dt1 = data1.dt, sqrdt1 = data1.sqrdt, B1 = data1.B, dummy1 = data1.dummy;
 25 | 		double s_curr2 = data2.S0, sigma2 = data2.sigma, r2 = data2.r, dt2 = data2.dt, sqrdt2 = data2.sqrdt, B2 = data2.B, dummy2 = data2.dummy;
 26 | 		double ref_s1 = data1.S0_ref, ref_s2 = data2.S0_ref;
 27 | 
 28 | 		double s_curr_cal1 = -1.0, s_curr_cal2 = -1.0;
 29 | 		double s_curr_min = -1.0;
 30 | 		double payoff = 0.0;
 31 | 		unsigned int cnt1 = 0;
 32 | 		unsigned int cnt2 = 0;
 33 | 		double idx[length] = { 0 };
 34 | 
 35 | 		double drift1 = (r1 - (sigma1*sigma1)*0.5)*dt1, sigsqdt1 = sigma1*sqrdt1;
 36 | 		double drift2 = (r2 - (sigma2*sigma2)*0.5)*dt2, sigsqdt2 = sigma2*sqrdt2;
 37 | 
 38 | 		bool tag = 0;
 39 | 		bool kievent = 0;
 40 | 		s_curr_cal1 = s_curr1 / ref_s1;
 41 | 		s_curr_cal2 = s_curr2 / ref_s2;
 42 | 		do {
 43 | 			// Geometric Brownian motion
 44 | 			s_curr_cal1 = s_curr_cal1 * exp(drift1 + sigsqdt1*d_normals[n_idx]);
 45 | 			s_curr_cal2 = s_curr_cal2 * exp(drift2 + sigsqdt2*d_normals[N_STEPS*N_SIMULS + n_idx]);
 46 | 
 47 | 			// worst performer
 48 | 			s_curr_min = s_curr_cal1 < s_curr_cal2 ? s_curr_cal1 : s_curr_cal2;
 49 | 			
 50 | 			// cheeck knock-in event
 51 | 			kievent = (s_curr_min < B1) ? 1 : kievent;
 52 | 
 53 | 			// save underlying price at observation dates
 54 | 			if ((n+1) == date[cnt1]) {
 55 | 				idx[cnt1] = s_curr_min;
 56 | 				cnt1++;
 57 | 			}
 58 | 
 59 | 			n_idx++;  // random number index
 60 | 			n++;  // time stepping
 61 | 		} while (n < N_STEPS);
 62 | 
 63 | 		// check observation dates (early redemption)
 64 | 		for (int i = 0; i < length; i++) {
 65 | 			if (idx[i] >= stk[i]) {
 66 | 				payoff = payment[i];
 67 | 				tag = 1;
 68 | 				cnt2 = i;
 69 | 				break;
 70 | 			}
 71 | 		}
 72 | 		if (tag == 0) {
 73 | 			// payoff using ternary operator
 74 | 			payoff = 10000 * s_curr_min;
 75 | 			payoff = (kievent == 0) ? ((s_curr_min >= B1) ? 10000 * (1 + dummy1) : payoff) : payoff;
 76 | 			cnt2 = length - 1;
 77 | 		}
 78 | 
 79 | 		payoff = payoff*exp(-data1.discr * date[cnt2] / 360.0);
 80 | 
 81 | 		__syncthreads();
 82 | 
 83 | 		d_s[s_idx] = payoff;
 84 | 	}
 85 | }
 86 | 
 87 | void ELS2(
 88 | 	optionData option1,
 89 | 	optionData option2,
 90 | 	double * d_s,
 91 | 	double * stk,
 92 | 	double * payment,
 93 | 	double * date,
 94 | 	double * d_normals,
 95 | 	unsigned N_STEPS,
 96 | 	unsigned N_SIMULS) {
 97 | 	const unsigned BLOCK_SIZE = 1024; // # of threads in a block (1-dimension threads & block)
 98 | 	const unsigned GRID_SIZE = CEIL(N_SIMULS, BLOCK_SIZE); // # of block in a grid
 99 | 	KiELS2_kernel << <GRID_SIZE, BLOCK_SIZE >> >(
100 | 		option1, option2, d_s, stk, payment, date, d_normals, N_STEPS, N_SIMULS);
101 | }
// Fills the device buffer A with rows_A * cols_A standard normal draws
// (mean 0, standard deviation 1) using the cuRAND host API.
// The generator is seeded from the wall clock, so two calls within the same
// second produce the same sequence.
// NOTE(review): cuRAND return codes are not checked here; the signature offers
// no way to report them.
void dev_fillRand(double *A, size_t rows_A, size_t cols_A) 
{
	// random number generation host API
	curandGenerator_t rnd;
	curandCreateGenerator(&rnd, CURAND_RNG_PSEUDO_MTGP32);
	curandSetPseudoRandomGeneratorSeed(rnd, (unsigned long long)time(NULL));
	curandGenerateNormalDouble(rnd, A, rows_A*cols_A, 0.0, 1.0);

	// Let the generation finish, then release the generator so repeated calls
	// do not leak one generator each.
	cudaDeviceSynchronize();
	curandDestroyGenerator(rnd);
}
110 | 
// Computes C = A * B on the GPU with cuBLAS (double precision, no transposes).
// A is m x k, B is k x n and C is m x n; the leading dimensions passed to
// cuBLAS are m, k and m respectively. A cuBLAS handle is created and destroyed
// on every call.
void dev_mmul(const double *A, const double *B, double *C, const int m, const int k, const int n) {
	// Scaling factors of the GEMM: C = one * A * B + zero * C
	const double one = 1;
	const double zero = 0;

	// Leading dimensions of A, B and C
	const int strideA = m;
	const int strideB = k;
	const int strideC = m;

	cublasHandle_t blas;
	cublasCreate(&blas);

	cublasDgemm(blas, CUBLAS_OP_N, CUBLAS_OP_N,
		m, n, k,
		&one, A, strideA,
		B, strideB,
		&zero, C, strideC);

	cublasDestroy(blas);
}


--------------------------------------------------------------------------------
/three-asset ELS/code/kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "kernel.h"
  2 | #include <curand.h>
  3 | #include <time.h>
  4 | #include <cublas_v2.h>
  5 | 
  6 | __global__ void KiELS3_kernel(
  7 | 	optionData data1,
  8 | 	optionData data2,
  9 | 	optionData data3,
 10 | 	double * d_s,
 11 | 	double * stk, 
 12 | 	double * payment, 
 13 | 	double * date,
 14 | 	double * d_normals,
 15 | 	unsigned N_STEPS,
 16 | 	unsigned N_SIMULS)
 17 | {
 18 | 	int s_idx = threadIdx.x + blockIdx.x * blockDim.x; // thread index
 19 | 	int n_idx = (s_idx) * N_STEPS; // for random number indexing	
 20 | 
 21 | 	if (s_idx < N_SIMULS) {
 22 | 		double s_curr1 = data1.S0; double sigma1 = data1.sigma; double r1 = data1.r; double dt1 = data1.dt; double sqrdt1 = data1.sqrdt; double B1 = data1.B; double dummy1 = data1.dummy;
 23 | 		double s_curr2 = data2.S0; double sigma2 = data2.sigma; double r2 = data2.r; double dt2 = data2.dt; double sqrdt2 = data2.sqrdt; double B2 = data2.B; double dummy2 = data2.dummy;
 24 | 		double s_curr3 = data3.S0; double sigma3 = data3.sigma; double r3 = data3.r; double dt3 = data3.dt; double sqrdt3 = data3.sqrdt; double B3 = data3.B; double dummy3 = data3.dummy;
 25 | 
 26 | 		double ref_s1 = data1.S0_ref; double ref_s2 = data2.S0_ref; double ref_s3 = data3.S0_ref;
 27 | 
 28 | 		double s_curr_cal1 = -1.0, s_curr_cal2 = -1.0, s_curr_cal3 = -1.0;
 29 | 		double s_curr_min = -1.0;
 30 | 		double payoff = 0.0;
 31 | 		unsigned int cnt1 = 0;
 32 | 		unsigned int cnt2 = 0;
 33 | 		double idx[length] = { 0 };
 34 | 
 35 | 		double drift1 = (r1 - (sigma1*sigma1)*0.5)*dt1, sigsqdt1 = sigma1*sqrdt1;
 36 | 		double drift2 = (r2 - (sigma2*sigma2)*0.5)*dt2, sigsqdt2 = sigma2*sqrdt2;
 37 | 		double drift3 = (r3 - (sigma3*sigma3)*0.5)*dt3, sigsqdt3 = sigma3*sqrdt3;
 38 | 
 39 | 		int n = 0;
 40 | 		bool tag = 0;
 41 | 		bool kievent = 0;
 42 | 
 43 | 		s_curr_cal1 = s_curr1 / ref_s1;
 44 | 		s_curr_cal2 = s_curr2 / ref_s2;
 45 | 		s_curr_cal3 = s_curr3 / ref_s3;
 46 | 
 47 | 		do {
 48 | 			// Geometric Brownian motion
 49 | 			s_curr_cal1 = s_curr_cal1 * exp(drift1 + sigsqdt1*d_normals[n_idx]);
 50 | 			s_curr_cal2 = s_curr_cal2 * exp(drift2 + sigsqdt2*d_normals[N_STEPS * N_SIMULS + n_idx]);
 51 | 			s_curr_cal3 = s_curr_cal3 * exp(drift3 + sigsqdt3*d_normals[2 * N_STEPS * N_SIMULS + n_idx]);
 52 | 			
 53 | 			// worst performer
 54 | 			s_curr_min = MIN_USERDEFINE(s_curr_cal1, s_curr_cal2);
 55 | 			s_curr_min = MIN_USERDEFINE(s_curr_min, s_curr_cal3);
 56 | 
 57 | 			// cheeck knock-in event
 58 | 			kievent = (s_curr_min < B1) ? 1 : kievent;
 59 | 
 60 | 			// save underlying price at observation dates
 61 | 			if ((n+1) == date[cnt1]) {
 62 | 				idx[cnt1] = s_curr_min;
 63 | 				cnt1++;
 64 | 			}
 65 | 
 66 | 			n_idx++;  // random number index
 67 | 			n++;  // time stepping
 68 | 		} while (n < N_STEPS);
 69 | 		
 70 | 		// check observation dates (early redemption)
 71 | 		for (int i = 0; i < length; i++) {
 72 | 			if (idx[i] >= stk[i]) {
 73 | 				payoff = payment[i];
 74 | 				tag = 1;
 75 | 				cnt2 = i;
 76 | 				break;
 77 | 			}
 78 | 		}
 79 | 		if (tag == 0) {
 80 | 			// payoff using ternary operator
 81 | 			payoff = 10000 * s_curr_min;
 82 | 			payoff = (kievent == 0) ? ((s_curr_min >= B1) ? 10000 * (1 + dummy1) : payoff) : payoff;
 83 | 			cnt2 = length - 1;
 84 | 		}
 85 | 		
 86 | 		payoff = payoff * exp(-data1.discr * date[cnt2] / 360.0);
 87 | 
 88 | 		__syncthreads();
 89 | 
 90 | 		d_s[s_idx] = payoff;
 91 | 	}
 92 | }
 93 | 
 94 | void ELS3(
 95 | 	optionData option1,
 96 | 	optionData option2,
 97 | 	optionData option3,
 98 | 	double * d_s,
 99 | 	double * stk,
100 | 	double * payment,
101 | 	double * date,
102 | 	double * d_normals,
103 | 	unsigned N_STEPS,
104 | 	unsigned N_SIMULS) {
105 | 	const unsigned BLOCK_SIZE = 1024; // # of threads in a block (1-dimension threads & block)
106 | 	const unsigned GRID_SIZE = CEIL(N_SIMULS, BLOCK_SIZE); // # of block in a grid
107 | 	KiELS3_kernel << <GRID_SIZE, BLOCK_SIZE >> >(
108 | 		option1, option2, option3, d_s, stk, payment, date, d_normals, N_STEPS, N_SIMULS);
109 | }
110 | 
// Fills the device buffer A with rows_A * cols_A standard normal draws
// (mean 0, standard deviation 1) using the cuRAND host API.
// The generator is seeded from the wall clock, so two calls within the same
// second produce the same sequence.
// NOTE(review): cuRAND return codes are not checked here; the signature offers
// no way to report them.
void dev_fillRand(double *A, size_t rows_A, size_t cols_A) 
{
	// random number generation host API
	curandGenerator_t rnd;
	curandCreateGenerator(&rnd, CURAND_RNG_PSEUDO_MTGP32);
	curandSetPseudoRandomGeneratorSeed(rnd, (unsigned long long)time(NULL));
	curandGenerateNormalDouble(rnd, A, rows_A*cols_A, 0.0, 1.0);

	// Let the generation finish, then release the generator so repeated calls
	// do not leak one generator each.
	cudaDeviceSynchronize();
	curandDestroyGenerator(rnd);
}
119 | 
// Computes C = A * B on the GPU with cuBLAS (double precision, no transposes).
// A is m x k, B is k x n and C is m x n; the leading dimensions passed to
// cuBLAS are m, k and m respectively. A cuBLAS handle is created and destroyed
// on every call.
void dev_mmul(const double *A, const double *B, double *C, const int m, const int k, const int n) {
	// Scaling factors of the GEMM: C = one * A * B + zero * C
	const double one = 1;
	const double zero = 0;

	// Leading dimensions of A, B and C
	const int strideA = m;
	const int strideB = k;
	const int strideC = m;

	cublasHandle_t blas;
	cublasCreate(&blas);

	cublasDgemm(blas, CUBLAS_OP_N, CUBLAS_OP_N,
		m, n, k,
		&one, A, strideA,
		B, strideB,
		&zero, C, strideC);

	cublasDestroy(blas);
}


--------------------------------------------------------------------------------
/one-asset ELS/code/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <vector>
  3 | #include <time.h>
  4 | #include <math.h>
  5 | #include <iostream>
  6 | #include <time.h>
  7 | #include <cuda_runtime.h>
  8 | #include "kernel.h"
  9 | #include "dev_array.h"
 10 | #include <curand.h>
 11 | 
 12 | #include <algorithm>
 13 | 
 14 | using namespace std;
 15 | 
 16 | int main() {
 17 | 	try {
 18 | 		// Variables
 19 | 		// declare variables and constants		
 20 | 		const double T = 3.0;
 21 | 		const size_t N_SIMULS = 10000;
 22 | 		const size_t N_STEPS = 360 * (int)T; // calendar convention : 360days
 23 | 
 24 | 		const double B = 0.6; // Knock-in barrier
 25 | 
 26 | 		const double S0_1 = 100.0;
 27 | 		const double sig1 = 0.3;
 28 | 
 29 | 		const double r = 0.0165;
 30 | 		const double discr = 0.0165;
 31 | 		const double dummy = 0.075;
 32 | 
 33 | 		const double stk[] = { 0.95, 0.9, 0.85, 0.8, 0.75, 0.7};
 34 | 		const double coupon[] = { 0.0125, 0.025, 0.0375, 0.05, 0.0625, 0.075 };
 35 | 
 36 | 		// this exmample set 6 observation dates.
 37 | 		const double date[] = { ceil(N_STEPS * 1.0 / length), ceil(N_STEPS * 2.0 / length), 
 38 | 			ceil(N_STEPS * 3.0 / length), ceil(N_STEPS * 4.0 / length),
 39 | 			ceil(N_STEPS * 5.0 / length), ceil(N_STEPS * 6.0 / length) };
 40 | 
 41 | 		bool flag = 1; // Greek flag (0 : not calcute greeks, 1 : calculate greeks)
 42 | 		double diff = 0.01; // 1% diff for greeks
 43 | 		///////////////////////////////////////////////
 44 | 
 45 | 		// make variables
 46 | 		const size_t N_NORMALS = N_SIMULS*N_STEPS;
 47 | 		double dt = double(T) / double(N_STEPS);
 48 | 		double sqrdt = sqrt(dt);
 49 | 
 50 | 		// exception handling
 51 | 		if (!(sizeof(stk) == sizeof(coupon)) && (sizeof(coupon) == sizeof(date)) && (length == (sizeof(coupon) == sizeof(date)))) {
 52 | 			cout << "Size error!" << endl;
 53 | 			return 0;
 54 | 		}
 55 | 
 56 | 		// generate info arrays
 57 | 		const unsigned Size = sizeof(stk) / sizeof(double);
 58 | 		double payment[Size] = { 0 };
 59 | 		for (int i = 0; i < Size; i++) {
 60 | 			payment[i] = 10000 * (1 + coupon[i]);
 61 | 		}
 62 | 
 63 | 		dev_array<double> d_stk(Size); d_stk.set(stk, Size);
 64 | 		dev_array<double> d_payment(Size); d_payment.set(payment, Size);
 65 | 		dev_array<double> d_date(Size); d_date.set(date, Size);
 66 | 
 67 | 		// generate blank arrays
 68 | 		double zeros[N_SIMULS] = {0};
 69 | 		vector<double> s(N_SIMULS);
 70 | 		dev_array<double> d_s(N_SIMULS);
 71 | 
 72 | 		dev_array<double> d_normals(N_NORMALS);
 73 | 
 74 | 		optionData o1(S0_1, S0_1, r, discr, T, sig1, dt, sqrdt, B, dummy);
 75 | 		optionData o2(S0_1 * (1.0 + 0.5*diff), S0_1, r, discr, T, sig1, dt, sqrdt, B, dummy); // for greeks
 76 | 		optionData o3(S0_1 * (1.0 - 0.5*diff), S0_1, r, discr, T, sig1, dt, sqrdt, B, dummy); // for greeks
 77 | 
 78 | 		// make a book
 79 | 		optionData book[] = {o1, o2, o3};
 80 | 		
 81 | 		cout << "****************** INFO ******************\n";
 82 | 		cout << "Strike for ELS : ";
 83 | 		for (int i = 0; i < Size; i++) cout << stk[i] << " ";
 84 | 		cout << endl;
 85 | 		cout << "Coupon for ELS : ";
 86 | 		for (int i = 0; i < Size; i++) cout << coupon[i] << " ";
 87 | 		cout << endl;
 88 | 		cout << "Date for ELS : ";
 89 | 		for (int i = 0; i < Size; i++) cout << date[i] << " ";
 90 | 		cout << endl << endl;
 91 | 		cout << "Number of Paths: " << N_SIMULS << "\n";
 92 | 		cout << "Number of Steps: " << N_STEPS << "\n";
 93 | 		cout << "Underlying Initial Price: " << S0_1 << "\n";
 94 | 		cout << "Barrier: " << B << "\n";
 95 | 		cout << "Time to Maturity: " << T << " years\n";
 96 | 		cout << "Risk-free Interest Rate: " << r << "\n";
 97 | 		cout << "Discount Rate: " << discr << "\n";
 98 | 		cout << "Volatility: " << sig1 << "\n";
 99 | 		cout << "Face Value: " << 10000 << "\n";
100 | 
101 | 
102 | 		// call the kernel
103 | 		if (!flag) {
104 | 			// generate random numbers (host API)
105 | 			curandGenerator_t curandGenerator;
106 | 			curandCreateGenerator(&curandGenerator, CURAND_RNG_PSEUDO_MTGP32);
107 | 			curandSetPseudoRandomGeneratorSeed(curandGenerator, time(NULL));
108 | 			curandGenerateNormalDouble(curandGenerator, d_normals.getData(), N_NORMALS, 0.0, 1.0);
109 | 			double t1 = double(clock()) / CLOCKS_PER_SEC;
110 | 
111 | 			KiELS1(book[0], d_s.getData(), d_stk.getData(), d_payment.getData(), d_date.getData(),
112 | 				d_normals.getData(), N_STEPS, N_SIMULS);	
113 | 
114 | 			cudaDeviceSynchronize();	
115 | 
116 | 			// copy results from device to host
117 | 			d_s.get(&s[0], N_SIMULS);
118 | 			cudaFree(d_s.getData());
119 | 
120 | 			// compute the payoff average
121 | 			double gpu_sum = 0.0;
122 | 			for (size_t i = 0; i<N_SIMULS; i++) {
123 | 				gpu_sum += s[i];
124 | 			}
125 | 
126 | 			gpu_sum /= N_SIMULS;
127 | 			double t2 = double(clock()) / CLOCKS_PER_SEC;
128 | 
129 | 
130 | 			cout << "****************** PRICE ******************\n";
131 | 			cout << "Option Price (GPU): " << gpu_sum << "\n";
132 | 			cout << "******************* TIME *****************\n";
133 | 			cout << "GPU Monte Carlo Computation: " << (t2 - t1)*1e3 << " ms\n";
134 | 			cout << "******************* END *****************\n";
135 | 		}
136 | 		else {
137 | 			double payoff[3] = {}; // save 3 payoff for obtaining price & greeks
138 | 			double t1 = double(clock()) / CLOCKS_PER_SEC;
139 | 			// generate random numbers  (host API)
140 | 			// recycle random number for greeks
141 | 			curandGenerator_t curandGenerator;
142 | 			curandCreateGenerator(&curandGenerator, CURAND_RNG_PSEUDO_MTGP32);
143 | 			curandSetPseudoRandomGeneratorSeed(curandGenerator, time(NULL));
144 | 			curandGenerateNormalDouble(curandGenerator, d_normals.getData(), N_NORMALS, 0.0, 1.0);
145 | 
146 | 			for (int i = 0; i < 3; i++) {	
147 | 				KiELS1(book[i], d_s.getData(), d_stk.getData(), d_payment.getData(), d_date.getData(),
148 | 					d_normals.getData(), N_STEPS, N_SIMULS);	
149 | 
150 | 				cudaDeviceSynchronize();	
151 | 
152 | 				// copy results from device to host
153 | 				d_s.get(&s[0], N_SIMULS);
154 | 
155 | 				// compute the payoff average
156 | 				double gpu_sum = 0.0;
157 | 				for (size_t j = 0; j < N_SIMULS; j++) {
158 | 					gpu_sum += s[j];
159 | 				}
160 | 				gpu_sum /= N_SIMULS;
161 | 
162 | 				payoff[i] = gpu_sum;
163 | 
164 | 			}
165 | 			double t2 = double(clock()) / CLOCKS_PER_SEC;
166 | 
167 | 			double delta = (payoff[1] - payoff[2]) / (diff*S0_1); // central difference
168 | 			double gamma = (payoff[2] - 2*payoff[0] + payoff[1]) / 
169 | 				((0.5*diff*S0_1)*(0.5*diff*S0_1));
170 | 			cout << "****************** PRICE, GREEK ******************\n";
171 | 			cout << "Option Price (GPU): " << payoff[0] << "\n";
172 | 			cout << "Option Delta (GPU): " << delta << "\n";
173 | 			cout << "Option Gamma (GPU): " << gamma << "\n";
174 | 			cout << "******************* TIME *****************\n";
175 | 			cout << "GPU Monte Carlo Computation: " << (t2 - t1)*1e3 << " ms\n";
176 | 			cout << "******************* END *****************\n";
177 | 
178 | 			// destroy generator
179 | 			curandDestroyGenerator(curandGenerator);
180 | 		}		
181 | 	}
182 | 	catch (exception& e) {
183 | 		cout << "exception: " << e.what() << "\n";
184 | 	}
185 | }


--------------------------------------------------------------------------------
/two-asset ELS/code/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <vector>
  3 | #include <math.h>
  4 | #include <iostream>
  5 | #include <time.h>
  6 | #include <cuda_runtime.h>
  7 | #include "kernel.h"
  8 | #include "dev_array.h"
  9 | #include "dev_matrix.h"
 10 | #include <curand.h>
 11 | #include "chol.h"
 12 | 
 13 | #include <algorithm>
 14 | 
 15 | using namespace std;
 16 | 
 17 | int main() {
 18 | 	// Prices a two-asset ELS by Monte Carlo on the GPU and estimates delta and
 19 | 	// gamma for each underlying by central differences, re-using the same set
 20 | 	// of correlated random numbers for the base and every bumped scenario.
 21 | 	try {
 22 | 		// Variables
 23 | 		// declare variables and constants
 24 | 		const double T = 3.0; // time to maturity (years)
 25 | 		const size_t N_SIMULS = 10000; // number of Monte Carlo paths
 26 | 		const size_t N_STEPS = 360 * (int)T; // calendar convention : 360days
 27 | 		const double B = 0.6; // barrier
 28 | 		const double S0_1 = 2081.18; const double S0_2 = 3674.05; // initial prices
 29 | 		const double sig1 = 0.2379; const double sig2 = 0.2330; // volatilities
 30 | 
 31 | 		const double r1 = 0.0165; // risk-free rate 1
 32 | 		const double r2 = 0.0185; // risk-free rate 2
 33 | 		const double discr = 0.0165; // discount rate
 34 | 		const double dummy = 0.075; // NOTE(review): meaning is defined by optionData - confirm in kernel.h
 35 | 
 36 | 		// This example sets 6 observation dates.
 37 | 		// NOTE(review): `length` is not declared in this file; presumably kernel.h
 38 | 		// provides it as the number of observation dates - confirm.
 39 | 		const double stk[] = { 0.95, 0.9, 0.85, 0.8, 0.75, 0.7 };
 40 | 		const double coupon[] = { 0.0125, 0.025, 0.0375, 0.05, 0.0625, 0.075 };
 41 | 		const double date[] = { ceil(N_STEPS * 1.0 / length), ceil(N_STEPS * 2.0 / length),
 42 | 			ceil(N_STEPS * 3.0 / length), ceil(N_STEPS * 4.0 / length),
 43 | 			ceil(N_STEPS * 5.0 / length), ceil(N_STEPS * 6.0 / length) };
 44 | 
 45 | 		const double rho = 0.5; // correlation between underlying 1 and 2
 46 | 		double M[2 * 2] = { 0 };
 47 | 
 48 | 		makeChol2(M, rho); // cholesky decomposition for correlated random number
 49 | 
 50 | 		double diff = 0.01; // 1% diff for greeks
 51 | 		///////////////////////////////////////////////
 52 | 
 53 | 		// make variables
 54 | 		const size_t N_NORMALS = N_SIMULS*N_STEPS;
 55 | 		double dt = double(T) / double(N_STEPS);
 56 | 		double sqrdt = sqrt(dt);
 57 | 
 58 | 		// exception handling: the three schedules must hold the same number of
 59 | 		// entries and that number must agree with `length`.
 60 | 		const size_t Size = sizeof(stk) / sizeof(stk[0]);
 61 | 		if (sizeof(coupon) != sizeof(stk) || sizeof(date) != sizeof(stk) || (size_t)length != Size) {
 62 | 			cout << "Size error!" << endl;
 63 | 			return 1;
 64 | 		}
 65 | 
 66 | 		// generate info arrays
 67 | 		double payment[Size] = { 0 };
 68 | 		for (size_t i = 0; i < Size; i++) {
 69 | 			payment[i] = 10000 * (1 + coupon[i]); // face value 10000 plus coupon
 70 | 		}
 71 | 
 72 | 		dev_array<double> d_stk(Size); d_stk.set(stk, Size);
 73 | 		dev_array<double> d_payment(Size); d_payment.set(payment, Size);
 74 | 		dev_array<double> d_date(Size); d_date.set(date, Size);
 75 | 
 76 | 		// generate blank arrays
 77 | 		vector<double> s(N_SIMULS);
 78 | 		dev_array<double> d_s(N_SIMULS);
 79 | 		dev_matrix<double> d_normals_rev(N_NORMALS, 2); // correlated random number in GPU
 80 | 
 81 | 		// The raw normals and the Cholesky factor are only needed to build
 82 | 		// d_normals_rev. Giving them their own scope destroys each of them exactly
 83 | 		// once, right here, instead of invoking the destructors by hand (which
 84 | 		// would run them a second time when main's scope ends).
 85 | 		{
 86 | 			dev_matrix<double> d_normals(N_NORMALS, 2); // initial random number in GPU
 87 | 			dev_matrix<double> chol(2, 2);
 88 | 			chol.set(M, 2, 2);
 89 | 
 90 | 			// generate random numbers (host API)
 91 | 			dev_fillRand(d_normals.getData(), N_NORMALS, 2);
 92 | 
 93 | 			// make a correlated random number (GPU matrix multiplication using cublas)
 94 | 			dev_mmul(d_normals.getData(), chol.getData(), d_normals_rev.getData(), N_NORMALS, 2, 2);
 95 | 
 96 | 			// wait for the multiplication before its inputs go out of scope
 97 | 			cudaError_t err = cudaDeviceSynchronize();
 98 | 			if (err != cudaSuccess) {
 99 | 				cout << "CUDA error: " << cudaGetErrorString(err) << "\n";
100 | 				return 1;
101 | 			}
102 | 		}
103 | 
104 | 		optionData o1(S0_1, S0_1, r1, discr, T, sig1, dt, sqrdt, B, dummy); // zero tick
105 | 		optionData o2(S0_2, S0_2, r2, discr, T, sig2, dt, sqrdt, B, dummy);
106 | 		optionData o3(S0_1 * (1.0 + 0.5*diff), S0_1, r1, discr, T, sig1, dt, sqrdt, B, dummy); // up tick
107 | 		optionData o4(S0_2 * (1.0 + 0.5*diff), S0_2, r2, discr, T, sig2, dt, sqrdt, B, dummy);
108 | 		optionData o5(S0_1 * (1.0 - 0.5*diff), S0_1, r1, discr, T, sig1, dt, sqrdt, B, dummy); // down tick
109 | 		optionData o6(S0_2 * (1.0 - 0.5*diff), S0_2, r2, discr, T, sig2, dt, sqrdt, B, dummy);
110 | 
111 | 		// make a book
112 | 		optionData book[] = { o1, o2, o3, o4, o5, o6 };
113 | 
114 | 		// Each row picks the book entries used as {asset 1, asset 2} for one run.
115 | 		// Asset 1 is always passed first so every run consumes the random numbers
116 | 		// in the same order as the base scenario.
117 | 		const int N_SCENARIOS = 5;
118 | 		const int scenario[N_SCENARIOS][2] = {
119 | 			{ 0, 1 }, // payoff[0]  s1:0 s2:0
120 | 			{ 0, 3 }, // payoff[1]  s1:0 s2:+
121 | 			{ 0, 5 }, // payoff[2]  s1:0 s2:-
122 | 			{ 2, 1 }, // payoff[3]  s1:+ s2:0
123 | 			{ 4, 1 }  // payoff[4]  s1:- s2:0
124 | 		};
125 | 
126 | 		double payoff[N_SCENARIOS] = {};
127 | 		double t1 = double(clock()) / CLOCKS_PER_SEC;
128 | 		for (int i = 0; i < N_SCENARIOS; i++) {
129 | 			// call the kernel
130 | 			ELS2(book[scenario[i][0]], book[scenario[i][1]], d_s.getData(), d_stk.getData(), d_payment.getData(), d_date.getData(), d_normals_rev.getData(), N_STEPS, N_SIMULS);
131 | 
132 | 			// report launch errors first, then errors raised while the kernel ran
133 | 			cudaError_t err = cudaGetLastError();
134 | 			if (err == cudaSuccess) err = cudaDeviceSynchronize();
135 | 			if (err != cudaSuccess) {
136 | 				cout << "CUDA error: " << cudaGetErrorString(err) << "\n";
137 | 				return 1;
138 | 			}
139 | 
140 | 			// copy results from device to host
141 | 			d_s.get(&s[0], N_SIMULS);
142 | 
143 | 			// compute the payoff average
144 | 			double gpu_sum = 0.0;
145 | 			for (size_t j = 0; j < N_SIMULS; j++) {
146 | 				gpu_sum += s[j];
147 | 			}
148 | 			payoff[i] = gpu_sum / N_SIMULS;
149 | 		}
150 | 		double t2 = double(clock()) / CLOCKS_PER_SEC;
151 | 
152 | 		// Central differences. Each underlying is bumped by +-0.5*diff*S0, so the
153 | 		// up/down spread is diff*S0 and the gamma step is 0.5*diff*S0.
154 | 		double delta1 = (payoff[3] - payoff[4]) / (diff*S0_1);
155 | 		double delta2 = (payoff[1] - payoff[2]) / (diff*S0_2);
156 | 		double gamma1 = (payoff[3] - 2 * payoff[0] + payoff[4]) /
157 | 			((0.5*diff*S0_1)*(0.5*diff*S0_1));
158 | 		double gamma2 = (payoff[1] - 2 * payoff[0] + payoff[2]) /
159 | 			((0.5*diff*S0_2)*(0.5*diff*S0_2));
160 | 
161 | 		cout << "****************** INFO ******************\n";
162 | 		cout << "Strike for ELS : ";
163 | 		for (size_t i = 0; i < Size; i++) cout << stk[i] << " ";
164 | 		cout << endl;
165 | 		cout << "Coupon for ELS : ";
166 | 		for (size_t i = 0; i < Size; i++) cout << coupon[i] << " ";
167 | 		cout << endl;
168 | 		cout << "Date for ELS : ";
169 | 		for (size_t i = 0; i < Size; i++) cout << date[i] << " ";
170 | 		cout << endl << endl;
171 | 
172 | 		cout << "Number of Paths: " << N_SIMULS << "\n";
173 | 		cout << "Number of Steps: " << N_STEPS << "\n";
174 | 		cout << "Underlying Initial Price: " << S0_1 << " " << S0_2 << "\n";
175 | 		cout << "Barrier: " << B << "\n";
176 | 		cout << "Time to Maturity: " << T << " years\n";
177 | 		cout << "Risk-free Interest Rate 1: " << r1 << "\n";
178 | 		cout << "Risk-free Interest Rate 2: " << r2 << "\n";
179 | 		cout << "Discount rate: " << discr << "\n";
180 | 		cout << "Volatility: " << sig1 << " " << sig2 << "\n";
181 | 		cout << "Face Value: " << 10000 << "\n";
182 | 		cout << "****************** PRICE, GREEK ******************\n";
183 | 		cout << "Option Price (GPU): " << payoff[0] << "\n"; // base (unbumped) scenario
184 | 		cout << "Option Delta1 (GPU): " << delta1 << "\n";
185 | 		cout << "Option Gamma1 (GPU): " << gamma1 << "\n";
186 | 		cout << "Option Delta2 (GPU): " << delta2 << "\n";
187 | 		cout << "Option Gamma2 (GPU): " << gamma2 << "\n";
188 | 		cout << "******************* TIME *****************\n";
189 | 		cout << "GPU Monte Carlo Computation: " << (t2 - t1)*1e3 << " ms\n";
190 | 		cout << "******************* END *****************\n";
191 | 	}
192 | 	catch (exception& e) {
193 | 		cout << "exception: " << e.what() << "\n";
194 | 	}
195 | }


--------------------------------------------------------------------------------
/three-asset ELS/code/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <vector>
  3 | #include <math.h>
  4 | #include <iostream>
  5 | #include <time.h>
  6 | #include <cuda_runtime.h>
  7 | #include "kernel.h"
  8 | #include "dev_array.h"
  9 | #include "dev_matrix.h"
 10 | #include <curand.h>
 11 | #include "chol.h"
 12 | 
 13 | #include <algorithm>
 14 | 
 15 | using namespace std;
 16 | 
 17 | int main() {
 18 | 	// Prices a three-asset ELS by Monte Carlo on the GPU and estimates delta and
 19 | 	// gamma for each underlying by central differences, re-using the same set
 20 | 	// of correlated random numbers for the base and every bumped scenario.
 21 | 	try {
 22 | 		// Variables
 23 | 		// declare variables and constants
 24 | 		const double T = 3.0; // time to maturity (years)
 25 | 		const size_t N_SIMULS = 10000; // number of Monte Carlo paths
 26 | 		const size_t N_STEPS = 360 * (int)T; // calendar convention : 360days
 27 | 		const double B = 0.6; // barrier
 28 | 		const double S0_1 = 2081.18; const double S0_2 = 3674.05; const double S0_3 = 27094.93; // initial prices
 29 | 		const double sig1 = 0.2379; const double sig2 = 0.2330; const double sig3 = 0.2857; // volatilities
 30 | 
 31 | 		const double r1 = 0.0165; // risk-free rate 1
 32 | 		const double r2 = 0.0185; // risk-free rate 2
 33 | 		const double r3 = 0.0105; // risk-free rate 3
 34 | 		const double discr = 0.0165; // discount rate
 35 | 		const double dummy = 0.075; // NOTE(review): meaning is defined by optionData - confirm in kernel.h
 36 | 
 37 | 		// This example sets 6 observation dates.
 38 | 		// NOTE(review): `length` is not declared in this file; presumably kernel.h
 39 | 		// provides it as the number of observation dates - confirm.
 40 | 		const double stk[] = { 0.95, 0.9, 0.85, 0.8, 0.75, 0.7 };
 41 | 		const double coupon[] = { 0.0125, 0.025, 0.0375, 0.05, 0.0625, 0.075 };
 42 | 		const double date[] = { ceil(N_STEPS * 1.0 / length), ceil(N_STEPS * 2.0 / length),
 43 | 			ceil(N_STEPS * 3.0 / length), ceil(N_STEPS * 4.0 / length),
 44 | 			ceil(N_STEPS * 5.0 / length), ceil(N_STEPS * 6.0 / length) };
 45 | 
 46 | 		// correlation between underlying x and y
 47 | 		const double rho12 = 0.5;
 48 | 		const double rho23 = 0.5;
 49 | 		const double rho13 = 0.5;
 50 | 
 51 | 		const double rho[] = { rho12, rho13, rho23 };
 52 | 		const unsigned rho_size = sizeof(rho) / sizeof(*rho);
 53 | 		// 3x3 buffer: with three underlyings the number of pairwise correlations
 54 | 		// (3) happens to equal the matrix dimension.
 55 | 		double M[rho_size*rho_size] = { 0 };
 56 | 
 57 | 		makeChol3(M, rho); // cholesky decomposition for correlated random number
 58 | 
 59 | 		double diff = 0.01; // 1% diff for greeks
 60 | 		///////////////////////////////////////////////
 61 | 
 62 | 		// make variables
 63 | 		const size_t N_NORMALS = N_SIMULS*N_STEPS;
 64 | 		double dt = double(T) / double(N_STEPS);
 65 | 		double sqrdt = sqrt(dt);
 66 | 
 67 | 		// exception handling: the three schedules must hold the same number of
 68 | 		// entries and that number must agree with `length`.
 69 | 		const size_t Size = sizeof(stk) / sizeof(stk[0]);
 70 | 		if (sizeof(coupon) != sizeof(stk) || sizeof(date) != sizeof(stk) || (size_t)length != Size) {
 71 | 			cout << "Size error!" << endl;
 72 | 			return 1;
 73 | 		}
 74 | 
 75 | 		// generate info arrays
 76 | 		double payment[Size] = { 0 };
 77 | 		for (size_t i = 0; i < Size; i++) {
 78 | 			payment[i] = 10000 * (1 + coupon[i]); // face value 10000 plus coupon
 79 | 		}
 80 | 
 81 | 		dev_array<double> d_stk(Size); d_stk.set(stk, Size);
 82 | 		dev_array<double> d_payment(Size); d_payment.set(payment, Size);
 83 | 		dev_array<double> d_date(Size); d_date.set(date, Size);
 84 | 
 85 | 		// generate blank arrays
 86 | 		vector<double> s(N_SIMULS);
 87 | 		dev_array<double> d_s(N_SIMULS);
 88 | 		dev_matrix<double> d_normals_rev(N_NORMALS, 3); // correlated random number in GPU
 89 | 
 90 | 		// The raw normals and the Cholesky factor are only needed to build
 91 | 		// d_normals_rev. Giving them their own scope destroys each of them exactly
 92 | 		// once, right here, instead of invoking the destructors by hand (which
 93 | 		// would run them a second time when main's scope ends).
 94 | 		{
 95 | 			dev_matrix<double> d_normals(N_NORMALS, 3); // initial random number in GPU
 96 | 			dev_matrix<double> chol(3, 3);
 97 | 			chol.set(M, 3, 3);
 98 | 
 99 | 			// generate random numbers (host API)
100 | 			dev_fillRand(d_normals.getData(), N_NORMALS, 3);
101 | 
102 | 			// make a correlated random number (GPU matrix multiplication using cublas)
103 | 			dev_mmul(d_normals.getData(), chol.getData(), d_normals_rev.getData(), N_NORMALS, 3, 3);
104 | 
105 | 			// wait for the multiplication before its inputs go out of scope
106 | 			cudaError_t err = cudaDeviceSynchronize();
107 | 			if (err != cudaSuccess) {
108 | 				cout << "CUDA error: " << cudaGetErrorString(err) << "\n";
109 | 				return 1;
110 | 			}
111 | 		}
112 | 
113 | 		optionData o1(S0_1, S0_1, r1, discr, T, sig1, dt, sqrdt, B, dummy);  // zero tick
114 | 		optionData o2(S0_2, S0_2, r2, discr, T, sig2, dt, sqrdt, B, dummy);
115 | 		optionData o3(S0_3, S0_3, r3, discr, T, sig3, dt, sqrdt, B, dummy);
116 | 		optionData o4(S0_1 * (1.0 + 0.5*diff), S0_1, r1, discr, T, sig1, dt, sqrdt, B, dummy); // up tick
117 | 		optionData o5(S0_2 * (1.0 + 0.5*diff), S0_2, r2, discr, T, sig2, dt, sqrdt, B, dummy);
118 | 		optionData o6(S0_3 * (1.0 + 0.5*diff), S0_3, r3, discr, T, sig3, dt, sqrdt, B, dummy);
119 | 		optionData o7(S0_1 * (1.0 - 0.5*diff), S0_1, r1, discr, T, sig1, dt, sqrdt, B, dummy); // down tick
120 | 		optionData o8(S0_2 * (1.0 - 0.5*diff), S0_2, r2, discr, T, sig2, dt, sqrdt, B, dummy);
121 | 		optionData o9(S0_3 * (1.0 - 0.5*diff), S0_3, r3, discr, T, sig3, dt, sqrdt, B, dummy);
122 | 
123 | 		// make a book
124 | 		optionData book[] = { o1, o2, o3, o4, o5, o6, o7, o8, o9 };
125 | 
126 | 		// Each row picks the book entries used as {asset 1, asset 2, asset 3} for
127 | 		// one run. One base run plus an up and a down bump per underlying is all
128 | 		// the central differences below need.
129 | 		const int N_SCENARIOS = 7;
130 | 		const int scenario[N_SCENARIOS][3] = {
131 | 			{ 0, 1, 2 }, // payoff[0]  s1:0 s2:0 s3:0
132 | 			{ 0, 4, 2 }, // payoff[1]  s1:0 s2:+ s3:0
133 | 			{ 0, 1, 5 }, // payoff[2]  s1:0 s2:0 s3:+
134 | 			{ 0, 7, 2 }, // payoff[3]  s1:0 s2:- s3:0
135 | 			{ 0, 1, 8 }, // payoff[4]  s1:0 s2:0 s3:-
136 | 			{ 3, 1, 2 }, // payoff[5]  s1:+ s2:0 s3:0
137 | 			{ 6, 1, 2 }  // payoff[6]  s1:- s2:0 s3:0
138 | 		};
139 | 
140 | 		double payoff[N_SCENARIOS] = {};
141 | 
142 | 		double t1 = double(clock()) / CLOCKS_PER_SEC;
143 | 		for (int i = 0; i < N_SCENARIOS; i++) {
144 | 			// call the kernel
145 | 			ELS3(book[scenario[i][0]], book[scenario[i][1]], book[scenario[i][2]], d_s.getData(), d_stk.getData(), d_payment.getData(), d_date.getData(), d_normals_rev.getData(), N_STEPS, N_SIMULS);
146 | 
147 | 			// report launch errors first, then errors raised while the kernel ran
148 | 			cudaError_t err = cudaGetLastError();
149 | 			if (err == cudaSuccess) err = cudaDeviceSynchronize();
150 | 			if (err != cudaSuccess) {
151 | 				cout << "CUDA error: " << cudaGetErrorString(err) << "\n";
152 | 				return 1;
153 | 			}
154 | 
155 | 			// copy results from device to host
156 | 			d_s.get(&s[0], N_SIMULS);
157 | 
158 | 			// compute the payoff average
159 | 			double gpu_sum = 0.0;
160 | 			for (size_t j = 0; j < N_SIMULS; j++) {
161 | 				gpu_sum += s[j];
162 | 			}
163 | 			payoff[i] = gpu_sum / N_SIMULS;
164 | 		}
165 | 		double t2 = double(clock()) / CLOCKS_PER_SEC;
166 | 
167 | 		// Central differences. Each underlying is bumped by +-0.5*diff*S0, so the
168 | 		// up/down spread is diff*S0 and the gamma step is 0.5*diff*S0.
169 | 		double delta1 = (payoff[5] - payoff[6]) / (diff*S0_1);
170 | 		double delta2 = (payoff[1] - payoff[3]) / (diff*S0_2);
171 | 		double delta3 = (payoff[2] - payoff[4]) / (diff*S0_3);
172 | 		double gamma1 = (payoff[5] - 2 * payoff[0] + payoff[6]) /
173 | 			((0.5*diff*S0_1)*(0.5*diff*S0_1));
174 | 		double gamma2 = (payoff[1] - 2 * payoff[0] + payoff[3]) /
175 | 			((0.5*diff*S0_2)*(0.5*diff*S0_2));
176 | 		double gamma3 = (payoff[2] - 2 * payoff[0] + payoff[4]) /
177 | 			((0.5*diff*S0_3)*(0.5*diff*S0_3));
178 | 
179 | 		cout << "****************** INFO ******************\n";
180 | 		cout << "Strike for ELS : ";
181 | 		for (size_t i = 0; i < Size; i++) cout << stk[i] << " ";
182 | 		cout << endl;
183 | 		cout << "Coupon for ELS : ";
184 | 		for (size_t i = 0; i < Size; i++) cout << coupon[i] << " ";
185 | 		cout << endl;
186 | 		cout << "Date for ELS : ";
187 | 		for (size_t i = 0; i < Size; i++) cout << date[i] << " ";
188 | 		cout << endl << endl;
189 | 
190 | 		cout << "Number of Paths: " << N_SIMULS << "\n";
191 | 		cout << "Number of Steps: " << N_STEPS << "\n";
192 | 		cout << "Underlying Initial Price: " << S0_1 << " " << S0_2 << " " << S0_3 << "\n";
193 | 		cout << "Barrier: " << B << "\n";
194 | 		cout << "Time to Maturity: " << T << " years\n";
195 | 		cout << "Risk-free Interest Rate 1: " << r1 << "\n";
196 | 		cout << "Risk-free Interest Rate 2: " << r2 << "\n";
197 | 		cout << "Risk-free Interest Rate 3: " << r3 << "\n";
198 | 		cout << "Discount rate: " << discr << "\n";
199 | 		cout << "Volatility: " << sig1 << " " << sig2 << " " << sig3 << "\n";
200 | 		cout << "Face Value: " << 10000 << "\n";
201 | 		cout << "****************** PRICE, GREEK ******************\n";
202 | 		cout << "Option Price (GPU): " << payoff[0] << "\n"; // base (unbumped) scenario
203 | 		cout << "Option Delta1 (GPU): " << delta1 << "\n";
204 | 		cout << "Option Gamma1 (GPU): " << gamma1 << "\n";
205 | 		cout << "Option Delta2 (GPU): " << delta2 << "\n";
206 | 		cout << "Option Gamma2 (GPU): " << gamma2 << "\n";
207 | 		cout << "Option Delta3 (GPU): " << delta3 << "\n";
208 | 		cout << "Option Gamma3 (GPU): " << gamma3 << "\n";
209 | 		cout << "******************* TIME *****************\n";
210 | 		cout << "GPU Monte Carlo Computation: " << (t2 - t1)*1e3 << " ms\n";
211 | 		cout << "******************* END *****************\n";
212 | 	}
213 | 	catch (exception& e) {
214 | 		cout << "exception: " << e.what() << "\n";
215 | 	}
216 | }


--------------------------------------------------------------------------------